Ejemplo n.º 1
0
    def generateGaussianCenters(self, referenceSamples=None):
        """
        Build the Gaussian centers for the supplied reference samples.

        The work is delegated to generateAllGaussianCenters; the result is
        handed to Matrix.show under the label "Gaussian Centers" together
        with self.settings, and then returned unchanged.
        """
        centers = self.generateAllGaussianCenters(referenceSamples)
        Matrix.show("Gaussian Centers", centers, self.settings)
        return centers
Ejemplo n.º 2
0
    def generateGaussianCenters(self, referenceSamples=None):
        """
        Return the Gaussian centers derived from referenceSamples.

        Centers come from generateAllGaussianCenters. Before returning, they
        are passed to Matrix.show (label "Gaussian Centers") along with
        self.settings.
        """
        allCenters = self.generateAllGaussianCenters(referenceSamples)
        Matrix.show("Gaussian Centers", allCenters, self.settings)
        return allCenters
Ejemplo n.º 3
0
    def computeModelParameters(self,
                               referenceSamples=None,
                               testSamples=None,
                               gaussianCenters=None):
        """
        Pick the Gaussian width and regularization value by k-fold cross
        validation.

        Each candidate sigma (from computeGaussianWidthCandidates) is paired
        with each candidate lambda (from generateRegularizationParams). For
        every pair and every fold, theta_hat is evaluated on the fold's
        training columns and J_of_theta is evaluated on the held-out columns.
        Scores are averaged over the self.crossFolds folds and the pair with
        the lowest mean score is returned.

        :param referenceSamples: 2-D array of reference samples; the size of
            its second dimension is what gets partitioned into folds. Must be
            supplied despite the None default, because its shape is read.
        :param testSamples: 2-D array of test samples, handled the same way
            as referenceSamples.
        :param gaussianCenters: kernel centers handed to GaussianKernel.apply.
        :return: tuple (optimalSigma, optimalLambda).
        """
        # Unpacking (rather than indexing) keeps the requirement that both
        # inputs are exactly two-dimensional.
        (_, refCols) = referenceSamples.shape
        (_, testCols) = testSamples.shape

        sigmaWidths = self.computeGaussianWidthCandidates(
            referenceSamples, testSamples)
        lambdaCandidates = self.generateRegularizationParams()

        Vector.show("Sigma Candidates", sigmaWidths, self.settings)
        Vector.show("Lambda Candidates", lambdaCandidates, self.settings)

        sigmaCount = numpy.size(sigmaWidths)
        lambdaCount = numpy.size(lambdaCandidates)

        # Mean objective value for every (sigma, lambda) pair
        crossValidationScores = numpy.zeros((sigmaCount, lambdaCount))

        # Shuffle the column indices, then label position i with fold
        # floor(i * crossFolds / n). The same assignment is reused for every
        # sigma candidate.
        referenceSamplesCVIdxs = numpy.random.permutation(refCols)
        referenceSamplesCVSplit = numpy.floor(
            numpy.r_[0:refCols] * self.crossFolds / refCols)
        testSamplesCVIdxs = numpy.random.permutation(testCols)
        testSamplesCVSplit = numpy.floor(
            numpy.r_[0:testCols] * self.crossFolds / testCols)

        # Initiate k-fold cross-validation procedure. Using variable
        # notation similar to the RULSIF formulas.
        for sigmaIdx in range(sigmaCount):

            # (Re-)calculate the kernel matrices using the candidate width
            sigma = sigmaWidths[sigmaIdx]
            K_ref = GaussianKernel(sigma).apply(referenceSamples,
                                                gaussianCenters).T
            K_test = GaussianKernel(sigma).apply(testSamples,
                                                 gaussianCenters).T

            # One row per fold, one column per lambda candidate
            foldResult = numpy.zeros((self.crossFolds, lambdaCount))

            for foldIdx in range(self.crossFolds):

                # Columns outside the current fold form the training set
                K_ref_trainingSet = K_ref[:, referenceSamplesCVIdxs[
                    referenceSamplesCVSplit != foldIdx]]
                K_test_trainingSet = K_test[:, testSamplesCVIdxs[
                    testSamplesCVSplit != foldIdx]]

                H_h_KthFold = AlphaRelativeDensityRatioEstimator.H_hat(
                    self.alphaConstraint, K_ref_trainingSet,
                    K_test_trainingSet)
                h_h_KthFold = AlphaRelativeDensityRatioEstimator.h_hat(
                    K_ref_trainingSet)

                # Columns inside the current fold form the validation set.
                # They do not depend on lambda, so slice them once per fold
                # instead of once per lambda candidate.
                # NOTE(review): assumes g_of_X_theta does not modify its
                # inputs in place - confirm.
                K_ref_testSet = K_ref[:, referenceSamplesCVIdxs[
                    referenceSamplesCVSplit == foldIdx]]
                K_test_testSet = K_test[:, testSamplesCVIdxs[
                    testSamplesCVSplit == foldIdx]]

                for lambdaIdx in range(lambdaCount):

                    lambdaCandidate = lambdaCandidates[lambdaIdx]

                    theta_h_KthFold = AlphaRelativeDensityRatioEstimator.theta_hat(
                        H_h_KthFold, h_h_KthFold, lambdaCandidate,
                        self.kernelBasis)

                    r_alpha_Xref = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                        K_ref_testSet, theta_h_KthFold)
                    r_alpha_Xtest = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                        K_test_testSet, theta_h_KthFold)

                    # Objective function J(theta) under the current parameters
                    foldResult[foldIdx, lambdaIdx] = (
                        AlphaRelativeDensityRatioEstimator.J_of_theta(
                            self.alphaConstraint, r_alpha_Xref,
                            r_alpha_Xtest))

            # Average over folds only once every fold has been scored
            crossValidationScores[sigmaIdx, :] = numpy.mean(foldResult, 0)

        Matrix.show("Cross-Validation Scores", crossValidationScores,
                    self.settings)

        # Best lambda for each sigma row, then the sigma row whose best score
        # is the overall minimum.
        crossValidationMinScores = crossValidationScores.min(1)
        crossValidationMinIdxForLambda = crossValidationScores.argmin(1)
        crossValidationMinIdxForSigma = crossValidationMinScores.argmin()

        optimalSigma = sigmaWidths[crossValidationMinIdxForSigma]
        optimalLambda = lambdaCandidates[
            crossValidationMinIdxForLambda[crossValidationMinIdxForSigma]]

        return (optimalSigma, optimalLambda)
Ejemplo n.º 4
0
    def computeModelParameters(self,
                               referenceSamples=None,
                               testSamples=None,
                               gaussianCenters=None):
        """
        Choose the Gaussian width and regularization value through k-fold
        cross validation.

        Candidate sigmas come from computeGaussianWidthCandidates and
        candidate lambdas from generateRegularizationParams. For each
        (sigma, lambda) pair and each fold, theta_hat is evaluated on the
        training columns and J_of_theta on the held-out columns. The scores
        are averaged across the self.crossFolds folds, and the pair with the
        smallest mean score is returned.

        :param referenceSamples: 2-D array of reference samples; the size of
            its second dimension is what gets split into folds. Must be
            supplied despite the None default, because its shape is read.
        :param testSamples: 2-D array of test samples, treated the same way
            as referenceSamples.
        :param gaussianCenters: kernel centers passed to GaussianKernel.apply.
        :return: tuple (optimalSigma, optimalLambda).
        """
        # Tuple unpacking keeps the requirement that both inputs are exactly
        # two-dimensional; the row counts themselves are not needed.
        (_, refCols) = referenceSamples.shape
        (_, testCols) = testSamples.shape

        sigmaWidths = self.computeGaussianWidthCandidates(
            referenceSamples, testSamples)
        lambdaCandidates = self.generateRegularizationParams()

        Vector.show("Sigma Candidates", sigmaWidths, self.settings)
        Vector.show("Lambda Candidates", lambdaCandidates, self.settings)

        sigmaCount = numpy.size(sigmaWidths)
        lambdaCount = numpy.size(lambdaCandidates)

        # Initialize cross validation scoring matrix
        crossValidationScores = numpy.zeros((sigmaCount, lambdaCount))

        # Initialize a cross validation index assignment list: shuffled
        # column indices, with position i assigned to fold
        # floor(i * crossFolds / n).
        referenceSamplesCVIdxs = numpy.random.permutation(refCols)
        referenceSamplesCVSplit = numpy.floor(
            numpy.r_[0:refCols] * self.crossFolds / refCols)
        testSamplesCVIdxs = numpy.random.permutation(testCols)
        testSamplesCVSplit = numpy.floor(
            numpy.r_[0:testCols] * self.crossFolds / testCols)

        # Initiate k-fold cross-validation procedure. Using variable
        # notation similar to the RULSIF formulas.
        for sigmaIdx in range(sigmaCount):

            # (Re-)calculate the kernel matrix using the candidate sigma width
            sigma = sigmaWidths[sigmaIdx]
            K_ref = GaussianKernel(sigma).apply(referenceSamples,
                                                gaussianCenters).T
            K_test = GaussianKernel(sigma).apply(testSamples,
                                                 gaussianCenters).T

            # Initialize a new result matrix for the current sigma candidate
            foldResult = numpy.zeros((self.crossFolds, lambdaCount))

            for foldIdx in range(self.crossFolds):

                # Everything outside the current fold is training data
                K_ref_trainingSet = K_ref[:, referenceSamplesCVIdxs[
                    referenceSamplesCVSplit != foldIdx]]
                K_test_trainingSet = K_test[:, testSamplesCVIdxs[
                    testSamplesCVSplit != foldIdx]]

                H_h_KthFold = AlphaRelativeDensityRatioEstimator.H_hat(
                    self.alphaConstraint, K_ref_trainingSet,
                    K_test_trainingSet)
                h_h_KthFold = AlphaRelativeDensityRatioEstimator.h_hat(
                    K_ref_trainingSet)

                # Select the subset of the kernel matrix not used in the
                # training set for use as the test set to validate against.
                # It is independent of lambda, so it is sliced once per fold.
                # NOTE(review): assumes g_of_X_theta does not modify its
                # inputs in place - confirm.
                K_ref_testSet = K_ref[:, referenceSamplesCVIdxs[
                    referenceSamplesCVSplit == foldIdx]]
                K_test_testSet = K_test[:, testSamplesCVIdxs[
                    testSamplesCVSplit == foldIdx]]

                # Iterate over the local lambdaCandidates sequence; it is not
                # an attribute of the numpy module.
                for lambdaIdx in range(lambdaCount):

                    lambdaCandidate = lambdaCandidates[lambdaIdx]

                    theta_h_KthFold = AlphaRelativeDensityRatioEstimator.theta_hat(
                        H_h_KthFold, h_h_KthFold, lambdaCandidate,
                        self.kernelBasis)

                    r_alpha_Xref = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                        K_ref_testSet, theta_h_KthFold)
                    r_alpha_Xtest = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                        K_test_testSet, theta_h_KthFold)

                    # Calculate the objective function J(theta) under the
                    # current parameters
                    J = AlphaRelativeDensityRatioEstimator.J_of_theta(
                        self.alphaConstraint, r_alpha_Xref, r_alpha_Xtest)

                    foldResult[foldIdx, lambdaIdx] = J

            # Average across folds once all folds have been scored
            crossValidationScores[sigmaIdx, :] = numpy.mean(foldResult, 0)

        Matrix.show("Cross-Validation Scores", crossValidationScores,
                    self.settings)

        # Best lambda per sigma row, then the sigma row holding the overall
        # minimum score.
        crossValidationMinScores = crossValidationScores.min(1)
        crossValidationMinIdxForLambda = crossValidationScores.argmin(1)
        crossValidationMinIdxForSigma = crossValidationMinScores.argmin()

        optimalSigma = sigmaWidths[crossValidationMinIdxForSigma]
        optimalLambda = lambdaCandidates[
            crossValidationMinIdxForLambda[crossValidationMinIdxForSigma]]

        return (optimalSigma, optimalLambda)