def generateGaussianCenters(self, referenceSamples=None):
    """
    Pick the Gaussian centers for the supplied reference samples.

    The selection itself is delegated to generateAllGaussianCenters; the
    result is handed to Matrix.show (together with self.settings) under
    the label "Gaussian Centers" and then returned to the caller.
    """
    centers = self.generateAllGaussianCenters(referenceSamples)

    # Report the chosen centers using this instance's settings
    Matrix.show("Gaussian Centers", centers, self.settings)

    return centers
def generateGaussianCenters(self, referenceSamples=None):
    """
    Return the Gaussian centers chosen for referenceSamples.

    generateAllGaussianCenters performs the actual choice. Before the
    centers are returned they are passed to Matrix.show, labelled
    "Gaussian Centers", along with self.settings.
    """
    chosenCenters = self.generateAllGaussianCenters(referenceSamples)

    # Hand the result to the matrix reporter before returning it
    Matrix.show("Gaussian Centers", chosenCenters, self.settings)

    return chosenCenters
def computeModelParameters(self, referenceSamples=None, testSamples=None, gaussianCenters=None):
    """
    Select the Gaussian width (sigma) and the regularization weight
    (lambda) through k-fold cross validation.

    Every sigma candidate from computeGaussianWidthCandidates is combined
    with every lambda candidate from generateRegularizationParams. For each
    combination theta is estimated on the training part of each of the
    self.crossFolds folds, the objective J is evaluated on the held-out
    part, and the per-fold values are averaged. The combination with the
    smallest average is returned as (optimalSigma, optimalLambda).
    """
    refRows, refCols = referenceSamples.shape
    testRows, testCols = testSamples.shape

    sigmaCandidates = self.computeGaussianWidthCandidates(
        referenceSamples, testSamples)
    lambdaValues = self.generateRegularizationParams()

    Vector.show("Sigma Candidates", sigmaCandidates, self.settings)
    Vector.show("Lambda Candidates", lambdaValues, self.settings)

    sigmaCount = numpy.size(sigmaCandidates)
    lambdaCount = numpy.size(lambdaValues)

    # One row per sigma candidate, one column per lambda candidate
    cvScores = numpy.zeros((sigmaCount, lambdaCount))

    # Shuffle the column indices and give every position a fold number
    refShuffled = numpy.random.permutation(refCols)
    refFoldOf = numpy.floor(
        numpy.arange(refCols) * self.crossFolds / refCols)
    testShuffled = numpy.random.permutation(testCols)
    testFoldOf = numpy.floor(
        numpy.arange(testCols) * self.crossFolds / testCols)

    # k-fold cross validation; names follow the RULSIF formulas
    for sigmaIdx in range(sigmaCount):
        # The kernel matrices depend on sigma, so rebuild them here
        sigma = sigmaCandidates[sigmaIdx]
        K_ref = GaussianKernel(sigma).apply(
            referenceSamples, gaussianCenters).T
        K_test = GaussianKernel(sigma).apply(
            testSamples, gaussianCenters).T

        # Scores of the current sigma: folds x lambda candidates
        foldScores = numpy.zeros((self.crossFolds, lambdaCount))

        for foldIdx in range(self.crossFolds):
            # Columns outside the current fold form the training set
            refTrainCols = refShuffled[refFoldOf != foldIdx]
            testTrainCols = testShuffled[testFoldOf != foldIdx]
            K_ref_train = K_ref[:, refTrainCols]
            K_test_train = K_test[:, testTrainCols]

            H_hat_fold = AlphaRelativeDensityRatioEstimator.H_hat(
                self.alphaConstraint, K_ref_train, K_test_train)
            h_hat_fold = AlphaRelativeDensityRatioEstimator.h_hat(
                K_ref_train)

            for lambdaIdx in range(lambdaCount):
                theta_hat_fold = AlphaRelativeDensityRatioEstimator.theta_hat(
                    H_hat_fold, h_hat_fold, lambdaValues[lambdaIdx],
                    self.kernelBasis)

                # Columns of the current fold are held out for validation
                refHeldOutCols = refShuffled[refFoldOf == foldIdx]
                testHeldOutCols = testShuffled[testFoldOf == foldIdx]
                K_ref_heldOut = K_ref[:, refHeldOutCols]
                K_test_heldOut = K_test[:, testHeldOutCols]

                ratio_ref = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                    K_ref_heldOut, theta_hat_fold)
                ratio_test = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                    K_test_heldOut, theta_hat_fold)

                # Objective J(theta) for this (fold, lambda) combination
                foldScores[foldIdx, lambdaIdx] = (
                    AlphaRelativeDensityRatioEstimator.J_of_theta(
                        self.alphaConstraint, ratio_ref, ratio_test))

        # Average over the folds to get one score per lambda candidate
        cvScores[sigmaIdx, :] = numpy.mean(foldScores, axis=0)

    Matrix.show("Cross-Validation Scores", cvScores, self.settings)

    # Best lambda for every sigma, then the sigma with the lowest minimum
    bestLambdaIdxPerSigma = cvScores.argmin(1)
    bestSigmaIdx = cvScores.min(1).argmin()

    optimalSigma = sigmaCandidates[bestSigmaIdx]
    optimalLambda = lambdaValues[bestLambdaIdxPerSigma[bestSigmaIdx]]

    return (optimalSigma, optimalLambda)
def computeModelParameters(self, referenceSamples=None, testSamples=None, gaussianCenters=None):
    """
    Computes model parameters via k-fold cross validation process

    Each Gaussian width (sigma) candidate is combined with each
    regularization (lambda) candidate. For every combination theta is
    estimated on the training part of each fold, the objective J(theta)
    is evaluated on the held-out part, and the values are averaged over
    the self.crossFolds folds. The combination with the lowest average
    score is selected.

    Parameters:
        referenceSamples: 2-D array of reference samples. The size of its
            second axis is the number of items split into folds
            (presumably one sample per column - confirm with callers).
        testSamples: 2-D array of test samples, handled the same way.
        gaussianCenters: kernel centers passed to GaussianKernel.apply
            together with each sample set.

    Returns:
        The tuple (optimalSigma, optimalLambda).
    """
    # Unpacking into two names keeps the implicit "must be 2-D" check
    _, refCols = referenceSamples.shape
    _, testCols = testSamples.shape

    sigmaWidths = self.computeGaussianWidthCandidates(referenceSamples, testSamples)
    lambdaCandidates = self.generateRegularizationParams()

    Vector.show("Sigma Candidates", sigmaWidths, self.settings)
    Vector.show("Lambda Candidates", lambdaCandidates, self.settings)

    numSigmas = numpy.size(sigmaWidths)
    numLambdas = numpy.size(lambdaCandidates)

    # Cross validation scoring matrix: sigma candidates x lambda candidates
    crossValidationScores = numpy.zeros((numSigmas, numLambdas))

    # Cross validation index assignment: a random permutation of the
    # column indices plus the fold number assigned to each position
    referenceSamplesCVIdxs = numpy.random.permutation(refCols)
    referenceSamplesCVSplit = numpy.floor(
        numpy.arange(refCols) * self.crossFolds / refCols)
    testSamplesCVIdxs = numpy.random.permutation(testCols)
    testSamplesCVSplit = numpy.floor(
        numpy.arange(testCols) * self.crossFolds / testCols)

    # Initiate k-fold cross-validation procedure. Using variable
    # notation similar to the RULSIF formulas.
    for sigmaIdx in range(numSigmas):
        # (re-)Calculate the kernel matrix using the candidate sigma width
        sigma = sigmaWidths[sigmaIdx]
        K_ref = GaussianKernel(sigma).apply(referenceSamples, gaussianCenters).T
        K_test = GaussianKernel(sigma).apply(testSamples, gaussianCenters).T

        # Result matrix for the current sigma candidate: folds x lambdas
        foldResult = numpy.zeros((self.crossFolds, numLambdas))

        for foldIdx in range(self.crossFolds):
            # Columns outside the current fold form the training set
            K_ref_trainingSet = K_ref[
                :, referenceSamplesCVIdxs[referenceSamplesCVSplit != foldIdx]]
            K_test_trainingSet = K_test[
                :, testSamplesCVIdxs[testSamplesCVSplit != foldIdx]]

            # Columns of the current fold are held out for validation.
            # They do not depend on lambda, so select them once per fold.
            K_ref_testSet = K_ref[
                :, referenceSamplesCVIdxs[referenceSamplesCVSplit == foldIdx]]
            K_test_testSet = K_test[
                :, testSamplesCVIdxs[testSamplesCVSplit == foldIdx]]

            H_h_KthFold = AlphaRelativeDensityRatioEstimator.H_hat(
                self.alphaConstraint, K_ref_trainingSet, K_test_trainingSet)
            h_h_KthFold = AlphaRelativeDensityRatioEstimator.h_hat(
                K_ref_trainingSet)

            # Iterate over the local lambda candidates; the previous
            # numpy.lambdaCandidates lookup raised AttributeError.
            for lambdaIdx in range(numLambdas):
                lambdaCandidate = lambdaCandidates[lambdaIdx]

                theta_h_KthFold = AlphaRelativeDensityRatioEstimator.theta_hat(
                    H_h_KthFold, h_h_KthFold, lambdaCandidate,
                    self.kernelBasis)

                r_alpha_Xref = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                    K_ref_testSet, theta_h_KthFold)
                r_alpha_Xtest = AlphaRelativeDensityRatioEstimator.g_of_X_theta(
                    K_test_testSet, theta_h_KthFold)

                # Objective function J(theta) under the current parameters
                foldResult[foldIdx, lambdaIdx] = (
                    AlphaRelativeDensityRatioEstimator.J_of_theta(
                        self.alphaConstraint, r_alpha_Xref, r_alpha_Xtest))

        # Average the folds into one score per lambda for this sigma
        crossValidationScores[sigmaIdx, :] = numpy.mean(foldResult, 0)

    Matrix.show("Cross-Validation Scores", crossValidationScores, self.settings)

    # Best lambda per sigma, then the sigma whose best score is lowest
    crossValidationMinScores = crossValidationScores.min(1)
    crossValidationMinIdxForLambda = crossValidationScores.argmin(1)
    crossValidationMinIdxForSigma = crossValidationMinScores.argmin()

    optimalSigma = sigmaWidths[crossValidationMinIdxForSigma]
    optimalLambda = lambdaCandidates[
        crossValidationMinIdxForLambda[crossValidationMinIdxForSigma]]

    return (optimalSigma, optimalLambda)