def test1DNormalDist(self):
    """Fit a Gaussian KDE to samples of a 1D normal and print diagnostics.

    Draws 500 training and 1000 test samples from
    ``dists.Normal(1.85, .3, 0, 3)``, builds a ``KDEDist`` whose bandwidth
    is optimized by maximum likelihood, and prints the integral of the
    estimated pdf over [0, 3], mean and variance of both distributions,
    the KL divergence, the cross entropy and the L2 error.

    Nothing is asserted; the output is meant for manual inspection.
    """
    # Reference distribution.
    # NOTE(review): the arguments are presumably (mean, std, lower, upper)
    # -- confirm against dists.Normal.
    reference = dists.Normal(1.85, .3, 0, 3)

    # The training draw must stay first so the random stream is consumed
    # in the same order as before.
    rawTrain = reference.rvs(500)
    rawTest = reference.rvs(1000)
    trainSamples = np.array([rawTrain]).T
    testSamples = np.array([rawTest]).T

    # Kernel density estimate restricted to the bounds of the reference.
    estimate = KDEDist(
        trainSamples,
        kernelType=KernelType_GAUSSIAN,
        bandwidthOptimizationType=BandwidthOptimizationType_MAXIMUMLIKELIHOOD,
        bounds=reference.getBounds())

    # Density plots are disabled:
    #   fig = plt.figure(); plotDensity1d(reference); plotDensity1d(estimate)

    def estimatedPdf(x):
        # KDEDist.pdf is called with a one-element list per evaluation point.
        return estimate.pdf([x])

    integral = quad(estimatedPdf, 0, 3)
    print("quad = %s" % (integral, ))

    meanPair = (reference.mean(), estimate.mean())
    print("mean = %g ~ %g" % meanPair)

    varPair = (reference.var(), estimate.var())
    print("var = %g ~ %g" % varPair)

    klValue = reference.klDivergence(estimate, testSamples, testSamples)
    print("KL = %g" % klValue)

    ceValue = estimate.crossEntropy(testSamples)
    print("CE = %g" % ceValue)

    mseValue = estimate.l2error(reference, testSamples, testSamples)
    print("MSE = %g" % mseValue)

    plt.show()
def estimateKDEDensity(functionName,
                       trainSamples,
                       testSamples=None,
                       iteration=0,
                       plot=False,
                       out=True,
                       label="kde_gaussian",
                       bandwidthOptimizationTypeStr="rot"):
    """Fit a ``KDEDist`` to ``trainSamples`` and optionally persist results.

    Parameters
    ----------
    functionName : str
        Tag used in every output file name.
    trainSamples : numpy.ndarray
        2D sample array (``shape[0]`` x ``shape[1]``) used to build the
        estimator.
    testSamples : numpy.ndarray, optional
        2D sample array the cross entropy is evaluated on. It is handed to
        ``kdeDist.crossEntropy`` and, if ``out`` is set, to ``np.savetxt``
        without a ``None`` check.
        NOTE(review): the ``None`` default is therefore presumably not
        usable in practice -- confirm against ``KDEDist.crossEntropy``.
    iteration : int
        Iteration counter used in every output file name.
    plot : bool
        Plot the density if it is two-dimensional.
    out : bool
        Write plots, samples, cross entropy and the estimator's JSON to
        ``os.path.join("data", label)``; otherwise plots are shown
        interactively. The directory is not created here.
    label : str
        Must contain ``"gaussian"`` or ``"epanechnikov"``; selects the
        kernel and names the output directory.
    bandwidthOptimizationTypeStr : str
        Converted with ``strTobandwidthOptimizationType``.

    Returns
    -------
    tuple
        ``(kdeDist, stats)``: the estimator and a dictionary holding the
        configuration, the samples, the train/test cross entropies and the
        estimator's JSON serialization.

    Raises
    ------
    AttributeError
        If ``label`` names neither supported kernel.
    """
    print("train: %i x %i (mean=%g, var=%g)" % (trainSamples.shape[0],
                                                 trainSamples.shape[1],
                                                 np.mean(trainSamples),
                                                 np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" % (testSamples.shape[0],
                                                     testSamples.shape[1],
                                                     np.mean(testSamples),
                                                     np.var(testSamples)))

    if "gaussian" in label:
        kernelType = KernelType_GAUSSIAN
    elif "epanechnikov" in label:
        kernelType = KernelType_EPANECHNIKOV
    else:
        raise AttributeError("label is unknown")

    bandwidthOptimizationType = strTobandwidthOptimizationType(
        bandwidthOptimizationTypeStr)

    kdeDist = KDEDist(trainSamples,
                      kernelType=kernelType,
                      bandwidthOptimizationType=bandwidthOptimizationType)
    # -----------------------------------------------------------
    cvKDE = kdeDist.crossEntropy(testSamples)

    # Resolved up front: the first plot below already saves into this
    # directory, which previously failed with UnboundLocalError because the
    # path was only assigned further down.
    pathResults = os.path.join("data", label)
    fileTag = "%s.i%i" % (functionName, iteration)
    numDims = kdeDist.getDim()
    # plotDensity2d is only called for two-dimensional densities
    canPlot = plot and numDims == 2

    if canPlot:
        fig = plt.figure()
        plotDensity2d(kdeDist)
        plt.title("log=%g" % cvKDE)
        if out:
            plt.tight_layout()
            plt.savefig(os.path.join(pathResults,
                                     "kde_dist.%s.jpg" % fileTag))
            plt.savefig(os.path.join(pathResults,
                                     "kde_dist.%s.pdf" % fileTag))
            plt.close(fig)
        else:
            plt.show()

    print("CV test = %g" % cvKDE)
    # -----------------------------------------------------------
    if out:
        # serialize cross entropies; csv.writer needs a text-mode file
        # opened with newline="" (binary mode raises TypeError on Python 3)
        out_crossEntropies = os.path.join(
            pathResults, "kde_cross_entropies.%s.csv" % fileTag)
        with open(out_crossEntropies, "w", newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(["crossEntropy"])
            file_writer.writerow([cvKDE])

        # serialize samples
        np.savetxt(os.path.join(pathResults,
                                "kde_train_samples.%s.csv" % fileTag),
                   trainSamples)
        np.savetxt(os.path.join(pathResults,
                                "kde_test_samples.%s.csv" % fileTag),
                   testSamples)

        if canPlot:
            # plot density, titled with the selected bandwidths
            fig = plt.figure()
            plotDensity2d(kdeDist)
            plt.title("%s -> CV = %g" % (kdeDist.getBandwidths(), cvKDE))
            plt.savefig(os.path.join(pathResults,
                                     "kde_pdf.%s.jpg" % fileTag))
            plt.close(fig)

        # serialize best configuration to json
        out_bestDist = os.path.join(
            pathResults, "kde_best_config.%s.json" % fileTag)
        with open(out_bestDist, "w") as fd:
            fd.write(kdeDist.toJson())

    # stats: report the settings that were actually used for the estimate
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': numDims,
            'label': label,
            'bandwidth_optimization': bandwidthOptimizationType,
            'kernelType': kernelType,
            'iteration': iteration,
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples,
        'crossEntropyTrainKDE': kdeDist.crossEntropy(trainSamples),
        'crossEntropyTestKDE': cvKDE,
        'KDEDist_json': kdeDist.toJson(),
    }

    return kdeDist, stats