def _estimateDensityByConfig(self, dtype, samples, config={}): if dtype == "kde": # compute bounds of samples bounds = np.ndarray((1, 2)) bounds[:, 0] = np.min(samples) bounds[:, 1] = np.max(samples) return KDEDist(samples, bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE, bounds=bounds) elif dtype == "sgde": # compute bounds of samples bounds = np.ndarray((1, 2)) bounds[:, 0] = np.min(samples) bounds[:, 1] = np.max(samples) return SGDEdist.byLearnerSGDEConfig(samples, bounds, config=config) else: raise AttributeError("density estimation type %s is not known. Select one in [gaussianKDE, sgde]")
def test2DCovarianceMatrix(self):
    """Check the KDE covariance and correlation of 2D normal samples.

    Draws 2000 samples from a ``dists.MultivariateNormal`` with a fixed
    seed, fits a ``KDEDist`` and an ``SGDEdist`` to them, plots the
    marginals and the 2D densities, prints the moments, and asserts that
    the KDE covariance is close to the prescribed covariance matrix and
    that the KDE correlation matrix is close to the empirical one.
    The SGDE object is round-tripped through JSON before it is plotted
    and printed.
    """
    # prepare data
    np.random.seed(1234567)
    C = np.array([[0.3, 0.09],
                  [0.09, 0.3]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
    samples = U.rvs(2000)

    learner_config = {
        "grid_level": 5,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 3.16228e-06,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": False,
        "sgde_makePositive": True,
        "sgde_makePositive_candidateSearchAlgorithm": "joined",
        "sgde_makePositive_interpolationAlgorithm": "setToZero",
        "sgde_generateConsistentGrid": True,
        "sgde_unitIntegrand": True,
    }

    kde = KDEDist(samples)
    sgde = SGDEdist.byLearnerSGDEConfig(samples,
                                        bounds=U.getBounds(),
                                        config=learner_config)

    # one-dimensional marginals of the sparse grid density
    sgde_x1 = sgde.marginalizeToDimX(0)
    sgde_x2 = sgde.marginalizeToDimX(1)

    plt.figure()
    for marginal, label in ((sgde_x1, "x1"), (sgde_x2, "x2")):
        plotDensity1d(marginal, label=label)
    marginal_moments = (sgde_x1.mean(), sgde_x2.mean(),
                        sgde_x1.var(), sgde_x2.var())
    plt.title("mean: x1=%g, x2=%g; var: x1=%g, x2=%g" % marginal_moments)
    plt.legend()

    # serialize to JSON and continue with the restored object
    sgde = Dist.fromJson(json.loads(sgde.toJson()))

    plt.figure()
    plotDensity2d(U, addContour=True)
    plt.title("analytic")

    plt.figure()
    plotDensity2d(kde, addContour=True)
    plt.title("kde")

    plt.figure()
    plotDensity2d(sgde, addContour=True)
    integral = doQuadrature(sgde.grid, sgde.alpha)
    plt.title("sgde (I(f) = %g)" % (integral, ))

    # print the results
    print("E(x) ~ %g ~ %g" % (kde.mean(), sgde.mean()))
    print("V(x) ~ %g ~ %g" % (kde.var(), sgde.var()))
    print("-" * 60)
    print(kde.cov())
    print(sgde.cov())

    cov_error = np.linalg.norm(C - kde.cov())
    self.assertTrue(cov_error < 1e-2, "KDE cov wrong")

    corr_error = np.linalg.norm(np.corrcoef(samples.T) - kde.corrcoeff())
    self.assertTrue(corr_error < 1e-1, "KDE corrcoef wrong")

    plt.show()
def test2DNormalMoments(self):
    """Check mean and variance of an SGDE fitted to 2D normal samples.

    Draws 1000 training samples (fixed seed) from the joint distribution
    of two ``dists.Normal(0, 0.5, -2, 2)`` factors, fits an ``SGDEdist``
    and a ``KDEDist`` to them, and asserts that the SGDE mean and variance
    are close to those of the analytic distribution. Afterwards the
    moments are printed and the marginal and 2D densities are plotted.
    """
    mean = 0
    var = 0.5
    U = dists.J(
        [dists.Normal(mean, var, -2, 2),
         dists.Normal(mean, var, -2, 2)])

    np.random.seed(1234567)
    trainSamples = U.rvs(1000)

    dist = SGDEdist.byLearnerSGDEConfig(
        trainSamples,
        config={
            "grid_level": 5,
            "grid_type": "linear",
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": True,
            "sgde_makePositive": True
        },
        bounds=U.getBounds())
    samples_dist = dist.rvs(1000, shuffle=True)

    kde = KDEDist(trainSamples)
    samples_kde = kde.rvs(1000, shuffle=True)

    # -----------------------------------------------
    self.assertTrue(
        np.abs(U.mean() - dist.mean()) < 1e-2, "SGDE mean wrong")
    self.assertTrue(
        np.abs(U.var() - dist.var()) < 4e-2, "SGDE variance wrong")
    # -----------------------------------------------

    # print the results
    print("E(x) ~ %g ~ %g" % (kde.mean(), dist.mean()))
    print("V(x) ~ %g ~ %g" % (kde.var(), dist.var()))
    print("log  ~ %g ~ %g" % (kde.crossEntropy(trainSamples),
                              dist.crossEntropy(trainSamples)))
    print("-" * 60)

    print(dist.cov())
    print(kde.cov())

    sgde_x1 = dist.marginalizeToDimX(0)
    kde_x1 = kde.marginalizeToDimX(0)

    plt.figure()
    plotDensity1d(U.getDistributions()[0], label="analytic")
    plotDensity1d(sgde_x1, label="sgde")
    plotDensity1d(kde_x1, label="kde")
    plt.title("mean: sgde=%g, kde=%g; var: sgde=%g, kde=%g" %
              (sgde_x1.mean(), kde_x1.mean(), sgde_x1.var(), kde_x1.var()))
    plt.legend()

    plt.figure()
    plotDensity2d(U, addContour=True)
    plt.title("analytic")

    plt.figure()
    plotDensity2d(kde, addContour=True)
    plt.scatter(samples_kde[:, 0], samples_kde[:, 1])
    plt.title("kde")

    plt.figure()
    plotDensity2d(dist, addContour=True)
    plt.scatter(samples_dist[:, 0], samples_dist[:, 1])
    # Scale the quadrature value by the volume of the domain, i.e. the
    # product of the interval widths. Multiplying all bound entries
    # together only matches the volume by coincidence for [-2, 2].
    # NOTE(review): assumes getBounds() yields one (lower, upper) row per
    # dimension -- confirm.
    volume = np.prod(np.diff(U.getBounds(), axis=1))
    plt.title("sgde (I(f) = %g)" %
              (volume * doQuadrature(dist.grid, dist.alpha), ))

    plt.show()