def test1_KMN_with_2d_gaussian_noise_y(self):
    X, Y = self.get_samples(std=0.5)

    with tf.Session():
        model_no_noise = KernelMixtureNetwork("kmn_no_noise_y", 1, 1, n_centers=5,
                                              x_noise_std=None, y_noise_std=None)
        model_no_noise.fit(X, Y)
        var_no_noise = model_no_noise.covariance(x_cond=np.array([[2]]))[0][0][0]

        model_noise = KernelMixtureNetwork("kmn_noise_y", 1, 1, n_centers=5,
                                           x_noise_std=None, y_noise_std=1)
        model_noise.fit(X, Y)
        var_noise = model_noise.covariance(x_cond=np.array([[2]]))[0][0][0]

        print("Training w/o noise:", var_no_noise)
        print("Training w/ noise:", var_noise)

        # y-noise regularization should visibly inflate the estimated conditional variance
        self.assertGreaterEqual(var_noise - var_no_noise, 0.1)
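# NOTE: the tests in this section rely on a `get_samples` helper defined on the
# test class but not shown here. Judging from how it is called (mu/std/n_samples
# keywords, X and Y treated as i.i.d. Gaussians with the same mean and scale),
# a minimal sketch could look like the following -- the defaults and the fixed
# seed are assumptions, not the canonical implementation:
#
#     def get_samples(self, mu=2, std=1.0, n_samples=2000):
#         np.random.seed(22)
#         data = np.random.normal([mu, mu], std, size=(n_samples, 2))
#         return data[:, 0], data[:, 1]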
def test_tail_risks_risk_mixture(self):
    X, Y = self.get_samples(std=0.5)
    model = KernelMixtureNetwork("kmn-var2", 1, 1, center_sampling_method="k_means",
                                 n_centers=5, n_training_epochs=50)
    model.fit(X, Y)

    x_cond = np.array([[0], [1]])
    VaR_mixture, CVaR_mixture = model.tail_risk_measures(x_cond, alpha=0.07)
    VaR_cdf, CVaR_mc = BaseDensityEstimator.tail_risk_measures(model, x_cond, alpha=0.07)

    print("CVaR mixture:", CVaR_mixture)
    print("CVaR cdf:", CVaR_mc)

    # the closed-form mixture estimates and the generic cdf / Monte-Carlo
    # fallback should agree to roughly one decimal place
    diff_cvar = np.mean(np.abs(CVaR_mc - CVaR_mixture))
    self.assertAlmostEqual(diff_cvar, 0, places=1)
    diff_var = np.mean(np.abs(VaR_mixture - VaR_cdf))
    self.assertAlmostEqual(diff_var, 0, places=1)
def test_KMN_l2_regularization(self):
    mu = 5
    std = 5
    X, Y = self.get_samples(mu=mu, std=std, n_samples=500)

    kmn_no_reg = KernelMixtureNetwork("kmn_no_reg", 1, 1, n_centers=10,
                                      n_training_epochs=200, l2_reg=0.0,
                                      weight_normalization=False)
    kmn_reg_l2 = KernelMixtureNetwork("kmn_reg_l2", 1, 1, n_centers=10,
                                      hidden_sizes=(16, 16), n_training_epochs=200,
                                      l2_reg=1.0, weight_normalization=False)
    kmn_no_reg.fit(X, Y)
    kmn_reg_l2.fit(X, Y)

    y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
    x = np.asarray([mu for i in range(y.shape[0])])
    p_true = norm.pdf(y, loc=mu, scale=std)
    err_no_reg = np.mean(np.abs(kmn_no_reg.pdf(x, y) - p_true))
    err_reg_l2 = np.mean(np.abs(kmn_reg_l2.pdf(x, y) - p_true))

    self.assertLessEqual(err_reg_l2, err_no_reg)
def test_conditional_value_at_risk_mixture(self):
    np.random.seed(20)
    X, Y = self.get_samples(std=0.5)
    model = KernelMixtureNetwork("kmn-var", 1, 1, center_sampling_method="k_means",
                                 n_centers=5, n_training_epochs=500, random_seed=24)
    model.fit(X, Y)

    x_cond = np.array([[0], [1]])
    CVaR_mixture = model.conditional_value_at_risk(x_cond, alpha=0.05)
    CVaR_cdf = BaseDensityEstimator.conditional_value_at_risk(model, x_cond, alpha=0.05,
                                                              n_samples=5 * 10**7)

    print("CVaR mixture:", CVaR_mixture)
    print("CVaR cdf:", CVaR_cdf)

    diff = np.mean(np.abs(CVaR_cdf - CVaR_mixture))
    self.assertAlmostEqual(diff, 0, places=1)
def test_KMN_adaptive_noise(self):
    # noise is switched off below 1000 samples and set to 5.0 from there on
    adaptive_noise_fn = lambda n, d: 0.0 if n < 1000 else 5.0

    X, Y = self.get_samples(mu=0, std=1, n_samples=999)
    est = KernelMixtureNetwork("kmn_999", 1, 1, n_centers=5, y_noise_std=0.0,
                               x_noise_std=0.0, adaptive_noise_fn=adaptive_noise_fn)
    est.fit(X, Y)
    std_999 = est.std_(x_cond=np.array([[0.0]]))[0]

    X, Y = self.get_samples(mu=0, std=1, n_samples=1002)
    est = KernelMixtureNetwork("kmn_1002", 1, 1, n_centers=5, y_noise_std=0.0,
                               x_noise_std=0.0, adaptive_noise_fn=adaptive_noise_fn)
    est.fit(X, Y)
    std_1002 = est.std_(x_cond=np.array([[0.0]]))[0]

    # the heavy noise injected once n >= 1000 should inflate the estimated std
    self.assertLess(std_999, std_1002)
    self.assertGreater(std_1002, 2)
def eval1():
    n_observations = 2000  # number of data points
    n_features = 1  # number of features

    X_train, X_test, y_train, y_test = build_econ1_dataset(n_observations)
    print("Size of features in training data: {}".format(X_train.shape))
    print("Size of output in training data: {}".format(y_train.shape))
    print("Size of features in test data: {}".format(X_test.shape))
    print("Size of output in test data: {}".format(y_test.shape))

    fig, ax = plt.subplots()
    fig.set_size_inches(10, 8)
    sns.regplot(X_train, y_train, fit_reg=False)
    # plt.savefig('toydata.png')
    # plt.show()

    kmn = KernelMixtureNetwork(train_scales=True, n_centers=20)
    kmn.fit(X_train, y_train, n_epoch=300, eval_set=(X_test, y_test))
    kmn.plot_loss()
    # plt.savefig('trainplot.png')

    # sample y conditioned on the test inputs and visualize the joint distribution
    samples = kmn.sample(X_test)
    print(X_test.shape, samples.shape)
    jp = sns.jointplot(X_test.ravel(), samples, kind="hex", stat_func=None, size=10)
    jp.ax_joint.add_line(Line2D([X_test[0][0], X_test[0][0]], [-40, 40], linewidth=3))
    jp.ax_joint.add_line(Line2D([X_test[1][0], X_test[1][0]], [-40, 40], color='g', linewidth=3))
    jp.ax_joint.add_line(Line2D([X_test[2][0], X_test[2][0]], [-40, 40], color='r', linewidth=3))
    plt.savefig('hexplot.png')
    plt.show()

    d = kmn.predict_density(X_test[0:3, :].reshape(-1, 1), resolution=1000)
    df = pd.DataFrame(d).transpose()
    df.index = np.linspace(kmn.y_min, kmn.y_max, num=1000)
    df.plot(legend=False, linewidth=3, figsize=(12.2, 8))
    plt.savefig('conditional_density.png')
def test7_data_normalization(self):
    X, Y = self.get_samples(std=2, mu=20)
    with tf.Session() as sess:
        model = KernelMixtureNetwork("kmn_data_normalization", 1, 1, n_centers=2,
                                     x_noise_std=None, y_noise_std=None,
                                     data_normalization=True, n_training_epochs=100)
        model.fit(X, Y)

        # test if data statistics were properly assigned to the tf graph
        x_mean, x_std = sess.run([model.mean_x_sym, model.std_x_sym])
        print(x_mean, x_std)
        mean_diff = float(np.abs(x_mean - 20))
        std_diff = float(np.abs(x_std - 2))
        self.assertLessEqual(mean_diff, 0.5)
        self.assertLessEqual(std_diff, 0.5)
def test9_data_normalization(self):
    np.random.seed(24)
    mean = -80
    std = 7
    data = np.random.normal([mean, mean, mean, mean], std, size=(4000, 4))
    X = data[:, 0:2]
    Y = data[:, 2:4]

    with tf.Session():
        model = KernelMixtureNetwork("kmn_data_normalization_2", 2, 2, n_centers=5,
                                     x_noise_std=None, y_noise_std=None,
                                     data_normalization=True, n_training_epochs=2000,
                                     random_seed=22, keep_edges=False, train_scales=True,
                                     weight_normalization=True, init_scales=np.array([1.0]))
        model.fit(X, Y)

        cond_mean = model.mean_(Y)
        print(np.mean(cond_mean))
        mean_diff = np.abs(mean - np.mean(cond_mean))
        self.assertLessEqual(mean_diff, np.abs(mean) * 0.1)

        cond_cov = np.mean(model.covariance(Y), axis=0)
        print(cond_cov)
        self.assertGreaterEqual(cond_cov[0][0], std**2 * 0.7)
        self.assertLessEqual(cond_cov[0][0], std**2 * 1.3)
        self.assertGreaterEqual(cond_cov[1][1], std**2 * 0.7)
        self.assertLessEqual(cond_cov[1][1], std**2 * 1.3)
def test_KMN_with_2d_gaussian_2(self):
    mu = 200
    std = 23
    X, Y = self.get_samples(mu=mu, std=std)

    for method in ["agglomerative"]:
        with tf.Session() as sess:
            model = KernelMixtureNetwork("kmn2_" + method, 1, 1,
                                         center_sampling_method=method, n_centers=10,
                                         hidden_sizes=(16, 16), init_scales=np.array([1.0]),
                                         train_scales=True, data_normalization=True)
            model.fit(X, Y)

            y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
            x = np.asarray([mu for i in range(y.shape[0])])
            p_est = model.pdf(x, y)
            p_true = norm.pdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

            p_est = model.cdf(x, y)
            p_true = norm.cdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)
def test2_KMN_with_2d_gaussian_noise_x(self):
    np.random.seed(22)
    X = np.random.uniform(0, 6, size=4000)
    Y = X + np.random.normal(0, 1, size=4000)

    x_test_2 = np.ones(100) * 2
    x_test_4 = np.ones(100) * 4
    y_test = np.linspace(1, 5, num=100)

    with tf.Session():
        model_no_noise = KernelMixtureNetwork("kmn_no_noise_x", 1, 1, n_centers=5,
                                              x_noise_std=None, y_noise_std=None)
        model_no_noise.fit(X, Y)
        pdf_distance_no_noise = np.mean(np.abs(model_no_noise.pdf(x_test_2, y_test)
                                               - model_no_noise.pdf(x_test_4, y_test)))

        model_noise = KernelMixtureNetwork("kmn_noise_x", 1, 1, n_centers=5,
                                           x_noise_std=2, y_noise_std=None)
        model_noise.fit(X, Y)
        pdf_distance_noise = np.mean(np.abs(model_noise.pdf(x_test_2, y_test)
                                            - model_noise.pdf(x_test_4, y_test)))

        print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
        print("Training w/ noise - pdf distance:", pdf_distance_noise)

        # x-noise smooths the estimate along x, so the conditional densities at
        # x=2 and x=4 should be at least twice as similar as without noise
        self.assertGreaterEqual(pdf_distance_no_noise / pdf_distance_noise, 2.0)
def testPickleUnpickleKDN(self):
    X, Y = self.get_samples()
    with tf.Session() as sess:
        model = KernelMixtureNetwork("kde", 2, 2, n_centers=10, n_training_epochs=10,
                                     data_normalization=True, weight_normalization=True)
        model.fit(X, Y)
        pdf_before = model.pdf(X, Y)

        # pickle and unpickle the model
        dump_string = pickle.dumps(model)

    tf.reset_default_graph()

    with tf.Session() as sess:
        model_loaded = pickle.loads(dump_string)
        pdf_after = model_loaded.pdf(X, Y)

    diff = np.sum(np.abs(pdf_after - pdf_before))
    self.assertAlmostEqual(diff, 0, places=2)
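# A note on persistence: the round-trip above keeps the pickled bytes in memory.
# Persisting to disk works the same way with stdlib pickle; this is a minimal
# sketch (the file path is illustrative, not part of the test suite):
#
#     with open("kmn_model.pkl", "wb") as f:
#         pickle.dump(model, f)
#     tf.reset_default_graph()
#     with tf.Session():
#         with open("kmn_model.pkl", "rb") as f:
#             model_loaded = pickle.load(f)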
def test_KMN_with_2d_gaussian_sampling(self):
    np.random.seed(22)
    X, Y = self.get_samples(mu=5)

    import time
    t = time.time()
    model = KernelMixtureNetwork("kmn_sampling", 1, 1, center_sampling_method='k_means',
                                 n_centers=5, n_training_epochs=1000, data_normalization=True)
    print("time to build model:", time.time() - t)
    t = time.time()
    model.fit(X, Y)
    print("time to fit model:", time.time() - t)

    x_cond = 5 * np.ones(shape=(2000000, 1))
    _, y_sample = model.sample(x_cond)
    print(np.mean(y_sample), np.std(y_sample))
    self.assertAlmostEqual(np.mean(y_sample), float(model.mean_(x_cond[1])), places=1)
    # compare the empirical std against the model's std (sqrt of the variance)
    self.assertAlmostEqual(np.std(y_sample),
                           float(np.sqrt(model.covariance(x_cond[1]))), places=1)

    x_cond = np.ones(shape=(400000, 1))
    x_cond[0, 0] = 5.0
    _, y_sample = model.sample(x_cond)
    self.assertAlmostEqual(np.mean(y_sample), float(model.mean_(x_cond[1])), places=1)
    self.assertAlmostEqual(np.std(y_sample),
                           float(np.sqrt(model.covariance(x_cond[1]))), places=1)
def eval_econ_data():
    gmm = GaussianMixture(ndim_x=1, ndim_y=1)
    econ_density = EconDensity()

    # print("ECON DATA --------------")
    # print("KMN")
    # for n_centers in [50, 100, 200]:
    #     kmn = KernelMixtureNetwork(n_centers=n_centers)
    #     gof = GoodnessOfFit(kmn, econ_density, n_observations=2000,
    #                         print_fit_result=False, repeat_kolmogorov=1)
    #     gof_results = gof.compute_results()
    #     print("N_Centers:", n_centers)
    #     print(gof_results)

    print("LAZY-Learner:")
    nkde = KernelMixtureNetwork(n_training_epochs=10)
    gof = GoodnessOfFit(nkde, gmm, n_observations=100, print_fit_result=False)
    gof_results = gof.compute_results()
    print(gof_results)
    print(gof_results.report_dict())
def test_MDN_KMN_eval_set(self):
    mu = 200
    std = 23
    X_train, Y_train = self.get_samples(mu=mu, std=std)
    X_test, Y_test = self.get_samples(mu=mu, std=std)

    model = MixtureDensityNetwork("mdn_eval_set", 1, 1, n_centers=10,
                                  data_normalization=True, n_training_epochs=100)
    model.fit(X_train, Y_train, eval_set=(X_test, Y_test))

    model = KernelMixtureNetwork("kmn_eval_set", 1, 1, n_centers=10,
                                 data_normalization=True, n_training_epochs=100)
    model.fit(X_train, Y_train, eval_set=(X_test, Y_test))
def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
    X, Y = self.get_samples()

    param_grid = {
        "n_centers": [3, 10],
        "center_sampling_method": ["k_means"],
        "keep_edges": [True],
    }
    model = KernelMixtureNetwork(center_sampling_method="k_means", n_centers=20)
    model.fit_by_cv(X, Y, param_grid=param_grid)

    y = np.arange(-1, 5, 0.5)
    x = np.asarray([2 for i in range(y.shape[0])])
    p_est = model.pdf(x, y)
    p_true = norm.pdf(y, loc=2, scale=1)

    # cross-validation should select n_centers=10 over n_centers=3
    self.assertEqual(model.get_params()["n_centers"], 10)
    self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.2)
def test_KMN_log_pdf(self):
    X, Y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))

    for data_norm in [True, False]:
        with tf.Session() as sess:
            model = KernelMixtureNetwork("kmn_logprob" + str(data_norm), 3, 2, n_centers=5,
                                         hidden_sizes=(8, 8), init_scales=np.array([0.5]),
                                         n_training_epochs=10, data_normalization=data_norm)
            model.fit(X, Y)

            x, y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))
            prob = model.pdf(x, y)
            log_prob = model.log_pdf(x, y)
            # exp(log_pdf) must match pdf up to numerical error
            self.assertLessEqual(np.mean(np.abs(prob - np.exp(log_prob))), 0.001)
def plot_fitted_distribution():
    n_observations = 1000  # number of data points
    n_features = 3  # number of features

    np.random.seed(22)

    # NOTE: the simulated econ data is immediately replaced by synthetic linear data below
    X_train, X_test, Y_train, Y_test = econ_density.simulate(n_observations)
    model = KernelMixtureNetwork()

    X_train = np.random.normal(loc=0, size=[n_observations, 1])
    Y_train = 3 * X_train + np.random.normal(loc=0, size=[n_observations, 1])
    X_test = np.random.normal(loc=0, size=[100, 1])
    Y_test = 3 * X_test + np.random.normal(loc=0, size=[100, 1])

    model.fit(X_train, Y_train)
    print(model.score(X_test, Y_test))
    # print(model.fit_by_cv(X_train, Y_train))

    # plt.scatter(model.X_train, model.Y_test)
    # plt.scatter(model.centr_x, model.centr_y, s=10 * model.alpha)
    # plt.show()
    #
    # fig, ax = plt.subplots()
    # fig.set_size_inches(10, 8)
    # sns.regplot(X_train, Y_train, fit_reg=False)
    # plt.show()

    n_samples = 1000
    Y_plot = np.linspace(-10, 10, num=n_samples)
    X_plot = np.expand_dims(np.asarray([-1 for _ in range(n_samples)]), axis=1)
    result = model.pdf(X_plot, Y_plot)
    plt.plot(Y_plot, result)
    # plt.show()

    # 2d plot
    X_plot = np.expand_dims(np.asarray([2 for _ in range(n_samples)]), axis=1)
    result = model.pdf(X_plot, Y_plot)
    plt.plot(Y_plot, result)
    plt.show()

    # 3d plot
    n_samples = 100
    linspace_x = np.linspace(-15, 15, num=n_samples)
    linspace_y = np.linspace(-15, 15, num=n_samples)
    X, Y = np.meshgrid(linspace_x, linspace_y)
    X, Y = X.flatten(), Y.flatten()
    Z = model.pdf(X, Y)
    X, Y, Z = (X.reshape([n_samples, n_samples]), Y.reshape([n_samples, n_samples]),
               Z.reshape([n_samples, n_samples]))

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, linewidth=0, antialiased=True)
    plt.show()
import warnings
warnings.filterwarnings("ignore")

from cde.density_simulation import SkewNormal
from cde.density_estimator import KernelMixtureNetwork
import numpy as np

""" simulate some data """
seed = 22
density_simulator = SkewNormal(random_seed=seed)
X, Y = density_simulator.simulate(n_samples=3000)

""" fit density model """
model = KernelMixtureNetwork("KDE_demo", ndim_x=1, ndim_y=1, n_centers=50,
                             x_noise_std=0.2, y_noise_std=0.1, random_seed=22)
model.fit(X, Y)

""" query the conditional pdf and cdf """
x_cond = np.zeros((1, 1))
y_query = np.ones((1, 1)) * 0.1
prob = model.pdf(x_cond, y_query)
cum_prob = model.cdf(x_cond, y_query)

""" compute conditional moments & VaR """
x_cond = np.zeros((1, 1))
mean = model.mean_(x_cond)[0][0]
std = model.std_(x_cond)[0][0]
skewness = model.skewness(x_cond)[0]
VaR = model.value_at_risk(x_cond, alpha=0.01)[0]
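""" draw conditional samples (sketch) """
# model.sample is used the same way in the tests above: it returns a tuple of
# conditional inputs and sampled targets. The conditioning point and sample
# size here are illustrative assumptions, not part of the original demo.
x_cond = np.zeros((1000, 1))
_, y_samples = model.sample(x_cond)
print("empirical mean:", np.mean(y_samples), "empirical std:", np.std(y_samples))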