def test1_KMN_with_2d_gaussian_noise_y(self):
    X, Y = self.get_samples(std=0.5)

    with tf.Session():
        model_no_noise = KernelMixtureNetwork("kmn_no_noise_y", 1, 1, n_centers=5,
                                              x_noise_std=None, y_noise_std=None)
        model_no_noise.fit(X, Y)
        var_no_noise = model_no_noise.covariance(x_cond=np.array([[2]]))[0][0][0]

        model_noise = KernelMixtureNetwork("kmn_noise_y", 1, 1, n_centers=5,
                                           x_noise_std=None, y_noise_std=1)
        model_noise.fit(X, Y)
        var_noise = model_noise.covariance(x_cond=np.array([[2]]))[0][0][0]

        print("Training w/o noise:", var_no_noise)
        print("Training w/ noise:", var_noise)

        # adding y-noise during training should visibly inflate the conditional variance
        self.assertGreaterEqual(var_noise - var_no_noise, 0.1)
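
# A minimal numpy sketch of the regularization idea that the y-noise test above and
# the x-noise test (test2) further below exercise. This is an illustration only, not
# the KMN internals; `noisy_minibatch` is a hypothetical helper name.
def noisy_minibatch(X, Y, x_noise_std=None, y_noise_std=None, rng=np.random):
    # jitter inputs and/or targets with Gaussian noise each pass, which acts
    # as a smoothness regularizer on the fitted conditional density
    X_n = X if x_noise_std is None else X + rng.normal(0, x_noise_std, size=X.shape)
    Y_n = Y if y_noise_std is None else Y + rng.normal(0, y_noise_std, size=Y.shape)
    return X_n, Y_n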

def test7_data_normalization(self):
    X, Y = self.get_samples(std=2, mean=20)

    with tf.Session() as sess:
        model = KernelMixtureNetwork("kmn_data_normalization", 1, 1, n_centers=2,
                                     x_noise_std=None, y_noise_std=None,
                                     data_normalization=True, n_training_epochs=100)
        model.fit(X, Y)

        # test if the data statistics were properly assigned to the tf graph
        x_mean, x_std = sess.run([model.mean_x_sym, model.std_x_sym])
        print(x_mean, x_std)
        mean_diff = float(np.abs(x_mean - 20))
        std_diff = float(np.abs(x_std - 2))
        self.assertLessEqual(mean_diff, 0.5)
        self.assertLessEqual(std_diff, 0.5)

def test9_data_normalization(self):
    np.random.seed(24)
    mean = -80
    std = 7
    data = np.random.normal([mean, mean, mean, mean], std, size=(4000, 4))
    X = data[:, 0:2]
    Y = data[:, 2:4]

    with tf.Session():
        model = KernelMixtureNetwork("kmn_data_normalization_2", 2, 2, n_centers=5,
                                     x_noise_std=None, y_noise_std=None,
                                     data_normalization=True, n_training_epochs=2000,
                                     random_seed=22, keep_edges=False, train_scales=True,
                                     weight_normalization=True, init_scales=np.array([1.0]))
        model.fit(X, Y)

        # Y has the same distribution as X, so it doubles as conditioning points here
        cond_mean = model.mean_(Y)
        print(np.mean(cond_mean))
        mean_diff = np.abs(mean - np.mean(cond_mean))
        self.assertLessEqual(mean_diff, np.abs(mean) * 0.1)

        # the diagonal of the conditional covariance should recover the true variance (+/- 30%)
        cond_cov = np.mean(model.covariance(Y), axis=0)
        print(cond_cov)
        self.assertGreaterEqual(cond_cov[0][0], std**2 * 0.7)
        self.assertLessEqual(cond_cov[0][0], std**2 * 1.3)
        self.assertGreaterEqual(cond_cov[1][1], std**2 * 0.7)
        self.assertLessEqual(cond_cov[1][1], std**2 * 1.3)

def test_KMN_with_2d_gaussian_2(self):
    mu = 200
    std = 23
    X, Y = self.get_samples(mu=mu, std=std)

    for method in ["agglomerative"]:
        with tf.Session():
            model = KernelMixtureNetwork("kmn2_" + method, 1, 1,
                                         center_sampling_method=method, n_centers=10,
                                         hidden_sizes=(16, 16), init_scales=np.array([1.0]),
                                         train_scales=True, data_normalization=True)
            model.fit(X, Y)

            # evaluate pdf and cdf on a grid spanning +/- 3 std around the mean
            y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
            x = np.asarray([mu for _ in range(y.shape[0])])
            p_est = model.pdf(x, y)
            p_true = norm.pdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

            p_est = model.cdf(x, y)
            p_true = norm.cdf(y, loc=mu, scale=std)
            self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

def test2_KMN_with_2d_gaussian_noise_x(self):
    np.random.seed(22)
    X = np.random.uniform(0, 6, size=4000)
    Y = X + np.random.normal(0, 1, size=4000)

    x_test_2 = np.ones(100) * 2
    x_test_4 = np.ones(100) * 4
    y_test = np.linspace(1, 5, num=100)

    with tf.Session():
        model_no_noise = KernelMixtureNetwork("kmn_no_noise_x", 1, 1, n_centers=5,
                                              x_noise_std=None, y_noise_std=None)
        model_no_noise.fit(X, Y)
        pdf_distance_no_noise = np.mean(np.abs(model_no_noise.pdf(x_test_2, y_test)
                                               - model_no_noise.pdf(x_test_4, y_test)))

        model_noise = KernelMixtureNetwork("kmn_noise_x", 1, 1, n_centers=5,
                                           x_noise_std=2, y_noise_std=None)
        model_noise.fit(X, Y)
        pdf_distance_noise = np.mean(np.abs(model_noise.pdf(x_test_2, y_test)
                                            - model_noise.pdf(x_test_4, y_test)))

        print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
        print("Training w/ noise - pdf distance:", pdf_distance_noise)

        # x-noise smooths the estimate along x, so the conditional densities at
        # x=2 and x=4 should be considerably closer for the regularized model
        self.assertGreaterEqual(pdf_distance_no_noise / pdf_distance_noise, 2.0)

def testPickleUnpickleKDN(self):
    X, Y = self.get_samples()

    with tf.Session():
        model = KernelMixtureNetwork("kde", 2, 2, n_centers=10, n_training_epochs=10,
                                     data_normalization=True, weight_normalization=True)
        model.fit(X, Y)
        pdf_before = model.pdf(X, Y)

        # pickle the fitted model
        dump_string = pickle.dumps(model)

    # wipe the graph so unpickling must fully restore the model's tf state
    tf.reset_default_graph()

    with tf.Session():
        model_loaded = pickle.loads(dump_string)
        pdf_after = model_loaded.pdf(X, Y)

        diff = np.sum(np.abs(pdf_after - pdf_before))
        self.assertAlmostEqual(diff, 0, places=2)

def test_KMN_with_2d_gaussian_sampling(self):
    np.random.seed(22)
    X, Y = self.get_samples(mu=5)

    import time
    t = time.time()
    model = KernelMixtureNetwork("kmn_sampling", 1, 1, center_sampling_method='k_means',
                                 n_centers=5, n_training_epochs=500, data_normalization=False)
    print("time to build model:", time.time() - t)
    t = time.time()
    model.fit(X, Y)
    print("time to fit model:", time.time() - t)

    # constant conditioning: the sample mean / std must match the model's
    # conditional mean and conditional std (sqrt of the covariance)
    x_cond = 5 * np.ones(shape=(2000000, 1))
    _, y_sample = model.sample(x_cond)
    print(np.mean(y_sample), np.std(y_sample))
    self.assertAlmostEqual(np.mean(y_sample), float(model.mean_(x_cond[1])), places=1)
    self.assertAlmostEqual(np.std(y_sample), float(np.sqrt(model.covariance(x_cond[1]))), places=1)

    # mixed conditioning: all points at x=1 except one, so the sample moments
    # should still match the conditional moments at x=1 (i.e. x_cond[1])
    x_cond = np.ones(shape=(400000, 1))
    x_cond[0, 0] = 5.0
    _, y_sample = model.sample(x_cond)
    self.assertAlmostEqual(np.mean(y_sample), float(model.mean_(x_cond[1])), places=1)
    self.assertAlmostEqual(np.std(y_sample), float(np.sqrt(model.covariance(x_cond[1]))), places=1)

def test6_KMN_entropy_regularization(self):
    X1, Y1 = self.get_samples(std=1, mean=2)
    X2, Y2 = self.get_samples(std=1, mean=-2)

    # data for a GMM with two modes
    X = np.expand_dims(np.concatenate([X1, X2], axis=0), axis=1)
    Y = np.expand_dims(np.concatenate([Y1, Y2], axis=0), axis=1)

    with tf.Session() as sess:
        model_no_reg = KernelMixtureNetwork("kmn_no_entropy_reg", 1, 1, n_centers=2,
                                            x_noise_std=None, y_noise_std=None,
                                            entropy_reg_coef=0.0)
        model_no_reg.fit(X, Y)
        entropy1 = np.mean(sess.run(model_no_reg.softmax_entropy,
                                    feed_dict={model_no_reg.X_ph: X, model_no_reg.Y_ph: Y}))

        model_reg = KernelMixtureNetwork("kmn_entropy_reg", 1, 1, n_centers=2,
                                         x_noise_std=None, y_noise_std=None,
                                         entropy_reg_coef=10.0)
        model_reg.fit(X, Y)
        entropy2 = np.mean(sess.run(model_reg.softmax_entropy,
                                    feed_dict={model_reg.X_ph: X, model_reg.Y_ph: Y}))

        print(entropy1)
        print(entropy2)

        # the entropy penalty should drive the mixture-weight entropy down by at least 10x
        self.assertGreaterEqual(entropy1 / entropy2, 10)
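
# Hedged illustration of the quantity compared in the entropy-regularization test
# above: the Shannon entropy of a mixture-weight vector, H(w) = -sum_i w_i * log(w_i).
# Uniform weights maximize it; near-one-hot weights drive it toward zero, which is
# the direction the penalty pushes, as the assertion entropy1 / entropy2 >= 10
# confirms. `shannon_entropy` is a local helper, not part of the KernelMixtureNetwork API.
def shannon_entropy(weights):
    w = np.asarray(weights, dtype=float)
    w = w / w.sum()  # normalize to a probability vector
    return -np.sum(w * np.log(np.clip(w, 1e-12, None)))

# shannon_entropy([0.5, 0.5])   -> ~0.693 (maximal for two components)
# shannon_entropy([0.99, 0.01]) -> ~0.056 (close to one-hot)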

def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
    X, Y = self.get_samples()

    param_grid = {
        "n_centers": [3, 10],
        "center_sampling_method": ["k_means"],
        "keep_edges": [True],
    }

    model = KernelMixtureNetwork("kmn", 1, 1, center_sampling_method="k_means", n_centers=20)
    model.fit_by_cv(X, Y, param_grid=param_grid)

    y = np.arange(-1, 5, 0.5)
    x = np.asarray([2 for _ in range(y.shape[0])])
    p_est = model.pdf(x, y)
    p_true = norm.pdf(y, loc=2, scale=1)

    # cross-validation should select n_centers=10 and yield a close density fit
    self.assertEqual(model.get_params()["n_centers"], 10)
    self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.2)

def test_KMN_log_pdf(self):
    X, Y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))

    for data_norm in [True, False]:
        with tf.Session():
            model = KernelMixtureNetwork("kmn_logprob" + str(data_norm), 3, 2, n_centers=5,
                                         hidden_sizes=(8, 8), init_scales=np.array([0.5]),
                                         n_training_epochs=10, data_normalization=data_norm)
            model.fit(X, Y)

            x, y = np.random.normal(size=(1000, 3)), np.random.normal(size=(1000, 2))
            prob = model.pdf(x, y)
            log_prob = model.log_pdf(x, y)

            # exp(log_pdf) must agree with pdf regardless of data normalization
            self.assertLessEqual(np.mean(np.abs(prob - np.exp(log_prob))), 0.001)
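
# The tests above rely on module-level imports and a get_samples fixture that are
# not part of this excerpt. A sketch of the assumed scaffolding (signatures and
# shapes are inferred from the call sites, so treat them as assumptions; the
# import path for KernelMixtureNetwork is the one used by the cde package):
#
#   import pickle
#   import numpy as np
#   import tensorflow as tf
#   from scipy.stats import norm
#   from cde.density_estimator import KernelMixtureNetwork
#
# Most tests draw 1-d X and Y via self.get_samples(...); testPickleUnpickleKDN
# needs a 2-d variant, and some call sites pass mu= while others pass mean=,
# which suggests the methods come from different test classes, each with its own
# fixture. A plausible 1-d version, as a method on the test class:
def get_samples(self, std=1.0, mean=2):
    np.random.seed(22)
    data = np.random.normal([mean, mean], std, size=(2000, 2))
    return data[:, 0], data[:, 1]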