def test1_KMN_with_2d_gaussian_noise_y(self):
        """Training with y-noise should increase the estimated variance.

        Two networks are fitted on the same samples, one without noise and
        one with ``y_noise_std=1``. The first covariance entry at ``x = 2``
        of the noisy model must exceed the noise-free one by at least 0.1.
        """
        X, Y = self.get_samples(std=0.5)

        with tf.Session():
            # Baseline: no noise regularization at all.
            plain_model = KernelMixtureNetwork(
                "kmn_no_noise_y", 1, 1,
                n_centers=5, x_noise_std=None, y_noise_std=None)
            plain_model.fit(X, Y)
            plain_cov = plain_model.covariance(x_cond=np.array([[2]]))
            var_no_noise = plain_cov[0][0][0]

            # Same setup, but with noise added on y during training.
            noisy_model = KernelMixtureNetwork(
                "kmn_noise_y", 1, 1,
                n_centers=5, x_noise_std=None, y_noise_std=1)
            noisy_model.fit(X, Y)
            noisy_cov = noisy_model.covariance(x_cond=np.array([[2]]))
            var_noise = noisy_cov[0][0][0]

            print("Training w/o noise:", var_no_noise)
            print("Training w/ noise:", var_noise)

            variance_gain = var_noise - var_no_noise
            self.assertGreaterEqual(variance_gain, 0.1)
    def test7_data_normalization(self):
        """Data statistics should be stored in the graph after fitting.

        Fits a model with ``data_normalization=True`` and asserts that the
        evaluated ``mean_x_sym`` and ``std_x_sym`` lie within 0.5 of 20 and 2
        respectively (the values passed to ``get_samples``).
        """
        expected_mean = 20
        expected_std = 2
        tolerance = 0.5
        X, Y = self.get_samples(std=2, mean=20)

        with tf.Session() as sess:
            model = KernelMixtureNetwork(
                "kmn_data_normalization", 1, 1,
                n_centers=2,
                x_noise_std=None,
                y_noise_std=None,
                data_normalization=True,
                n_training_epochs=100)
            model.fit(X, Y)

            # test if data statistics were properly assigned to tf graph
            fetched_stats = sess.run([model.mean_x_sym, model.std_x_sym])
            x_mean, x_std = fetched_stats
            print(x_mean, x_std)

            mean_diff = float(np.abs(x_mean - expected_mean))
            std_diff = float(np.abs(x_std - expected_std))
            self.assertLessEqual(mean_diff, tolerance)
            self.assertLessEqual(std_diff, tolerance)
    def test9_data_normalization(self):
        """Fit on far-from-zero 2-d data and check the estimated moments.

        The data are 4000 draws from a normal distribution with mean -80 and
        std 7 in four columns; the first two columns are X, the last two Y.
        After fitting with data normalization, the average conditional mean
        must be within 10% of the true mean and both diagonal covariance
        entries within 30% of the true variance.
        """
        np.random.seed(24)
        mean = -80
        std = 7
        data = np.random.normal([mean] * 4, std, size=(4000, 4))
        X, Y = data[:, :2], data[:, 2:4]

        with tf.Session():
            model = KernelMixtureNetwork(
                "kmn_data_normalization_2", 2, 2,
                n_centers=5,
                x_noise_std=None,
                y_noise_std=None,
                data_normalization=True,
                n_training_epochs=2000,
                random_seed=22,
                keep_edges=False,
                train_scales=True,
                weight_normalization=True,
                init_scales=np.array([1.0]))
            model.fit(X, Y)

            # NOTE(review): Y (not X) is passed as the conditioning input
            # here and below. X and Y are drawn from the same distribution,
            # so the check still works -- confirm whether this is intended.
            cond_mean = model.mean_(Y)
            avg_cond_mean = np.mean(cond_mean)
            print(avg_cond_mean)
            mean_diff = np.abs(mean - avg_cond_mean)
            self.assertLessEqual(mean_diff, np.abs(mean) * 0.1)

            cond_cov = np.mean(model.covariance(Y), axis=0)
            print(cond_cov)
            # Each diagonal entry must lie within +/-30% of the true variance.
            for dim in range(2):
                self.assertGreaterEqual(cond_cov[dim][dim], std**2 * 0.7)
                self.assertLessEqual(cond_cov[dim][dim], std**2 * 1.3)
    def test_KMN_with_2d_gaussian_2(self):
        """Compare estimated pdf/cdf with a Gaussian with mean 200, std 23.

        For each center sampling method, a model is fitted and evaluated at
        ``x = mu`` on a grid of y values spanning ``mu +/- 3 * std``. The mean
        absolute deviation from ``scipy.stats.norm`` must not exceed 0.1 for
        both the pdf and the cdf.
        """
        mu = 200
        std = 23
        X, Y = self.get_samples(mu=mu, std=std)

        for method in ["agglomerative"]:
            with tf.Session():
                model = KernelMixtureNetwork(
                    "kmn2_" + method, 1, 1,
                    center_sampling_method=method,
                    n_centers=10,
                    hidden_sizes=(16, 16),
                    init_scales=np.array([1.0]),
                    train_scales=True,
                    data_normalization=True)
                model.fit(X, Y)

                # Evaluation grid over y; x is held constant at mu.
                step = 6 * std / 20
                y = np.arange(mu - 3 * std, mu + 3 * std, step)
                x = np.asarray([mu] * y.shape[0])

                pdf_est = model.pdf(x, y)
                pdf_true = norm.pdf(y, loc=mu, scale=std)
                pdf_error = np.mean(np.abs(pdf_true - pdf_est))
                self.assertLessEqual(pdf_error, 0.1)

                cdf_est = model.cdf(x, y)
                cdf_true = norm.cdf(y, loc=mu, scale=std)
                cdf_error = np.mean(np.abs(cdf_true - cdf_est))
                self.assertLessEqual(cdf_error, 0.1)
    def test2_KMN_with_2d_gaussian_noise_x(self):
        """Training with x-noise should reduce the dependence of the pdf on x.

        Data follow ``Y = X + N(0, 1)`` with X uniform on [0, 6]. For a model
        without noise and one with ``x_noise_std=2``, the mean absolute
        difference between the pdfs at ``x = 2`` and ``x = 4`` is computed.
        The noise-free distance must be at least twice the noisy one.
        """
        np.random.seed(22)
        X = np.random.uniform(0, 6, size=4000)
        Y = X + np.random.normal(0, 1, size=4000)

        # Two constant conditioning vectors and a shared y grid.
        x_test_2 = np.ones(100) * 2
        x_test_4 = np.ones(100) * 4
        y_test = np.linspace(1, 5, num=100)

        with tf.Session():
            model_no_noise = KernelMixtureNetwork(
                "kmn_no_noise_x", 1, 1,
                n_centers=5, x_noise_std=None, y_noise_std=None)
            model_no_noise.fit(X, Y)
            plain_pdf_at_2 = model_no_noise.pdf(x_test_2, y_test)
            plain_pdf_at_4 = model_no_noise.pdf(x_test_4, y_test)
            pdf_distance_no_noise = np.mean(
                np.abs(plain_pdf_at_2 - plain_pdf_at_4))

            model_noise = KernelMixtureNetwork(
                "kmn_noise_x", 1, 1,
                n_centers=5, x_noise_std=2, y_noise_std=None)
            model_noise.fit(X, Y)
            noisy_pdf_at_2 = model_noise.pdf(x_test_2, y_test)
            noisy_pdf_at_4 = model_noise.pdf(x_test_4, y_test)
            pdf_distance_noise = np.mean(
                np.abs(noisy_pdf_at_2 - noisy_pdf_at_4))

            print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
            print("Training w/ noise - pdf distance", pdf_distance_noise)

            distance_ratio = pdf_distance_no_noise / pdf_distance_noise
            self.assertGreaterEqual(distance_ratio, 2.0)
    def testPickleUnpickleKDN(self):
        """A pickled and restored model should reproduce the same pdf.

        The model is fitted and pickled in one session. The default graph is
        then reset and the model is unpickled in a new session. The summed
        absolute difference of the pdf values before and after must be zero
        to two decimal places.
        """
        X, Y = self.get_samples()

        with tf.Session():
            model = KernelMixtureNetwork(
                "kde", 2, 2,
                n_centers=10,
                n_training_epochs=10,
                data_normalization=True,
                weight_normalization=True)
            model.fit(X, Y)
            pdf_before = model.pdf(X, Y)

            # pickle and unpickle model
            dump_string = pickle.dumps(model)

        # Restore the model into a clean graph.
        tf.reset_default_graph()
        with tf.Session():
            model_loaded = pickle.loads(dump_string)
            pdf_after = model_loaded.pdf(X, Y)

        total_abs_diff = np.sum(np.abs(pdf_after - pdf_before))
        self.assertAlmostEqual(total_abs_diff, 0, places=2)
    def test_KMN_with_2d_gaussian_sampling(self):
        """Samples drawn from a fitted model should match its own moments.

        A model is fitted on ``self.get_samples(mu=5)`` and sampled for two
        conditioning arrays. In both cases the empirical mean and standard
        deviation of the samples are compared, to one decimal place, with
        ``model.mean_`` and the square root of ``model.covariance`` evaluated
        at the second row of the conditioning array.
        """
        np.random.seed(22)
        X, Y = self.get_samples(mu=5)

        import time
        t = time.time()
        model = KernelMixtureNetwork("kmn_sampling",
                                     1,
                                     1,
                                     center_sampling_method='k_means',
                                     n_centers=5,
                                     n_training_epochs=500,
                                     data_normalization=False)
        print("time to build model:", time.time() - t)
        t = time.time()

        model.fit(X, Y)
        print("time to fit model:", time.time() - t)

        # Case 1: every row conditions on x = 5.
        x_cond = 5 * np.ones(shape=(2000000, 1))
        _, y_sample = model.sample(x_cond)
        print(np.mean(y_sample), np.std(y_sample))
        self.assertAlmostEqual(np.mean(y_sample),
                               float(model.mean_(x_cond[1])),
                               places=1)
        # The sample standard deviation has to be compared with the square
        # root of the covariance (a variance), as in the second case below.
        self.assertAlmostEqual(np.std(y_sample),
                               float(np.sqrt(model.covariance(x_cond[1]))),
                               places=1)

        # Case 2: all rows condition on x = 1 except the first one (x = 5);
        # the reference moments are taken at row 1, i.e. x = 1.
        x_cond = np.ones(shape=(400000, 1))
        x_cond[0, 0] = 5.0
        _, y_sample = model.sample(x_cond)
        self.assertAlmostEqual(np.mean(y_sample),
                               float(model.mean_(x_cond[1])),
                               places=1)
        self.assertAlmostEqual(np.std(y_sample),
                               float(np.sqrt(model.covariance(x_cond[1]))),
                               places=1)
    def test6_KMN_entropy_regularization(self):
        """Entropy regularization should change the softmax entropy.

        Two sample sets (``mean=2`` and ``mean=-2``) are concatenated into one
        data set. A model with ``entropy_reg_coef=0.0`` and one with
        ``entropy_reg_coef=10.0`` are fitted, and the mean of
        ``softmax_entropy`` over the data is evaluated for each. The ratio of
        the unregularized to the regularized value must be at least 10.
        """
        X1, Y1 = self.get_samples(std=1, mean=2)
        X2, Y2 = self.get_samples(std=1, mean=-2)

        # DATA for GMM with two modes
        stacked_x = np.concatenate([X1, X2], axis=0)
        stacked_y = np.concatenate([Y1, Y2], axis=0)
        X = np.expand_dims(stacked_x, axis=1)
        Y = np.expand_dims(stacked_y, axis=1)

        with tf.Session() as sess:
            model_no_reg = KernelMixtureNetwork(
                "kmn_no_entropy_reg", 1, 1,
                n_centers=2,
                x_noise_std=None,
                y_noise_std=None,
                entropy_reg_coef=0.0)
            model_no_reg.fit(X, Y)

            feed_no_reg = {model_no_reg.X_ph: X, model_no_reg.Y_ph: Y}
            entropies_no_reg = sess.run(model_no_reg.softmax_entropy,
                                        feed_dict=feed_no_reg)
            entropy1 = np.mean(entropies_no_reg)

            model_reg = KernelMixtureNetwork(
                "kmn_entropy_reg", 1, 1,
                n_centers=2,
                x_noise_std=None,
                y_noise_std=None,
                entropy_reg_coef=10.0)
            model_reg.fit(X, Y)

            feed_reg = {model_reg.X_ph: X, model_reg.Y_ph: Y}
            entropies_reg = sess.run(model_reg.softmax_entropy,
                                     feed_dict=feed_reg)
            entropy2 = np.mean(entropies_reg)

            print(entropy1)
            print(entropy2)
            entropy_ratio = entropy1 / entropy2
            self.assertGreaterEqual(entropy_ratio, 10)
    def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
        """Fit via ``fit_by_cv`` and check the selected parameters and the pdf.

        The parameter grid offers 3 or 10 centers. After the search, the
        model's ``n_centers`` parameter must be 10 and the mean absolute
        deviation of the estimated pdf at ``x = 2`` from a Gaussian with
        mean 2 and scale 1 must not exceed 0.2.
        """
        X, Y = self.get_samples()

        param_grid = {
            "n_centers": [3, 10],
            "center_sampling_method": ["k_means"],
            "keep_edges": [True],
        }

        model = KernelMixtureNetwork(
            "kmn", 1, 1,
            center_sampling_method="k_means",
            n_centers=20)
        model.fit_by_cv(X, Y, param_grid=param_grid)

        # Evaluation grid over y; x is held constant at 2.
        y = np.arange(-1, 5, 0.5)
        x = np.asarray([2] * y.shape[0])
        p_est = model.pdf(x, y)
        p_true = norm.pdf(y, loc=2, scale=1)

        selected_n_centers = model.get_params()["n_centers"]
        self.assertEqual(selected_n_centers, 10)
        mean_abs_error = np.mean(np.abs(p_true - p_est))
        self.assertLessEqual(mean_abs_error, 0.2)
    def test_KMN_log_pdf(self):
        """``exp(log_pdf)`` should agree with ``pdf``.

        The check runs with and without data normalization. A model is
        fitted on standard-normal data with 3 x-columns and 2 y-columns, then
        evaluated on freshly drawn data of the same shape. The mean absolute
        difference between ``pdf`` and ``exp(log_pdf)`` must not exceed 0.001.
        """
        X = np.random.normal(size=(1000, 3))
        Y = np.random.normal(size=(1000, 2))

        for data_norm in [True, False]:
            with tf.Session():
                model = KernelMixtureNetwork(
                    "kmn_logprob" + str(data_norm), 3, 2,
                    n_centers=5,
                    hidden_sizes=(8, 8),
                    init_scales=np.array([0.5]),
                    n_training_epochs=10,
                    data_normalization=data_norm)
                model.fit(X, Y)

                # New evaluation points, drawn in the same order (x, then y).
                x = np.random.normal(size=(1000, 3))
                y = np.random.normal(size=(1000, 2))

                prob = model.pdf(x, y)
                log_prob = model.log_pdf(x, y)
                mean_abs_gap = np.mean(np.abs(prob - np.exp(log_prob)))
                self.assertLessEqual(mean_abs_gap, 0.001)