def test_KMN_l2_regularization(self):
        mu = 5
        std = 5
        X, Y = self.get_samples(mu=mu, std=std, n_samples=500)

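        # two KMNs with identical architecture; only the L2 penalty differs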
        kmn_no_reg = KernelMixtureNetwork("kmn_no_reg",
                                          1,
                                          1,
                                          n_centers=10,
                                          hidden_sizes=(16, 16),
                                          n_training_epochs=200,
                                          l2_reg=0.0,
                                          weight_normalization=False)
        kmn_reg_l2 = KernelMixtureNetwork("kmn_reg_l2",
                                          1,
                                          1,
                                          n_centers=10,
                                          hidden_sizes=(16, 16),
                                          n_training_epochs=200,
                                          l2_reg=1.0,
                                          weight_normalization=False)
        kmn_no_reg.fit(X, Y)
        kmn_reg_l2.fit(X, Y)

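        # compare mean absolute pdf error against the true Gaussian on a y-grid at x = mu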
        y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
        x = np.asarray([mu for i in range(y.shape[0])])
        p_true = norm.pdf(y, loc=mu, scale=std)
        err_no_reg = np.mean(np.abs(kmn_no_reg.pdf(x, y) - p_true))
        err_reg_l2 = np.mean(np.abs(kmn_reg_l2.pdf(x, y) - p_true))

        self.assertLessEqual(err_reg_l2, err_no_reg)

    def test_KMN_with_2d_gaussian_2(self):
        mu = 200
        std = 23
        X, Y = self.get_samples(mu=mu, std=std)

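        # 'agglomerative': kernel centers from agglomerative clustering of the training data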
        for method in ["agglomerative"]:
            with tf.Session() as sess:
                model = KernelMixtureNetwork("kmn2_" + method,
                                             1,
                                             1,
                                             center_sampling_method=method,
                                             n_centers=10,
                                             hidden_sizes=(16, 16),
                                             init_scales=np.array([1.0]),
                                             train_scales=True,
                                             data_normalization=True)
                model.fit(X, Y)

                y = np.arange(mu - 3 * std, mu + 3 * std, 6 * std / 20)
                x = np.asarray([mu for i in range(y.shape[0])])
                p_est = model.pdf(x, y)
                p_true = norm.pdf(y, loc=mu, scale=std)
                self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

                p_est = model.cdf(x, y)
                p_true = norm.cdf(y, loc=mu, scale=std)
                self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.1)

    def test2_KMN_with_2d_gaussian_noise_x(self):
        np.random.seed(22)
        X = np.random.uniform(0, 6, size=4000)
        Y = X + np.random.normal(0, 1, size=4000)

        x_test_2 = np.ones(100) * 2
        x_test_4 = np.ones(100) * 4
        y_test = np.linspace(1, 5, num=100)

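        # x-noise regularization should smooth the estimate along x, making the
        # conditional pdfs at x=2 and x=4 more similar than without noise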
        with tf.Session():
            model_no_noise = KernelMixtureNetwork("kmn_no_noise_x",
                                                  1,
                                                  1,
                                                  n_centers=5,
                                                  x_noise_std=None,
                                                  y_noise_std=None)
            model_no_noise.fit(X, Y)
            pdf_distance_no_noise = np.mean(
                np.abs(
                    model_no_noise.pdf(x_test_2, y_test) -
                    model_no_noise.pdf(x_test_4, y_test)))

            model_noise = KernelMixtureNetwork("kmn_noise_x",
                                               1,
                                               1,
                                               n_centers=5,
                                               x_noise_std=2,
                                               y_noise_std=None)
            model_noise.fit(X, Y)
            pdf_distance_noise = np.mean(
                np.abs(
                    model_noise.pdf(x_test_2, y_test) -
                    model_noise.pdf(x_test_4, y_test)))

            print("Training w/o noise - pdf distance:", pdf_distance_no_noise)
            print("Training w/ noise - pdf distance", pdf_distance_noise)

            self.assertGreaterEqual(pdf_distance_no_noise / pdf_distance_noise,
                                    2.0)
    def test_1_KMN_with_2d_gaussian_fit_by_crossval(self):
        X, Y = self.get_samples()

        param_grid = {
            "n_centers": [3, 10],
            "center_sampling_method": ["k_means"],
            "keep_edges": [True]
        }

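        # fit_by_cv cross-validates over param_grid and refits with the best configuration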
        # the constructor requires a scope name and the x/y dimensionalities,
        # as in the other tests in this file
        model = KernelMixtureNetwork("kmn_cv",
                                     1,
                                     1,
                                     center_sampling_method="k_means",
                                     n_centers=20)
        model.fit_by_cv(X, Y, param_grid=param_grid)

        y = np.arange(-1, 5, 0.5)
        x = np.asarray([2 for i in range(y.shape[0])])
        p_est = model.pdf(x, y)
        p_true = norm.pdf(y, loc=2, scale=1)
        self.assertEqual(model.get_params()["n_centers"], 10)
        self.assertLessEqual(np.mean(np.abs(p_true - p_est)), 0.2)

    def testPickleUnpickleKMN(self):
        X, Y = self.get_samples()
        with tf.Session() as sess:
            model = KernelMixtureNetwork("kde",
                                         2,
                                         2,
                                         n_centers=10,
                                         n_training_epochs=10,
                                         data_normalization=True,
                                         weight_normalization=True)
            model.fit(X, Y)
            pdf_before = model.pdf(X, Y)

            # pickle and unpickle model
            dump_string = pickle.dumps(model)
        tf.reset_default_graph()
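        # unpickling in a fresh graph and session must restore the fitted weights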
        with tf.Session() as sess:
            model_loaded = pickle.loads(dump_string)
            pdf_after = model_loaded.pdf(X, Y)

        diff = np.sum(np.abs(pdf_after - pdf_before))
        self.assertAlmostEqual(diff, 0, places=2)

    def test_KMN_log_pdf(self):
        X = np.random.normal(size=(1000, 3))
        Y = np.random.normal(size=(1000, 2))

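        # pdf and log_pdf must agree up to exponentiation, with and without data normalization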
        for data_norm in [True, False]:
            with tf.Session() as sess:
                model = KernelMixtureNetwork("kmn_logprob" + str(data_norm),
                                             3,
                                             2,
                                             n_centers=5,
                                             hidden_sizes=(8, 8),
                                             init_scales=np.array([0.5]),
                                             n_training_epochs=10,
                                             data_normalization=data_norm)
                model.fit(X, Y)

                x = np.random.normal(size=(1000, 3))
                y = np.random.normal(size=(1000, 2))
                prob = model.pdf(x, y)
                log_prob = model.log_pdf(x, y)
                self.assertLessEqual(np.mean(np.abs(prob - np.exp(log_prob))),
                                     0.001)


def plot_fitted_distribution():
  n_observations = 1000  # number of data points

  np.random.seed(22)


  # a simple 1-d toy dataset: Y = 3 * X + Gaussian noise
  X_train = np.random.normal(loc=0, size=[n_observations, 1])
  Y_train = 3 * X_train + np.random.normal(loc=0, size=[n_observations, 1])
  X_test = np.random.normal(loc=0, size=[100, 1])
  Y_test = 3 * X_test + np.random.normal(loc=0, size=[100, 1])

  # name, ndim_x and ndim_y are required constructor arguments
  model = KernelMixtureNetwork("kmn_plot_demo", ndim_x=1, ndim_y=1)

  model.fit(X_train, Y_train)
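  # score() returns the mean log-likelihood of the test data under the fitted model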
  print(model.score(X_test, Y_test))
  #print(model.fit_by_cv(X_train, Y_train))

  # (optional) visualize the training data and the fitted kernel centers:
  # plt.scatter(model.X_train, model.Y_train)
  # plt.scatter(model.centr_x, model.centr_y, s=10*model.alpha)
  # plt.show()
  #
  # fig, ax = plt.subplots()
  # fig.set_size_inches(10, 8)
  # sns.regplot(X_train, Y_train, fit_reg=False)
  # plt.show()

  n_samples = 1000

  Y_plot = np.linspace(-10, 10, num=n_samples)

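  # conditional density slice p(y | x = -1) over a grid of y values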
  X_plot = np.expand_dims(np.asarray([-1 for _ in range(n_samples)]), axis=1)
  result = model.pdf(X_plot, Y_plot)
  plt.plot(Y_plot, result)
  #plt.show()

  # second conditional slice at x = 2, overlaid on the same axes
  X_plot = np.expand_dims(np.asarray([2 for _ in range(n_samples)]), axis=1)
  result = model.pdf(X_plot, Y_plot)
  plt.plot(Y_plot, result)

  plt.show()

  # 3d plot: evaluate the density on a meshgrid of (x, y)
  n_samples = 100
  linspace_x = np.linspace(-15, 15, num=n_samples)
  linspace_y = np.linspace(-15, 15, num=n_samples)
  X, Y = np.meshgrid(linspace_x, linspace_y)
  X, Y = X.flatten(), Y.flatten()

  Z = model.pdf(X, Y)

  X = X.reshape([n_samples, n_samples])
  Y = Y.reshape([n_samples, n_samples])
  Z = Z.reshape([n_samples, n_samples])
  fig = plt.figure()
  ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) is removed in matplotlib >= 3.6
  surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                         linewidth=0, antialiased=True)

  plt.show()


""" simulate data from a skew-normal density """
seed = 22
density_simulator = SkewNormal(random_seed=seed)
X, Y = density_simulator.simulate(n_samples=3000)

""" fit density model """
model = KernelMixtureNetwork("KDE_demo",
                             ndim_x=1,
                             ndim_y=1,
                             n_centers=50,
                             x_noise_std=0.2,
                             y_noise_std=0.1,
                             random_seed=seed)
model.fit(X, Y)
""" query the conditional pdf and cdf"""
x_cond = np.zeros((1, 1))
y_query = np.ones((1, 1)) * 0.1
prob = model.pdf(x_cond, y_query)
cum_prob = model.cdf(x_cond, y_query)
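# pdf/cdf return one value per (x, y) query pair
print("p(y=0.1|x=0):", prob[0], "  P(Y<=0.1|x=0):", cum_prob[0])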
""" compute conditional moments & VaR  """
x_cond = np.zeros((1, 1))

mean = model.mean_(x_cond)[0][0]
std = model.std_(x_cond)[0][0]
skewness = model.skewness(x_cond)[0]
VaR = model.value_at_risk(x_cond, alpha=0.01)[0]

print("Mean:", mean)
print("Std:", std)
print("Skewness:", skewness)
print("Value-at-Risk", VaR)
""" plot the fitted distribution """
x_cond_plot = np.array([-0.5, 0, 0.5])
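
# a minimal sketch of plotting the fitted conditionals for the x values above,
# using only model.pdf and matplotlib (y-range chosen for illustration)
y_plot = np.linspace(-4, 4, num=200)
for x_val in x_cond_plot:
    x_plot = np.full_like(y_plot, x_val)
    plt.plot(y_plot, model.pdf(x_plot, y_plot), label="x = %.2f" % x_val)
plt.legend()
plt.show()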