def test8_data_normalization(self):
        """With data_normalization=True, the MDN should internally standardize
        the data yet still report conditional moments on the original scale:
        mean ~ 10 and per-dim variance ~ std**2 = 4 (within tolerance)."""
        # Fixed seeds so the fitted moments are reproducible across runs.
        np.random.seed(24)
        mean = 10
        std = 2
        # 2000 draws of a 4-dim isotropic Gaussian; first two dims become X,
        # last two become Y (both share the same marginal distribution).
        data = np.random.normal([mean, mean, mean, mean], std, size=(2000, 4))
        X = data[:, 0:2]
        Y = data[:, 2:4]

        with tf.Session() as sess:
            model = MixtureDensityNetwork("mdn_data_normalization",
                                          2,  # ndim_x
                                          2,  # ndim_y
                                          n_centers=2,
                                          x_noise_std=None,
                                          y_noise_std=None,
                                          data_normalization=True,
                                          n_training_epochs=2000,
                                          random_seed=22)
            model.fit(X, Y)
            # Evaluate the normalization statistics the model learned for Y.
            y_mean, y_std = sess.run([model.mean_y_sym, model.std_y_sym])
            print(y_mean, y_std)
            # NOTE(review): Y (not X) is passed as the conditioning input; this
            # only works because X and Y are identically distributed here —
            # confirm that was intentional.
            cond_mean = model.mean_(Y)
            mean_diff = np.abs(mean - np.mean(cond_mean))
            self.assertLessEqual(mean_diff, 0.5)

            # Average conditional covariance diagonal should be close to the
            # true variance std**2 (accepted band: +/- 30%).
            cond_cov = np.mean(model.covariance(Y), axis=0)
            print(cond_cov)
            self.assertGreaterEqual(cond_cov[0][0], std**2 * 0.7)
            self.assertLessEqual(cond_cov[0][0], std**2 * 1.3)
            self.assertGreaterEqual(cond_cov[1][1], std**2 * 0.7)
            self.assertLessEqual(cond_cov[1][1], std**2 * 1.3)
# Beispiel #2  (scraper artifact: example separator — kept as a comment so the file stays valid Python)
# 0
def cv_for_cde(data,
               labels,
               name,
               std,
               n_splits=5,
               want_r2=False,
               want_mae=False,
               hidden_sizes=(16, 16)):
    '''
    K-fold cross-validation of a MixtureDensityNetwork conditional density
    estimator, scored by the mean of its predicted conditional distribution.

    data: the X matrix containing the features, can be a pd.DataFrame or a np object (array or matrix)
    labels: y, can be a pd.DataFrame or a np array
    name: base name for the per-fold MDN models (fold index is appended)
    std: noise std applied to both x and y during training (regularization)
    n_splits: number of desired folds
    want_r2 / want_mae: additionally return per-fold R^2 / mean absolute error
    hidden_sizes: sizes of the MDN hidden layers
    => returns array of mean squared error calculated on each fold
       (plus r2s and/or maes depending on the want_* flags)
    '''
    from cde.density_estimator import MixtureDensityNetwork

    input_dim = data.shape[1]
    kf = KFold(n_splits=n_splits, shuffle=True)
    data = np.array(data)
    labels = np.array(labels)
    mses = []
    r2s = []
    maes = []
    # enumerate(start=1) replaces the original manual `i = i + 1` counter.
    for i, (train, test) in enumerate(kf.split(data), start=1):
        # A fresh model per fold; names must be unique per TF graph.
        model = MixtureDensityNetwork(name=name + str(i),
                                      ndim_x=input_dim,
                                      ndim_y=1,
                                      n_centers=10,
                                      hidden_sizes=hidden_sizes,
                                      hidden_nonlinearity=tanh,
                                      n_training_epochs=1000,
                                      x_noise_std=std,
                                      y_noise_std=std)

        print("Split: {}".format(i), end="\r")
        X_train, X_test = data[train], data[test]
        y_train, y_test = labels[train], labels[test]
        model.fit(X=X_train, Y=y_train, verbose=True)
        # Point prediction = conditional mean of the fitted density.
        pred = model.mean_(X_test).reshape((-1, 1)).flatten()
        mse = np.mean((pred - y_test) ** 2)
        print('MSE: {}'.format(mse))
        mses.append(mse)
        r2s.append(r2_score(y_pred=pred, y_true=y_test))
        maes.append(mean_absolute_error(y_pred=pred, y_true=y_test))

    if want_r2 and want_mae:
        return (mses, r2s, maes)
    elif want_r2:
        return (mses, r2s)
    elif want_mae:
        return (mses, maes)
    else:
        return mses
    def test_MDN_with_2d_gaussian_sampling(self):
        """Fit a 1-d MDN and check that sampling from it reproduces the
        distribution's first two moments (to 0 decimal places)."""
        X, Y = self.get_samples()

        model = MixtureDensityNetwork("mdn_gaussian_sampling",
                                      1,
                                      1,
                                      n_centers=5,
                                      n_training_epochs=200)
        model.fit(X, Y)

        # Draw 10^6 samples, all conditioned on x = 1.
        x_cond = np.ones(shape=(10**6, 1))
        _, y_sample = model.sample(x_cond)
        # NOTE(review): y_sample[1] (a single drawn y value) is used as the
        # conditioning input to mean_/covariance — presumably intended to be
        # x_cond instead; confirm against the library's API.
        self.assertAlmostEqual(np.mean(y_sample),
                               float(model.mean_(y_sample[1])),
                               places=0)
        # NOTE(review): np.std (a standard deviation) is compared with
        # covariance (a variance); the two only coincide near 1 — verify
        # whether sqrt(covariance) was meant here.
        self.assertAlmostEqual(np.std(y_sample),
                               float(model.covariance(y_sample[1])),
                               places=0)
# Beispiel #4  (scraper artifact: example separator — kept as a comment so the file stays valid Python)
# 0
    def test_mean_mixture(self):
        """The MDN's conditional mean on Gaussian data should recover the
        true target means (7 and -2) of the two Y dimensions."""
        # Pin both numpy and tensorflow RNGs for reproducibility.
        np.random.seed(24)
        from tensorflow import set_random_seed
        set_random_seed(24)

        # 5000 draws of a 4-dim Gaussian: dims 0-1 are X, dims 2-3 are Y.
        samples = np.random.normal([2, 2, 7, -2], 1, size=(5000, 4))
        X, Y = samples[:, 0:2], samples[:, 2:4]

        model = MixtureDensityNetwork("mdn_mean",
                                      2,
                                      2,
                                      n_centers=3,
                                      y_noise_std=0.1,
                                      x_noise_std=0.1)
        model.fit(X, Y)

        # Monte-Carlo estimate of E[Y | x = (1.5, 2)] with 10^7 samples.
        estimate = model.mean_(x_cond=np.array([[1.5, 2]]), n_samples=10**7)
        for dim, expected in enumerate((7, -2)):
            self.assertAlmostEqual(estimate[0][dim], expected, places=0)