Beispiel #1
0
    def load_cv_split(self, i, ratio):
        """Select fold ``i`` as test set, the other folds as training set.

        Rows addressed by ``self.split[i]`` become ``self.test``; the rows of
        every other fold are concatenated into ``self.train``.  A boolean
        mask over the rows of ``self.data`` marking the training rows is
        appended to ``self.masks`` and stored as ``self.mask``.  Both frames
        are then discretized with ``px.discretize``; the test frame reuses
        the discretization returned for the training frame.  Finally a copy
        of the test label column is stored in ``self.test_labels``.

        Args:
            i: Index of the fold used for testing.
            ratio: Not used by this method.
        """
        # Concatenate the folds one by one instead of going through
        # ``np.array(self.split)``, which fails for folds of unequal length.
        train_idx = np.concatenate(
            [self.split[fold] for fold in range(self.cv) if fold != i])

        # Copy so the in-place assignments below write to frames we own
        # rather than to pandas' "maybe a view" selections.
        self.test = self.data.iloc[self.split[i]].copy()
        self.train = self.data.iloc[train_idx].copy()

        # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
        new_mask = np.zeros(self.data.shape[0], dtype=bool)
        new_mask[train_idx] = True
        self.masks.append(new_mask)
        self.mask = new_mask

        train_disc, disc_map = px.discretize(self.train.values)
        test_disc, _ = px.discretize(self.test.values, discretization=disc_map)

        self.train[:] = train_disc
        self.test[:] = test_disc

        self.test_labels = np.copy(self.test[self.label_column].to_numpy())
Beispiel #2
0
 def px_discretize(self):
     """Discretize the feature columns of ``self.train`` and ``self.test``.

     The label column is left untouched.  A column with more than
     ``self.disc_quantiles`` distinct training values is discretized with
     ``px.discretize`` (``num_states=self.disc_quantiles``, the column
     position passed as ``targets``); the test column reuses the
     discretization returned for the training column.  Any other column is
     cast to ``np.uint16`` as is.  Results are written back in place.
     """
     # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
     for i, (col_name, col) in enumerate(self.train.items()):
         if col_name == self.label_column:
             continue
         if np.unique(col).shape[0] > self.disc_quantiles:
             # The matrices are rebuilt on every iteration because earlier
             # iterations have already overwritten columns of the frames.
             train_values = np.ascontiguousarray(
                 self.train.values).astype(np.float64)
             train_disc, disc_map = px.discretize(
                 train_values,
                 num_states=self.disc_quantiles,
                 targets=np.array(i))
             train_disc = train_disc[:, i]

             test_values = np.ascontiguousarray(
                 self.test.values).astype(np.float64)
             test_disc, _ = px.discretize(test_values,
                                          discretization=disc_map,
                                          targets=np.array(i))
             test_disc = test_disc[:, i]
         else:
             train_disc = self.train[col_name].to_numpy().astype(np.uint16)
             test_disc = self.test[col_name].to_numpy().astype(np.uint16)
         self.train.loc[:, col_name] = train_disc
         self.test.loc[:, col_name] = test_disc
Beispiel #3
0
 def px_discretize_holdout(self):
     """Discretize the feature columns of ``self.holdout`` in place.

     The label column is left untouched.  A column with more than
     ``self.disc_quantiles`` distinct values is discretized with
     ``px.discretize`` (``num_states=self.disc_quantiles``, the column
     position passed as ``targets``); any other column is cast to
     ``np.uint16`` as is.
     """
     # NOTE(review): no previously fitted discretization is passed to
     # px.discretize here, so the holdout bins are derived from the holdout
     # data itself -- confirm this is intended.
     # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
     for i, (col_name, col) in enumerate(self.holdout.items()):
         if col_name == self.label_column:
             continue
         if np.unique(col).shape[0] > self.disc_quantiles:
             holdout_values = np.ascontiguousarray(
                 self.holdout.to_numpy().astype(np.float64))
             holdout_disc, _ = px.discretize(
                 holdout_values,
                 num_states=self.disc_quantiles,
                 targets=np.array(i))
             holdout_disc = holdout_disc[:, i]
         else:
             holdout_disc = col.to_numpy().astype(np.uint16)
         self.holdout.loc[:, col_name] = holdout_disc
Beispiel #4
0
    def __init__(self, states, edgelist=None, seed=None):
        """Sample Gaussian data, discretize it and fit a model on it.

        Draws 1000 samples of 15 jointly normal variables, discretizes them
        with ``px.discretize`` into 10 states and calls ``px.train`` with
        ``iters=0`` on the result; the model is stored as
        ``self.global_model`` and a copy of its weights as
        ``self.global_weights``.

        Args:
            states: Not used in the visible part of this method; the number
                of states is fixed to 10.
            edgelist: Not used; the local name is overwritten with the edge
                list of the trained model.
            seed: Seed for ``self.random_state``, which drives both the
                random covariance and the sample draw.
        """
        super(Synthetic, self).__init__()
        n_vars = 15
        n_samples = 1000
        n_states = 10
        self.random_state = np.random.RandomState(seed=seed)

        # Generate random cov
        cov = self.random_state.randn(n_vars, n_vars)
        cov = np.dot(cov, cov.T) / n_vars

        # NOTE(review): ``cov`` is already of the form A.A^T / n_vars above
        # and is multiplied with its transpose a second time here; kept as in
        # the original, but confirm the double application is intended.
        sampling_cov = np.dot(cov, cov.T) / n_vars

        # Generate data from normal.  The draw uses ``self.random_state`` so
        # that a fixed ``seed`` reproduces the same data set; without it the
        # samples came from NumPy's global random state.
        self.data = pd.DataFrame(
            scipy.stats.multivariate_normal(
                mean=np.zeros(n_vars),
                cov=sampling_cov).rvs(n_samples,
                                      random_state=self.random_state))

        data_disc, disc_ttt = px.discretize(data=self.data,
                                            num_states=n_states)

        # Add sample to ensure same state space for each variable
        data_disc = np.concatenate([
            data_disc,
            np.full(shape=(1, n_vars),
                    fill_value=n_states - 1,
                    dtype=np.uint16)
        ])

        # Generate model
        self.global_model = px.train(data_disc,
                                     graph=px.GraphType.auto_tree,
                                     mode=px.ModelType.mrf,
                                     iters=0)
        self.global_weights = np.copy(self.global_model.weights)
        # TODO: Remove the statistics for full point.

        edgelist = self.global_model.graph.edgelist
        stats = self.global_model.statistics
Beispiel #5
0
    for x in range(a.shape[0] - 1):
        cov[a[x]:a[x + 1], a[x]:a[x + 1]] = - rhs[a[x]:a[x + 1], a[x]:a[x + 1]]
    cov -= np.diag(np.diag(cov))
    cov += diag + np.diag(np.full(model.weights.shape[0], eps))

    return cov

if __name__ == '__main__':
    # Stack every array produced by main() into one float64 matrix,
    # discretize it into 10 states and train a model on the result.
    data = main()

    # One vstack over all arrays instead of re-stacking (and re-copying)
    # the growing result once per array.
    res = np.vstack(list(data))

    res = np.ascontiguousarray(res, dtype=np.float64)
    disc, M = px.discretize(res, 10)
    model = px.train(disc, graph=px.GraphType.auto_tree, iters=10000)
    # NOTE(review): the covariance returned here is discarded.
    gen_semi_random_cov(model, 1e-1)
    mu, A = model.infer()
    # NOTE(review): ``vars`` shadows the builtin of the same name; the name
    # is kept in case later code relies on it.
    vars = model.weights.shape[0]
    # Keep only the first ``vars`` entries, one per model weight.
    mu = mu[:vars]
    fi = np.outer(mu - model.statistics, mu - model.statistics)
    # Empirical covariance of model.phi(d) over all discretized samples.
    phis = [model.phi(d) for d in disc]
    cov_XY = np.cov(np.array(phis).T)
    EX_EY = np.outer(mu, mu)
    # E[XY] = Cov(X, Y) + E[X] E[Y]
    E_XY = cov_XY + EX_EY
    new_data = os.path.join(CONFIG.ROOT_DIR, "data")
    os.chdir(new_data)
    # exist_ok lets the script be re-run after the directory was created.
    os.makedirs("SYNTH", exist_ok=True)