def load_cv_split(self, i, ratio):
    """Select fold ``i`` as the test set and the remaining folds as the training set."""
    n_splits = np.arange(self.cv)
    train = n_splits[n_splits != i]
    # Row indices of all training folds, computed once instead of twice.
    train_idx = np.concatenate(np.array(self.split)[train])
    self.test = self.data.iloc[self.split[i]]
    self.train = self.data.iloc[train_idx]
    # Boolean mask marking this fold's training rows.
    new_mask = np.zeros(self.data.shape[0], dtype=bool)  # np.bool was removed in NumPy >= 1.24
    new_mask[train_idx] = True
    self.masks.append(new_mask)
    self.mask = new_mask
    # Fit the discretization on the training data and apply the same
    # binning to the test data to avoid leakage.
    train_disc, disc_map = px.discretize(self.train.values)
    test_disc, _ = px.discretize(self.test.values, discretization=disc_map)
    self.train[:] = train_disc
    self.test[:] = test_disc
    self.test_labels = np.copy(self.test[self.label_column].to_numpy())
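# Usage sketch (hypothetical, not from this repo): assuming a concrete
# subclass that populates `cv`, `split`, `data`, `masks`, and `label_column`,
# a full cross-validation pass over the folds might look like this:
#
#     ds = MyDataset(...)              # hypothetical concrete dataset class
#     for fold in range(ds.cv):
#         ds.load_cv_split(fold, ratio=None)
#         ds.px_discretize()
#         # ... fit on ds.train, evaluate on ds.test / ds.test_labels ...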
def px_discretize(self):
    """Quantile-discretize continuous feature columns; copy low-cardinality columns as-is."""
    for i, (col_name, col) in enumerate(self.train.items()):  # iteritems() was removed in pandas 2.0
        if col_name == self.label_column:
            continue
        if np.unique(col).shape[0] > self.disc_quantiles:
            # Fit the discretization on the training column only ...
            train_disc, disc_map = px.discretize(
                np.ascontiguousarray(self.train.values).astype(np.float64),
                num_states=self.disc_quantiles,
                targets=np.array(i))
            train_disc = train_disc[:, i]
            # ... and reuse the fitted bin edges for the test column.
            test_disc, _ = px.discretize(
                np.ascontiguousarray(self.test.values).astype(np.float64),
                discretization=disc_map,
                targets=np.array(i))
            test_disc = test_disc[:, i]
        else:
            # Already categorical: just cast to the integer dtype px expects.
            train_disc = self.train[col_name].to_numpy().astype(np.uint16)
            test_disc = self.test[col_name].to_numpy().astype(np.uint16)
        self.train.loc[:, col_name] = train_disc
        self.test.loc[:, col_name] = test_disc
def px_discretize_holdout(self):
    """Apply the same per-column discretization scheme to the holdout set."""
    for i, (col_name, col) in enumerate(self.holdout.items()):  # iteritems() was removed in pandas 2.0
        if col_name == self.label_column:
            continue
        if np.unique(col).shape[0] > self.disc_quantiles:
            holdout_disc, _ = px.discretize(
                np.ascontiguousarray(self.holdout.to_numpy().astype(np.float64)),
                num_states=self.disc_quantiles,
                targets=np.array(i))
            holdout_disc = holdout_disc[:, i]
        else:
            holdout_disc = col.to_numpy().astype(np.uint16)
        self.holdout.loc[:, col_name] = holdout_disc
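# Conceptual sketch of the per-column binning in the two methods above (an
# assumption: judging by the `disc_quantiles` attribute, px.discretize is used
# here as a quantile discretizer). A pure-NumPy equivalent for one column:
#
#     import numpy as np
#     col = np.random.randn(1000)                             # continuous feature
#     edges = np.quantile(col, np.linspace(0, 1, 11)[1:-1])   # 10 bins -> 9 inner edges
#     states = np.digitize(col, edges).astype(np.uint16)      # integer state per row
#     # reusing `edges` on a holdout column mirrors passing discretization=disc_map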
def __init__(self, states, edgelist=None, seed=None):
    super(Synthetic, self).__init__()
    n_vars = 15
    n_samples = 1000
    n_states = 10
    self.random_state = np.random.RandomState(seed=seed)
    # Generate a random positive semi-definite covariance via the Gram-matrix trick.
    cov = self.random_state.randn(n_vars, n_vars)
    cov = np.dot(cov, cov.T) / n_vars
    # Draw samples from a zero-mean multivariate normal with that covariance.
    # (The original code re-applied np.dot(cov, cov.T) / n_vars here, squaring
    # the covariance a second time; `cov` is already PSD at this point.)
    self.data = pd.DataFrame(
        scipy.stats.multivariate_normal(mean=np.zeros(n_vars),
                                        cov=cov).rvs(n_samples))
    data_disc, _ = px.discretize(data=self.data.to_numpy(), num_states=n_states)
    # Append one artificial sample at the maximum state so every variable
    # shares the same state space.
    data_disc = np.concatenate([
        data_disc,
        np.full(shape=(1, n_vars), fill_value=n_states - 1, dtype=np.uint16)
    ])
    # Fit a tree-structured MRF on the discretized data (iters=0: structure and
    # sufficient statistics only, no weight optimization).
    self.global_model = px.train(data_disc,
                                 graph=px.GraphType.auto_tree,
                                 mode=px.ModelType.mrf,
                                 iters=0)
    self.global_weights = np.copy(self.global_model.weights)
    # TODO: Remove the statistics for full point.
    edgelist = self.global_model.graph.edgelist
    stats = self.global_model.statistics
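# A minimal standalone check (not part of the original code) that the
# Gram-matrix construction above, cov = A A^T / n, always yields a valid
# (positive semi-definite) covariance matrix:
#
#     import numpy as np
#     rng = np.random.RandomState(0)
#     A = rng.randn(15, 15)
#     cov = A @ A.T / 15
#     assert np.all(np.linalg.eigvalsh(cov) >= -1e-10)  # all eigenvalues >= 0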
    for x in range(a.shape[0] - 1):
        # Copy the negated diagonal blocks of rhs into cov (a holds the block boundaries).
        cov[a[x]:a[x + 1], a[x]:a[x + 1]] = -rhs[a[x]:a[x + 1], a[x]:a[x + 1]]
    # Replace the diagonal: drop whatever the blocks put there, then restore
    # the original diagonal plus eps * I for numerical stability.
    cov -= np.diag(np.diag(cov))
    cov += diag + np.diag(np.full(model.weights.shape[0], eps))
    return cov


if __name__ == '__main__':
    data = main()
    # Stack all per-chunk arrays into one contiguous float64 matrix.
    res = np.ascontiguousarray(np.vstack(data), dtype=np.float64)
    disc, M = px.discretize(res, 10)
    model = px.train(disc, graph=px.GraphType.auto_tree, iters=10000)
    gen_semi_random_cov(model, 1e-1)
    mu, A = model.infer()
    n_weights = model.weights.shape[0]  # 'vars' shadowed the Python built-in
    mu = mu[:n_weights]
    fi = np.outer(mu - model.statistics, mu - model.statistics)
    # Empirical covariance of the sufficient statistics phi(x) over all samples.
    phis = [model.phi(d) for d in disc]
    cov_XY = np.cov(np.array(phis).T)
    # E[phi phi^T] = Cov(phi) + E[phi] E[phi]^T
    EX_EY = np.outer(mu, mu)
    E_XY = cov_XY + EX_EY
    new_data = os.path.join(CONFIG.ROOT_DIR, "data")
    os.chdir(new_data)
    os.makedirs("SYNTH", exist_ok=True)  # os.mkdir raised if the directory already existed
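# The E_XY computation above uses the identity E[uv] = Cov(u, v) + E[u]E[v],
# applied entrywise to the sufficient-statistics vector phi. A quick sanity
# check of the identity on random data (illustrative only; note np.cov's
# default divides by N-1, so bias=True is needed for exact equality):
#
#     import numpy as np
#     rng = np.random.RandomState(0)
#     X = rng.randn(10000, 4)                                 # rows: samples of phi
#     lhs = (X[:, :, None] * X[:, None, :]).mean(axis=0)      # E[phi phi^T]
#     rhs = np.cov(X.T, bias=True) + np.outer(X.mean(0), X.mean(0))
#     assert np.allclose(lhs, rhs)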