def run_experiment(index, dataset_name, name, constraint_getter, master_tree, X, y,
                   out_dir, n_iters=1000, add_constraint=200, add_score=200,
                   add_likelihood=200, should_continue=False):
    """Run (or resume) one constrained DDT MCMC experiment and persist results.

    Parameters:
        index             -- run index, used to name the output pickle files.
        dataset_name      -- selects the per-dataset likelihood model
                             ('iris', 'zoo', or anything else for the default).
        name              -- experiment name; output goes to out_dir / name.
        constraint_getter -- object with get_constraint(trees) -> constraint or
                             None; pass None to run unconstrained.
        master_tree       -- reference tree used for scoring via dist().
        X, y              -- data matrix (N x D) and labels (y is unused here).
        out_dir           -- path-like supporting '/' joining and mkdir_p().
        n_iters           -- number of MH sampling iterations.
        add_constraint    -- add a constraint every this-many iterations.
        add_score         -- record dist(master_tree, tree) every this-many iters.
        add_likelihood    -- record marginal log-likelihood every this-many iters.
        should_continue   -- resume from previously pickled state when True.

    Returns:
        (costs, scores, sampler) -- likelihood trace, score trace, and the
        sampler in its final state.
    """
    N, D = X.shape
    df = Inverse(c=1)
    # Per-dataset Gaussian likelihoods; non-iris datasets use the (diagonal of
    # the) empirical covariance, scaled.
    if dataset_name == 'iris':
        lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    elif dataset_name == 'zoo':
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    else:
        lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0,
                                     sigma0=np.eye(D) / 2.0,
                                     mu0=X.mean(axis=0)).compile()
    if should_continue:
        # Resume: reload score/cost traces and the saved tree state.
        # BUGFIX: pickle files must be opened in binary mode ('rb'/'wb');
        # text mode corrupts pickle streams (and other scripts in this
        # project already load .tree pickles with "rb").
        with open(out_dir / name / 'scores-%u.pkl' % index, 'rb') as fp:
            scores = pickle.load(fp)
        with open(out_dir / name / 'costs-%u.pkl' % index, 'rb') as fp:
            costs = pickle.load(fp)
        with open(out_dir / name / 'final-tree-%u.pkl' % index, 'rb') as fp:
            tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
            tree.set_state(pickle.load(fp))
        sampler = MetropolisHastingsSampler(tree, X)
    else:
        scores = []
        costs = []
        tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
        sampler = MetropolisHastingsSampler(tree, X)
        sampler.initialize_assignments()
    if dataset_name == 'zoo':
        # Restrict the sampled tree to the points present in the master tree.
        sampler.tree = sampler.tree.induced_subtree(master_tree.points())
    current_run = []
    for i in tqdm(xrange(n_iters + 1)):
        sampler.sample()
        current_run.append(sampler.tree)
        if i % add_score == 0:
            scores.append(dist(master_tree, sampler.tree))
        if i % add_likelihood == 0:
            costs.append(sampler.tree.marg_log_likelihood())
        if i != 0 and i % add_constraint == 0:
            if constraint_getter is not None:
                constraint = constraint_getter.get_constraint(current_run)
                if constraint is not None:
                    sampler.add_constraint(constraint)
            # Reset the window of recent trees for the next constraint interval.
            # NOTE(review): source layout was ambiguous here; resetting on every
            # interval (even when constraint_getter is None) also bounds memory.
            current_run = []
    (out_dir / name).mkdir_p()
    with open(out_dir / name / 'scores-%u.pkl' % index, 'wb') as fp:
        pickle.dump(scores, fp)
    print(len(costs))
    with open(out_dir / name / 'costs-%u.pkl' % index, 'wb') as fp:
        pickle.dump(costs, fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'wb') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler
X = X[idx]
y = y[idx]

# Remap the interactively gathered triplet constraints into the subsampled
# index space; any constraint touching a point outside the subsample is dropped.
constraints = set()
for a, b, c in interactor.current_interactions:
    if a in idx_map and b in idx_map and c in idx_map:
        constraints.add((idx_map[a], idx_map[b], idx_map[c]))
logging.info("Interactions: %s", constraints)
logging.info("Interactions: %u", len(constraints))

N, D = X.shape
df = Inverse(c=2)
# Regularize the empirical covariance slightly so it stays positive-definite.
cov = np.cov(X.T) / 4.0 + np.eye(D) * 0.001
lm = GaussianLikelihoodModel(sigma=cov, sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()
model = DirichletDiffusionTree(df=df, likelihood_model=lm,
                               constraints=constraints)
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

def iterate(iters):
    """Advance the shared sampler by `iters` Metropolis-Hastings steps."""
    for _ in tqdm(xrange(iters)):
        sampler.sample()
import numpy as np
import matplotlib.pyplot as plt  # BUGFIX: plt was used below but never imported
from trees.util import plot_tree, plot_tree_2d
from trees.ddt import DirichletDiffusionTree, Inverse, GaussianLikelihoodModel
from trees.mcmc import MetropolisHastingsSampler
from tqdm import tqdm

if __name__ == "__main__":
    # Synthetic data: N standard-normal points in D dimensions.
    D = 2
    N = 100
    X = np.random.multivariate_normal(mean=np.zeros(D), cov=np.eye(D),
                                      size=N).astype(np.float32)

    df = Inverse(c=1)
    # NOTE(review): other scripts in this project call .compile() on the
    # likelihood model before use — confirm whether it is required here.
    lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, mu0=np.zeros(D),
                                 sigma0=np.eye(D))
    ddt = DirichletDiffusionTree(df=df, likelihood_model=lm)
    mh = MetropolisHastingsSampler(ddt, X)
    mh.initialize_assignments()

    # Run the MH chain, then plot the likelihood trace and the sampled tree.
    for _ in tqdm(xrange(1000)):
        mh.sample()

    plt.figure()
    plt.plot(mh.likelihoods)
    plt.figure()
    plot_tree(mh.tree)
    plt.figure()
    plot_tree_2d(mh.tree, X)
    plt.show()
# Set up a DDT sampler over X with a fixed RNG seed for reproducibility.
N = X.shape[0]
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]
# X = X[idx]
# y = np.array(y)
# y = y[idx]
N, D = X.shape
df = Inverse(c=1)
lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()
tree = DirichletDiffusionTree(df=df, likelihood_model=lm)
sampler = MetropolisHastingsSampler(tree, X)
sampler.initialize_assignments()
# NOTE(review): this rebinds D from the data dimensionality (an int) to an
# inverse pairwise-distance matrix. squareform(pdist(X)) has zeros on its
# diagonal, so 1.0 / ... yields inf there — confirm downstream consumers
# expect that (and that nothing still needs D as the dimension count).
D = 1.0 / squareform(pdist(X))

def plot_tree(tree):
    # Render the tree via Biopython/graphviz, with leaves relabeled by class.
    # NOTE(review): shadows any plot_tree imported from trees.util.
    final_tree = tree.copy()
    for node in final_tree.dfs():
        if node.is_leaf():
            # Replace the leaf's point index with its class label for display.
            node.point = y[node.point]
    newick = final_tree.to_newick()
    # Rebinding `tree` here is intentional: it now holds the Phylo tree.
    tree = Phylo.read(StringIO(newick), 'newick')
    Phylo.draw_graphviz(tree, prog='neato')
    plt.show()
# Derive triplet constraints from the reference tree and split them into a
# training pool (fed to the sampler) and a held-out test pool.
master_constraints = list(master_tree.generate_constraints())
random.seed(0)
random.shuffle(master_constraints)
train_constraints = master_constraints[:200]
test_constraints = master_constraints[200:][:10000]

df = Inverse(c=0.9)
lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

# Schedule: one training constraint is added every `constraint_add` steps.
constraint_add = 500
constraint_index = 0
n_iters = 100000
score_every = 1000
likelihoods = []
scores = []
total_steps = n_iters + constraint_add
for i in tqdm(xrange(total_steps)):
    if i and i % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
# Load the reference zoo tree and derive triplet constraints from it,
# split into a training pool (fed to the sampler) and a held-out test pool.
with open("../../scripts/zoo.tree", "rb") as fp:
    master_tree = pickle.load(fp)
master_constraints = list(master_tree.generate_constraints())
random.seed(0)
random.shuffle(master_constraints)
train_constraints = master_constraints[:200]
test_constraints = master_constraints[200:][:10000]

df = Inverse(c=0.9)
lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0,
                             mu0=X.mean(axis=0)).compile()
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

# Schedule: one training constraint is added every `constraint_add` steps.
constraint_add = 500
constraint_index = 0
n_iters = 100000
score_every = 1000
likelihoods = []
scores = []
total_steps = n_iters + constraint_add
for i in tqdm(xrange(total_steps)):
    if i and i % constraint_add == 0:
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
import numpy as np  # BUGFIX: np was used below but never imported here
import matplotlib.pyplot as plt  # BUGFIX: plt was used below but never imported
from trees.util import plot_tree, plot_tree_2d
from trees.ddt import DirichletDiffusionTree, Inverse, GaussianLikelihoodModel
from trees.mcmc import MetropolisHastingsSampler
from tqdm import tqdm

if __name__ == "__main__":
    # Synthetic data: N standard-normal points in D dimensions.
    D = 2
    N = 100
    X = np.random.multivariate_normal(mean=np.zeros(D), cov=np.eye(D),
                                      size=N).astype(np.float32)

    df = Inverse(c=1)
    # NOTE(review): other scripts in this project call .compile() on the
    # likelihood model before use — confirm whether it is required here.
    lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, mu0=np.zeros(D),
                                 sigma0=np.eye(D))
    ddt = DirichletDiffusionTree(df=df, likelihood_model=lm)
    mh = MetropolisHastingsSampler(ddt, X)
    mh.initialize_assignments()

    # Run the MH chain, then plot only the likelihood trace
    # (tree plots are intentionally disabled below).
    for _ in tqdm(range(1000)):
        mh.sample()

    plt.figure()
    plt.plot(mh.likelihoods)
    #plt.figure()
    #plot_tree(mh.tree)
    #plt.figure()
    #plot_tree_2d(mh.tree, X)
    plt.show()
'50 constraints': DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=train_constraints[:50]), '100 constraints': DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=train_constraints[:100]), # '150 constraints': DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=train_constraints[:150]), '200 constraints': DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=train_constraints), } samplers = {a: MetropolisHastingsSampler(d, X) for a, d in models.iteritems()} for sampler in samplers.values(): sampler.initialize_assignments() score_every = 1000 def iterate(n_iters): scores = {a: [] for a in samplers} likelihoods = {a: [] for a in samplers} for i in tqdm(xrange(n_iters)): for name, sampler in samplers.items(): sampler.sample() likelihoods[name].append(sampler.tree.marg_log_likelihood()) if i % score_every == 0:
np.random.seed(0) # idx = np.random.permutation(np.arange(N))[:20] # X = X[idx] # y = np.array(y) # y = y[idx] N, D = X.shape df = Inverse(c=1) lm = GaussianLikelihoodModel(sigma=np.eye(D) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() tree = DirichletDiffusionTree(df=df, likelihood_model=lm) sampler = MetropolisHastingsSampler(tree, X) sampler.initialize_assignments() D = 1.0 / squareform(pdist(X)) def plot_tree(tree): final_tree = tree.copy() for node in final_tree.dfs(): if node.is_leaf(): node.point = y[node.point] newick = final_tree.to_newick() tree = Phylo.read(StringIO(newick), 'newick') Phylo.draw_graphviz(tree, prog='neato') plt.show()