def run_experiment(index, dataset_name, name, constraint_getter, master_tree, X, y, out_dir, n_iters=1000, add_constraint=200, add_score=200, add_likelihood=200, should_continue=False): N, D = X.shape df = Inverse(c=1) if dataset_name == 'iris': lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() elif dataset_name == 'zoo': lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() else: lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() if should_continue: with open(out_dir / name / 'scores-%u.pkl' % index, 'r') as fp: scores = pickle.load(fp) with open(out_dir / name / 'costs-%u.pkl' % index, 'r') as fp: costs = pickle.load(fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'r') as fp: tree = DirichletDiffusionTree(df=df, likelihood_model=lm) tree.set_state(pickle.load(fp)) sampler = MetropolisHastingsSampler(tree, X) else: scores = [] costs = [] tree = DirichletDiffusionTree(df=df, likelihood_model=lm) sampler = MetropolisHastingsSampler(tree, X) sampler.initialize_assignments() if dataset_name == 'zoo': sampler.tree = sampler.tree.induced_subtree(master_tree.points()) current_run = [] for i in tqdm(xrange(n_iters + 1)): sampler.sample() current_run.append(sampler.tree) if i % add_score == 0: scores.append(dist(master_tree, sampler.tree)) if i % add_likelihood == 0: costs.append(sampler.tree.marg_log_likelihood()) if i != 0 and i % add_constraint == 0: if constraint_getter is not None: constraint = constraint_getter.get_constraint(current_run) if constraint is not None: sampler.add_constraint(constraint) current_run = [] # plot_tree(sampler.tree, y) (out_dir / name).mkdir_p() with open(out_dir / name / 'scores-%u.pkl' % index, 'w') as fp: pickle.dump(scores, fp) print len(costs) with open(out_dir / name / 'costs-%u.pkl' % index, 'w') as fp: pickle.dump(costs, 
fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp: # previous_trees = pickle.load(fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp: # pickle.dump(previous_trees + [t.get_state() for t in trees], fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp: pickle.dump(sampler.tree.get_state(), fp) return costs, scores, sampler
sampler_costs = [None, None] scores = [[], []] for val, sampler in enumerate([sampler1, sampler2]): sampler_cost = [] for i in xrange(100): trees, costs = iterate(sampler, 1000) sampler_cost.extend(costs) if val == 0: constraint = random.choice(all_constraints) while constraint in satisfied[val]: constraint = random.choice(all_constraints) elif val == 1: idx = np.random.permutation(list(points))[:20] combos = get_variance(trees, idx) constraint = get_constraint(master_tree, combos[0]) id = 1 while constraint in satisfied[val]: constraint = get_constraint(combos[id]) id += 1 print "Adding", val, constraint sampler.add_constraint(constraint) satisfied[val].add(constraint) scores[val].append(float(sampler.tree.score_constraints(test_constraints)) / len(test_constraints)) sampler_costs[val] = sampler_cost plt.plot(sampler_costs[0], label='random') plt.plot(sampler_costs[1], label='entropy') plt.show()
# NOTE(review): this chunk opens mid-statement -- the start of the
# `lm = GaussianLikelihoodModel(...` call lies before this view; only the
# trailing keyword argument and `.compile()` are visible here.
mu0=X.mean(axis=0)).compile()
# Sample a constrained DDT: feed one training constraint into the sampler
# every `constraint_add` iterations and score held-out test constraints
# every `score_every` iterations.
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()
constraint_add = 500    # iterations between adding training constraints
constraint_index = 0    # next index into train_constraints
n_iters = 100000
score_every = 1000      # iterations between test-constraint evaluations
likelihoods = []
scores = []
for i in tqdm(xrange(n_iters + constraint_add)):
    if i != 0 and i % constraint_add == 0:
        # Introduce the next training constraint before sampling.
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
    if i % score_every == 0:
        # Fraction of held-out constraints the current tree satisfies.
        scores.append(
            float(sampler.tree.score_constraints(test_constraints)) /
            len(test_constraints))
# One final evaluation so the score series covers the last iterations.
scores.append(
    float(sampler.tree.score_constraints(test_constraints)) /
    len(test_constraints))
fontsize = 18
plt.figure()
plt.xlim([0, n_iters + constraint_add])
# Constrained DDT run: periodically feed training constraints into the
# sampler while tracking marginal log-likelihood and the fraction of
# held-out test constraints the current tree satisfies.
model = DirichletDiffusionTree(df=df, likelihood_model=lm, constraints=[])
sampler = MetropolisHastingsSampler(model, X)
sampler.initialize_assignments()

constraint_add = 500      # add one training constraint every 500 steps
constraint_index = 0
n_iters = 100000
score_every = 1000        # evaluate held-out constraints every 1000 steps

likelihoods = []
scores = []
total_steps = n_iters + constraint_add
for step in tqdm(xrange(total_steps)):
    if step != 0 and step % constraint_add == 0:
        # Introduce the next training constraint before this sampling step.
        sampler.add_constraint(train_constraints[constraint_index])
        constraint_index += 1
    sampler.sample()
    likelihoods.append(sampler.tree.marg_log_likelihood())
    if step % score_every == 0:
        satisfied_count = float(sampler.tree.score_constraints(test_constraints))
        scores.append(satisfied_count / len(test_constraints))

# One last evaluation so the score curve reaches the final iterations.
satisfied_count = float(sampler.tree.score_constraints(test_constraints))
scores.append(satisfied_count / len(test_constraints))

fontsize = 18
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.ylim([0, 1])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Constraint Score", fontsize=fontsize)
plt.plot(np.arange(0, n_iters + constraint_add + score_every, score_every),
         scores)
def run_experiment(index, dataset_name, name, constraint_getter, master_tree, X, y, out_dir, n_iters=1000, add_constraint=200, add_score=200, add_likelihood=200, should_continue=False): N, D = X.shape df = Inverse(c=1) if dataset_name == 'iris': lm = GaussianLikelihoodModel(sigma=np.eye(D) / 9.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() elif dataset_name == 'zoo': lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 4.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() else: lm = GaussianLikelihoodModel(sigma=np.diag(np.diag(np.cov(X.T))) / 2.0, sigma0=np.eye(D) / 2.0, mu0=X.mean(axis=0)).compile() if should_continue: with open(out_dir / name / 'scores-%u.pkl' % index, 'r') as fp: scores = pickle.load(fp) with open(out_dir / name / 'costs-%u.pkl' % index, 'r') as fp: costs = pickle.load(fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'r') as fp: tree = DirichletDiffusionTree(df=df, likelihood_model=lm) tree.set_state(pickle.load(fp)) sampler = MetropolisHastingsSampler(tree, X) else: scores = [] costs = [] tree = DirichletDiffusionTree(df=df, likelihood_model=lm) sampler = MetropolisHastingsSampler(tree, X) sampler.initialize_assignments() if dataset_name == 'zoo': sampler.tree = sampler.tree.induced_subtree(master_tree.points()) current_run = [] for i in tqdm(xrange(n_iters + 1)): sampler.sample() current_run.append(sampler.tree) if i % add_score == 0: scores.append(dist(master_tree, sampler.tree)) if i % add_likelihood == 0: costs.append(sampler.tree.marg_log_likelihood()) if i != 0 and i % add_constraint == 0: if constraint_getter is not None: constraint = constraint_getter.get_constraint(current_run) if constraint is not None: sampler.add_constraint(constraint) current_run = [] # plot_tree(sampler.tree, y) (out_dir / name).mkdir_p() with open(out_dir / name / 'scores-%u.pkl' % index, 'w') as fp: pickle.dump(scores, fp) print len(costs) with open(out_dir / name / 'costs-%u.pkl' % index, 'w') as fp: pickle.dump(costs, 
fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp: # previous_trees = pickle.load(fp) # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp: # pickle.dump(previous_trees + [t.get_state() for t in trees], fp) with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp: pickle.dump(sampler.tree.get_state(), fp) return costs, scores, sampler
scores = [[], []] for val, sampler in enumerate([sampler1, sampler2]): sampler_cost = [] for i in range(100): trees, costs = iterate(sampler, 1000) sampler_cost.extend(costs) if val == 0: constraint = random.choice(all_constraints) while constraint in satisfied[val]: constraint = random.choice(all_constraints) elif val == 1: idx = np.random.permutation(list(points))[:20] combos = get_variance(trees, idx) constraint = get_constraint(master_tree, combos[0]) id = 1 while constraint in satisfied[val]: constraint = get_constraint(combos[id]) id += 1 print "Adding", val, constraint sampler.add_constraint(constraint) satisfied[val].add(constraint) scores[val].append( float(sampler.tree.score_constraints(test_constraints)) / len(test_constraints)) sampler_costs[val] = sampler_cost plt.plot(sampler_costs[0], label='random') plt.plot(sampler_costs[1], label='entropy') plt.show()