def get_trees(tree_type, tree_size, tree_path):
    """Build or load the reference tree selected by ``tree_type``.

    Args:
        tree_type: Which tree to produce. Supported values are ``"binary"``,
            ``"catepillar"`` (the historical spelling used by this function;
            ``"caterpillar"`` is accepted as an alias), ``"birthdeath"``,
            ``"kingman"`` and ``"path"``.
        tree_size: Size argument forwarded unchanged to the ``utils`` tree
            generator. Ignored when ``tree_type`` is ``"path"``.
        tree_path: Path of a Nexus file, read with ``dendropy.Tree.get`` when
            ``tree_type`` is ``"path"``. Ignored for every other type.

    Returns:
        The object returned by the selected ``utils`` generator, or by
        ``dendropy.Tree.get`` for ``"path"``.

    Raises:
        ValueError: If ``tree_type`` is not one of the supported values.
    """
    if tree_type == "binary":
        return utils.balanced_binary(tree_size)
    if tree_type in ("catepillar", "caterpillar"):
        return utils.lopsided_tree(tree_size)
    if tree_type == "birthdeath":
        return utils.unrooted_birth_death_tree(tree_size)
    if tree_type == "kingman":
        return utils.unrooted_pure_kingman_tree(tree_size)
    if tree_type == "path":
        # Use the tree_path argument; the previous code read the global
        # ``args.path`` and silently ignored this parameter.
        return dendropy.Tree.get(path=tree_path, schema="nexus")
    # Fail loudly instead of hitting an UnboundLocalError on ``return tree``.
    raise ValueError(
        f"Unknown tree_type {tree_type!r}; expected one of 'binary', "
        "'catepillar', 'birthdeath', 'kingman', 'path'"
    )
# Experiment script: repeatedly draw a reference tree, simulate sequences on it
# for each sample size in N_vec, reconstruct the tree with TreeSVD, and time /
# score the reconstruction. `num_reps`, `num_taxa`, `N_vec` and
# `cherry_count_for_tree` are expected to be defined elsewhere in the file.

# Sequence model with two classes; `p2t(0.9)` supplies the mutation rate used
# for every simulation below (presumably a probability-to-rate conversion —
# TODO confirm against generation.Jukes_Cantor).
jc = generation.Jukes_Cantor(num_classes=2)
mutation_rate = jc.p2t(0.9)

# Reconstruction methods. Only `treesvd` is invoked in the visible part of the
# loop; `snj`, `nj` and `methods` are not used by the code shown here.
snj = reconstruct_tree.SpectralNeighborJoining(
    reconstruct_tree.JC_similarity_matrix)
nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix)
treesvd = reconstruct_tree.TreeSVD()
methods = [snj, treesvd]
# Disabled alternative that would run the whole comparison through
# compare_methods.experiment instead of the manual loop below:
#results = compare_methods.experiment([reference_tree], jc, N_vec, methods=methods,\
#     mutation_rates = [mutation_rate], reps_per_tree=num_reps)

# Empty results table with one column per recorded quantity.
df = pd.DataFrame(
    columns=['method', 'runtime', 'RF', 'n', 'cherries_ref', 'cherries_res'])
for i in np.arange(num_reps):
    print(i)  # progress indicator: index of the current repetition
    # A fresh Kingman reference tree is drawn for every repetition.
    reference_tree = utils.unrooted_pure_kingman_tree(num_taxa)
    ch_ref = cherry_count_for_tree(reference_tree)
    for n in N_vec:
        # NOTE(review): the model is built with num_classes=2 but the alphabet
        # passed here is "DNA" — confirm this combination is intended.
        observations, taxa_meta = generation.simulate_sequences(
            n,
            tree_model=reference_tree,
            seq_model=jc,
            mutation_rate=mutation_rate,
            alphabet="DNA")
        # Tree svd
        # Wall-clock time of the TreeSVD reconstruction only (simulation is
        # excluded from the measurement).
        t_s = time.time()
        tree_svd_rec = treesvd(observations, taxa_meta)
        runtime_treesvd = time.time() - t_s
        # compare_trees returns a pair; the second value (F1) is not used in
        # the code shown here.
        RF_svd, F1 = reconstruct_tree.compare_trees(tree_svd_rec, reference_tree)
#num_taxa = [128,256,512,1024,2048,4096] num_taxa = np.arange(200, 2000, 200) n_itr = 10 #num_taxa = [8,16,32] N = 1000 jc = generation.Jukes_Cantor() mutation_rate = jc.p2t(0.95) nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix) raxml = reconstruct_tree.RAxML() #tree_list = [utils.balanced_binary(m) for m in num_taxa] df = pd.DataFrame(columns=['method', 'runtime', 'RF', 'm']) for m in num_taxa: for n_itr in range(n_itr): reference_tree = utils.unrooted_pure_kingman_tree(m) observations, taxa_meta = generation.simulate_sequences( N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, alphabet="DNA") # NJ time_s = time.time() tree_rec = nj(observations, taxa_meta) runtime = time.time() - time_s RF, F1 = reconstruct_tree.compare_trees(reference_tree, tree_rec) print('NJ iteration: ', n_itr, ' num_taxa: ', m, ' time: ', runtime) df = df.append({