'N': [], 'method': [], 'split size': [], 'split results': [], 'success': [] }) for (tree, tree_str) in zip(tree_list, tree_str_list): for n in N: print(n) start_time = time.time() for b in range(B): print(b) data_HKY = simulate_discrete_chars(n, tree, Hky85(kappa=2), mutation_rate=0.05) ch_list = list() for t in data_HKY.taxon_namespace: ch_list.append([x.symbol for x in data_HKY[t]]) ch_arr = np.array(ch_list) # Compute similarity matrix HKY_sim = reconstruct_tree.HKY_similarity_matrix(ch_arr) # scan over methods for method in threshold_methods: parent_list, splits = recursive_partition_taxa( HKY_sim, threshold, method) splits = [np.array([True] * HKY_sim.shape[0])] + splits parent_list = [-1] + parent_list
import scipy import time from itertools import product import matplotlib.pyplot as plt import cProfile from dendropy.model.discrete import simulate_discrete_chars, Jc69, Hky85 from dendropy.calculate.treecompare import symmetric_difference tree_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.newick") fasta_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.fasta") H3N2_tree = dendropy.Tree.get(path=tree_path, schema="newick") H3N2_dna = dendropy.DnaCharacterMatrix.get(file=open(fasta_path, "r"), schema="fasta") N = 1000 data_HKY = simulate_discrete_chars(N, H3N2_tree, Hky85(kappa = 2), mutation_rate=0.1) ch_list = list() for t in data_HKY.taxon_namespace: ch_list.append([x.symbol for x in data_HKY[t]]) ch_arr = np.array(ch_list) identical = np.array([np.mean(a == b) for a, b in product(ch_arr, repeat = 2)]) #start_time = time.time() #cProfile.run('S = HKY_similarity_matrix(ch_arr)') #compute_s_time = time.time() - start_time #print("--- %s seconds ---" % compute_s_time) threshold = 128 t1 = time.time() spectral_method = reconstruct_tree.STDR(reconstruct_tree.RAxML, reconstruct_tree.HKY_similarity_matrix) tree_rec = spectral_method.deep_spectral_tree_reconstruction(ch_arr, reconstruct_tree.HKY_similarity_matrix,
from dendropy.interop import raxml
from dendropy.model.discrete import simulate_discrete_chars, Jc69
from dendropy.calculate.treecompare import symmetric_difference

# Experiment configuration: num_taxa is handed to utils.balanced_binary to
# build the reference tree, N is the first argument passed to
# simulate_discrete_chars below.
num_taxa = 32
N = 1000
# Reference tree that the reconstructed tree is compared against further down.
reference_tree = utils.balanced_binary(num_taxa)

# %%
###########################################################################
## TEST WITH DENDROPY DATA
###########################################################################
print("test dendropy data")
time_s = time.time()
# Simulate characters along reference_tree under the Jc69 model. The mutation
# rate is whatever generation.Jukes_Cantor().p2t returns for 0.95
# (presumably a probability-to-branch-length conversion -- TODO confirm).
data = simulate_discrete_chars(
    N,
    reference_tree,
    Jc69(),
    mutation_rate=generation.Jukes_Cantor().p2t(0.95),
)
print("")
print("Time for data generation", time.time() - time_s)

# Restart the timer so only the reconstruction step is measured.
time_s = time.time()
# NOTE(review): this assignment rebinds the name `raxml`, hiding the
# dendropy.interop.raxml module imported at the top of this section.
raxml = reconstruct_tree.RAxML()
tree = raxml(data)
# NOTE(review): runtime is computed here but not printed anywhere in the
# visible part of the script.
runtime = time.time() - time_s

# Report how far the reconstructed tree is from the reference tree, first via
# dendropy's symmetric_difference and then via the project's compare_trees,
# which returns the (RF, F1) pair printed below.
print("Data in DNAcharacterMatrix:")
print("symmetric_difference: ", symmetric_difference(reference_tree, tree))
RF, F1 = reconstruct_tree.compare_trees(reference_tree, tree)
print("raxml: ")
print("RF = ", RF)
print("F1% = ", F1)
def run_method(method, tree, threshold=None):
    """Simulate HKY85 data on ``tree``, rebuild the tree with ``method`` and score it.

    A character matrix is simulated along ``tree`` (1000 characters,
    ``Hky85(kappa=2)``, ``mutation_rate=0.1``), the requested reconstruction
    method is run on it, and the result is compared to ``tree`` with
    ``reconstruct_tree.compare_trees``. A short report is printed.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``, ``"STR + SNJ"`` or
            ``"STR + RaXML"``.
        tree: Reference tree. Data is simulated along it, and its
            ``taxon_namespace`` is passed to every method except plain
            ``"RaXML"``, which receives the simulated character matrix.
        threshold: Forwarded as ``threshhold`` to the ``"STR + ..."``
            methods; not used by the other methods. Printed when not None.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]``, where
        ``runtime`` is the elapsed time in seconds of the reconstruction
        call only (simulation and object construction are excluded) and
        ``RF``, ``F1`` are the two values returned by ``compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names. This is
            checked before the simulation so a typo fails immediately
            instead of surfacing later as an unbound-variable error.
    """
    known_methods = (
        "RaXML",
        "SNJ",
        "NJ",
        "STR + NJ",
        "STR + SNJ",
        "STR + RaXML",
    )
    if method not in known_methods:
        raise ValueError(
            "unknown method %r; expected one of: %s"
            % (method, ", ".join(known_methods))
        )

    data_HKY = simulate_discrete_chars(
        1000, tree, Hky85(kappa=2), mutation_rate=0.1)
    # One row of character symbols per taxon, in taxon-namespace order.
    ch_arr = np.array(
        [[x.symbol for x in data_HKY[t]] for t in data_HKY.taxon_namespace])

    raxml_args = "-T 2 --HKY85 -c 1"
    similarity = reconstruct_tree.HKY_similarity_matrix

    # In every branch the timer starts only after the reconstruction object
    # has been built, so `runtime` covers the reconstruction call alone.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.perf_counter()
        tree_rec = raxml_HKY(data_HKY, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.perf_counter()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.perf_counter()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # The three "STR + <inner>" variants differ only in the inner
        # reconstruction class and, for RAxML, the extra command-line args.
        inner_method = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_kwargs = {}
        if method == "STR + RaXML":
            extra_kwargs["raxml_args"] = raxml_args
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_method, similarity)
        start_time = time.perf_counter()
        # NOTE(review): `deep_spectral_tree_reonstruction` and `threshhold`
        # are kept exactly as the original call spelled them -- confirm
        # against the reconstruct_tree API before "correcting" the spelling.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs
        )
    runtime = time.perf_counter() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
#mutation_rate = [jc.p2t(0.95)] mutation_rate = [0.1] #reference_tree = spectraltree.unrooted_birth_death_tree(num_taxa, birth_rate=0.5) # reference_tree = spectraltree.lopsided_tree(num_taxa) reference_tree = spectraltree.balanced_binary(num_taxa) for x in reference_tree.preorder_edge_iter(): x.length = 0.5 print("Genration observations by JC and HKY") #observationsJC = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate)) # observationsHKY = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=hky, mutation_rate=mutation_rate)) t = time.time() observationsJC, _ = spectraltree.charmatrix2array( simulate_discrete_chars(N, reference_tree, Jc69(), mutation_rate=mutation_rate[0])) print("Time to generate JC:", time.time() - t) t = time.time() observationsHKY, metadataHKY = spectraltree.charmatrix2array( simulate_discrete_chars(N, reference_tree, Hky85(kappa=1), mutation_rate=mutation_rate[0])) print("Time to generate HKY:", time.time() - t) t = time.time() S_JC = spectraltree.JC_similarity_matrix(observationsJC) print() print("Time to compute JC similarity:", time.time() - t) print("JC similarity")