'method': [], 'split size': [], 'split results': [], 'success': [] }) for (tree, tree_str) in zip(tree_list, tree_str_list): for n in N: print(n) start_time = time.time() for b in range(B): print(b) data_HKY = simulate_discrete_chars(n, tree, Hky85(kappa=2), mutation_rate=0.05) ch_list = list() for t in data_HKY.taxon_namespace: ch_list.append([x.symbol for x in data_HKY[t]]) ch_arr = np.array(ch_list) # Compute similarity matrix HKY_sim = reconstruct_tree.HKY_similarity_matrix(ch_arr) # scan over methods for method in threshold_methods: parent_list, splits = recursive_partition_taxa( HKY_sim, threshold, method) splits = [np.array([True] * HKY_sim.shape[0])] + splits parent_list = [-1] + parent_list
def run_method(method, tree, threshold=None):
    """Simulate HKY85 sequences on *tree*, reconstruct it with *method*, and score it.

    A 1000-site alignment is simulated along ``tree`` with
    ``Hky85(kappa=2)`` and ``mutation_rate=0.1``. The selected reconstruction
    algorithm is then run and timed, and the result is compared to ``tree``
    with ``reconstruct_tree.compare_trees``. Only the reconstruction call
    itself is timed; simulation and construction of the reconstructor object
    are excluded.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``, ``"STR + SNJ"`` or
            ``"STR + RaXML"`` (exact, case-sensitive match).
        tree: Reference tree used both to simulate the data and as the
            ground truth for scoring. Its ``taxon_namespace`` attribute is
            read. (Presumably a ``dendropy.Tree`` -- confirm against callers.)
        threshold: Forwarded to the "STR + ..." methods as the ``threshhold``
            keyword; not used by the other methods. Printed when not ``None``.

    Returns:
        list: ``[method, str(threshold), runtime, RF, F1]`` where ``runtime``
        is the elapsed wall-clock time in seconds and ``RF`` / ``F1`` are the
        two values returned by ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    plain_methods = ("RaXML", "SNJ", "NJ")
    str_methods = ("STR + NJ", "STR + SNJ", "STR + RaXML")
    known_methods = plain_methods + str_methods

    # Fail fast: without this check an unrecognised name would fall through
    # every branch and crash later on unbound ``start_time`` / ``tree_rec``,
    # after the (expensive) simulation had already run.
    if method not in known_methods:
        raise ValueError(
            "Unknown method %r; expected one of: %s"
            % (method, ", ".join(known_methods))
        )

    raxml_args = "-T 2 --HKY85 -c 1"

    data_HKY = simulate_discrete_chars(
        1000, tree, Hky85(kappa=2), mutation_rate=0.1)
    # One row of state symbols per taxon, in taxon-namespace order.
    ch_arr = np.array(
        [[x.symbol for x in data_HKY[t]] for t in data_HKY.taxon_namespace])

    if method == "RaXML":
        # RAxML consumes the simulated character matrix directly.
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(data_HKY, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(
            reconstruct_tree.HKY_similarity_matrix)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(
            reconstruct_tree.HKY_similarity_matrix)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # The three "STR + X" variants differ only in the inner reconstruction
        # class and in whether RAxML arguments are forwarded.
        if method == "STR + NJ":
            inner_method = reconstruct_tree.NeighborJoining
        elif method == "STR + SNJ":
            inner_method = reconstruct_tree.SpectralNeighborJoining
        else:
            inner_method = reconstruct_tree.RAxML
        extra_kwargs = {}
        if method == "STR + RaXML":
            extra_kwargs["raxml_args"] = raxml_args

        similarity = reconstruct_tree.HKY_similarity_matrix
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_method, similarity)
        start_time = time.time()
        # NOTE: the method name and the ``threshhold`` keyword are spelled
        # exactly as the original call sites spell them; do not "correct"
        # them here without checking the reconstruct_tree API.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr, similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs)

    runtime = time.time() - start_time
    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)

    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return ([method, str(threshold), runtime, RF, F1])
import scipy import time from itertools import product import matplotlib.pyplot as plt import cProfile from dendropy.model.discrete import simulate_discrete_chars, Jc69, Hky85 from dendropy.calculate.treecompare import symmetric_difference tree_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.newick") fasta_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.fasta") H3N2_tree = dendropy.Tree.get(path=tree_path, schema="newick") H3N2_dna = dendropy.DnaCharacterMatrix.get(file=open(fasta_path, "r"), schema="fasta") N = 1000 data_HKY = simulate_discrete_chars(N, H3N2_tree, Hky85(kappa = 2), mutation_rate=0.1) ch_list = list() for t in data_HKY.taxon_namespace: ch_list.append([x.symbol for x in data_HKY[t]]) ch_arr = np.array(ch_list) identical = np.array([np.mean(a == b) for a, b in product(ch_arr, repeat = 2)]) #start_time = time.time() #cProfile.run('S = HKY_similarity_matrix(ch_arr)') #compute_s_time = time.time() - start_time #print("--- %s seconds ---" % compute_s_time) threshold = 128 t1 = time.time() spectral_method = reconstruct_tree.STDR(reconstruct_tree.RAxML, reconstruct_tree.HKY_similarity_matrix) tree_rec = spectral_method.deep_spectral_tree_reconstruction(ch_arr, reconstruct_tree.HKY_similarity_matrix,
# Simulate observations under the Jukes-Cantor and HKY models, then build the
# corresponding similarity matrices, reporting the wall-clock time of each step.
print("Genration observations by JC and HKY")

# An earlier variant built FastCharacterMatrix objects instead; the two
# disabled lines are kept for reference.
#observationsJC = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate))
# observationsHKY = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=hky, mutation_rate=mutation_rate))

# --- Jukes-Cantor simulation (only the first configured mutation rate) ---
t = time.time()
jc_char_matrix = simulate_discrete_chars(
    N,
    reference_tree,
    Jc69(),
    mutation_rate=mutation_rate[0],
)
# The second value returned by charmatrix2array is not needed for JC.
observationsJC, _ = spectraltree.charmatrix2array(jc_char_matrix)
print("Time to generate JC:", time.time() - t)

# --- HKY simulation; the second returned value is kept for the similarity step ---
t = time.time()
hky_char_matrix = simulate_discrete_chars(
    N,
    reference_tree,
    Hky85(kappa=1),
    mutation_rate=mutation_rate[0],
)
observationsHKY, metadataHKY = spectraltree.charmatrix2array(hky_char_matrix)
print("Time to generate HKY:", time.time() - t)

# --- JC similarity matrix ---
t = time.time()
S_JC = spectraltree.JC_similarity_matrix(observationsJC)
print()
print("Time to compute JC similarity:", time.time() - t)
print("JC similarity")
print(S_JC)

# --- HKY similarity matrix ---
t = time.time()
S_HKY = spectraltree.HKY_similarity_matrix(observationsHKY, metadataHKY)
print()
print("Time to compute HKY similarity:", time.time() - t)
print("HKY similarity")