'N': [],
    'method': [],
    'split size': [],
    'split results': [],
    'success': []
})

# For every (tree, tree_str) pair and every sequence length n in N, run B
# replicates: simulate HKY85 sequences on the tree, convert them to a
# character array, compute the HKY similarity matrix and partition the taxa
# with each method in threshold_methods.
# NOTE(review): tree_list, tree_str_list, N, B, threshold, threshold_methods
# and recursive_partition_taxa are defined outside this fragment.
for (tree, tree_str) in zip(tree_list, tree_str_list):
    for n in N:
        print(n)

        # NOTE(review): start_time is set here but not read anywhere in the
        # visible part of the loop.
        start_time = time.time()
        for b in range(B):
            print(b)
            # Simulate n characters per taxon under HKY85 (kappa=2).
            data_HKY = simulate_discrete_chars(n,
                                               tree,
                                               Hky85(kappa=2),
                                               mutation_rate=0.05)
            # One row of state symbols per taxon, in taxon_namespace order.
            ch_list = list()
            for t in data_HKY.taxon_namespace:
                ch_list.append([x.symbol for x in data_HKY[t]])
            ch_arr = np.array(ch_list)

            # Compute similarity matrix
            HKY_sim = reconstruct_tree.HKY_similarity_matrix(ch_arr)

            # scan over methods
            for method in threshold_methods:
                parent_list, splits = recursive_partition_taxa(
                    HKY_sim, threshold, method)
                # Prepend an all-True mask with one entry per row of HKY_sim
                # and a matching parent index of -1.
                # NOTE(review): presumably this is the root partition that
                # contains every taxon - confirm against
                # recursive_partition_taxa.
                splits = [np.array([True] * HKY_sim.shape[0])] + splits
                parent_list = [-1] + parent_list
import scipy
import time
from itertools import product
import matplotlib.pyplot as plt
import cProfile

from dendropy.model.discrete import simulate_discrete_chars, Jc69, Hky85
from dendropy.calculate.treecompare import symmetric_difference

# Locate the H3N2 tree and alignment in the "data" directory that lives under
# the parent directory of sys.path[0].
tree_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.newick")
fasta_path = os.path.join(os.path.dirname(sys.path[0]), "data/NY_H3N2.fasta")
H3N2_tree = dendropy.Tree.get(path=tree_path, schema="newick")
# Parse the alignment inside a context manager so the file handle is closed
# as soon as reading finishes instead of being left open for the rest of the
# script.
with open(fasta_path, "r") as fasta_file:
    H3N2_dna = dendropy.DnaCharacterMatrix.get(file=fasta_file, schema="fasta")

# Simulate N characters per taxon on the H3N2 tree under HKY85 (kappa=2).
N = 1000
data_HKY = simulate_discrete_chars(N, H3N2_tree, Hky85(kappa=2), mutation_rate=0.1)
# One row of state symbols per taxon, in taxon_namespace order.
ch_list = list()
for t in data_HKY.taxon_namespace:
    ch_list.append([x.symbol for x in data_HKY[t]])
ch_arr = np.array(ch_list)
# Fraction of matching positions for every ordered pair of rows (self-pairs
# included), flattened in itertools.product order.
identical = np.array([np.mean(a == b) for a, b in product(ch_arr, repeat=2)])

#start_time = time.time()
#cProfile.run('S = HKY_similarity_matrix(ch_arr)')
#compute_s_time = time.time() - start_time
#print("--- %s seconds ---" % compute_s_time)
threshold = 128
t1 = time.time()
spectral_method = reconstruct_tree.STDR(reconstruct_tree.RAxML,
                                                              reconstruct_tree.HKY_similarity_matrix)
tree_rec = spectral_method.deep_spectral_tree_reconstruction(ch_arr, reconstruct_tree.HKY_similarity_matrix, 
Beispiel #3
0
from dendropy.interop import raxml
from dendropy.model.discrete import simulate_discrete_chars, Jc69
from dendropy.calculate.treecompare import symmetric_difference

# Script: simulate Jukes-Cantor data on a balanced binary reference tree,
# reconstruct the tree with RAxML and print comparison scores.
num_taxa = 32
N = 1000
reference_tree = utils.balanced_binary(num_taxa)
# %%
###########################################################################
##                   TEST WITH DENDROPY DATA
###########################################################################
print("test dendropy data")
time_s = time.time()
# Simulate N characters per taxon; the mutation rate is whatever
# generation.Jukes_Cantor().p2t(0.95) returns.
# NOTE(review): p2t presumably converts a probability/similarity value into
# a branch-length-like rate - confirm in the generation module.
data = simulate_discrete_chars(
    N,
    reference_tree,
    Jc69(),
    mutation_rate=generation.Jukes_Cantor().p2t(0.95),
)
print("")
print("Time for data generation", time.time() - time_s)
# Restart the timer so that only the reconstruction step is measured.
time_s = time.time()
# NOTE(review): this rebinds the module-level name `raxml`, hiding the
# `raxml` imported from dendropy.interop above.
raxml = reconstruct_tree.RAxML()
tree = raxml(data)
# NOTE(review): runtime is computed but not printed in the visible code.
runtime = time.time() - time_s

print("Data in DNAcharacterMatrix:")
print("symmetric_difference: ", symmetric_difference(reference_tree, tree))
RF, F1 = reconstruct_tree.compare_trees(reference_tree, tree)
print("raxml: ")
print("RF = ", RF)
print("F1% = ", F1)
Beispiel #4
0
def run_method(method, tree, threshold=None):
    """Simulate HKY85 data on ``tree``, reconstruct it with ``method`` and score it.

    1000 characters per taxon are simulated with ``Hky85(kappa=2)`` and a
    mutation rate of 0.1. The selected reconstruction method is then timed
    (construction of the method object is excluded from the timing) and the
    reconstructed tree is compared with ``tree`` via
    ``reconstruct_tree.compare_trees``.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``,
            ``"STR + SNJ"`` or ``"STR + RaXML"``.
        tree: Reference tree to simulate on; its ``taxon_namespace`` is
            passed to the reconstruction methods.
        threshold: Forwarded as ``threshhold`` to the ``"STR + ..."``
            methods; ignored by the others.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is
        the reconstruction time in seconds and ``RF``/``F1`` are the two
        values returned by ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names. The
            check happens before any data is simulated.
    """
    supported_methods = (
        "RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML",
    )
    # Fail fast: previously an unknown name fell through every branch and
    # crashed on an unbound ``start_time`` after the costly simulation.
    if method not in supported_methods:
        raise ValueError(
            "unknown method %r; expected one of %s"
            % (method, ", ".join(supported_methods)))

    data_HKY = simulate_discrete_chars(1000,
                                       tree,
                                       Hky85(kappa=2),
                                       mutation_rate=0.1)
    # One row of state symbols per taxon, in taxon_namespace order.
    ch_arr = np.array([[x.symbol for x in data_HKY[t]]
                       for t in data_HKY.taxon_namespace])

    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(data_HKY, raxml_args="-T 2 --HKY85 -c 1")
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(
            reconstruct_tree.HKY_similarity_matrix)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(
            reconstruct_tree.HKY_similarity_matrix)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # "STR + <base>": the three variants differ only in the base
        # reconstruction class and in the extra RAxML arguments.
        base_method = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_args = {}
        if method == "STR + RaXML":
            extra_args["raxml_args"] = "-T 2 --HKY85 -c 1"
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            base_method, reconstruct_tree.HKY_similarity_matrix)
        start_time = time.time()
        # The method and keyword spellings ("reonstruction", "threshhold")
        # are kept exactly as in the original calls.
        # NOTE(review): confirm them against the reconstruct_tree API.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            reconstruct_tree.HKY_similarity_matrix,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_args)
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
Beispiel #5
0
# Script: build a balanced binary reference tree with uniform edge lengths,
# simulate JC69 and HKY85 observations on it and time each step.
# NOTE(review): num_taxa, N, spectraltree and the dendropy helpers are
# defined outside this fragment.
#mutation_rate = [jc.p2t(0.95)]
# Kept as a one-element list; only element 0 is used in the visible code.
mutation_rate = [0.1]

#reference_tree = spectraltree.unrooted_birth_death_tree(num_taxa, birth_rate=0.5)
# reference_tree = spectraltree.lopsided_tree(num_taxa)
reference_tree = spectraltree.balanced_binary(num_taxa)
# Give every edge of the reference tree the same length.
for x in reference_tree.preorder_edge_iter():
    x.length = 0.5

print("Genration observations by JC and HKY")
#observationsJC = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate))
# observationsHKY = FastCharacterMatrix(spectraltree.simulate_sequences_ordered(N, tree_model=reference_tree, seq_model=hky, mutation_rate=mutation_rate))
# `t` is reused as the start timestamp for each timed step below.
t = time.time()
# The second value returned by charmatrix2array is discarded for JC.
observationsJC, _ = spectraltree.charmatrix2array(
    simulate_discrete_chars(N,
                            reference_tree,
                            Jc69(),
                            mutation_rate=mutation_rate[0]))
print("Time to generate JC:", time.time() - t)
t = time.time()
# For HKY the second returned value is kept as metadataHKY.
# NOTE(review): kappa=1 presumably makes HKY85 weight transitions and
# transversions equally - confirm this is intended.
observationsHKY, metadataHKY = spectraltree.charmatrix2array(
    simulate_discrete_chars(N,
                            reference_tree,
                            Hky85(kappa=1),
                            mutation_rate=mutation_rate[0]))
print("Time to generate HKY:", time.time() - t)

t = time.time()
S_JC = spectraltree.JC_similarity_matrix(observationsJC)
print()
print("Time to compute JC similarity:", time.time() - t)
print("JC similarity")