Exemple #1
0
def run_method(method, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Simulate HKY sequences on ``tree``, reconstruct the tree and score it.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR+NJ"``, ``"STR+SNJ"``, ``"STR+RaXML"``.
        tree: Reference tree. Used as ``tree_model`` for the simulation and
            as the ground truth passed to ``compare_trees``.
        m: First positional argument of ``generation.simulate_sequences``
            (presumably the sequence length -- TODO confirm).
        kappa: Forwarded to ``generation.HKY``.
        mutation_rate: Forwarded to ``generation.simulate_sequences``.
        threshold: Forwarded as ``threshhold`` to
            ``deep_spectral_tree_reconstruction``; only used by the ``STR+``
            methods. Printed when it is not ``None``.
        verbose: Forwarded to ``deep_spectral_tree_reconstruction``; only
            used by the ``STR+`` methods.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the wall-clock time of the reconstruction call in
        seconds and ``RF``/``F1`` are the two values returned by
        ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR+NJ", "STR+SNJ", "STR+RaXML")
    # Fail fast: without this check an unknown name would only surface after
    # the (expensive) simulation, as an UnboundLocalError on ``tree_rec``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa=kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA",
    )
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    # In every branch the timer starts after the reconstructor object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(observations, taxa_meta, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(observations, taxa_meta)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(observations, taxa_meta)
    else:
        # Remaining names are the "STR+<base>" variants; they differ only in
        # the base reconstruction class (and RAxML's extra arguments).
        base_methods = {
            "STR+NJ": reconstruct_tree.NeighborJoining,
            "STR+SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR+RaXML": reconstruct_tree.RAxML,
        }
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR+RaXML" else {}
        spectral_method = reconstruct_tree.STDR(base_methods[method], similarity)
        start_time = time.time()
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            similarity,
            taxa_metadata=taxa_meta,
            threshhold=threshold,  # spelling matches the library keyword
            min_split=5,
            verbose=verbose,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
Exemple #2
0
def run_method(method, tree, seqs, threshold=None):
    """Reconstruct a tree from the given sequences with ``method`` and score it.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``, ``"STR + SNJ"``,
            ``"STR + RaXML"``.
        tree: Reference tree. Its ``taxon_namespace`` defines the taxon order
            and it is the ground truth passed to ``compare_trees``.
        seqs: Mapping indexed by taxon label; each entry is iterated and the
            ``symbol`` attribute of every element is read.
        threshold: Forwarded as ``threshhold`` to the spectral method; only
            used by the ``STR + `` methods. Printed when it is not ``None``.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the wall-clock time of the reconstruction call in
        seconds and ``RF``/``F1`` are the two values returned by
        ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    # Fail fast: without this check an unknown name would only surface at the
    # end, as an UnboundLocalError on ``start_time``/``tree_rec``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    similarity = reconstruct_tree.HKY_similarity_matrix_missing_data
    raxml_args = "-T 2 --HKY85 -c 1"

    # Character matrix with one row per taxon, in taxon_namespace order.
    taxons = [x.label for x in tree.taxon_namespace]
    ch_arr = np.array([[x.symbol for x in seqs[t]] for t in taxons])
    # Every "U" symbol is replaced by "T" before building the DNA matrix.
    ch_arr[ch_arr == "U"] = "T"

    ch_dendro = dendropy.DnaCharacterMatrix()
    ch_dendro.taxon_namespace = tree.taxon_namespace
    for row, taxon in enumerate(tree.taxon_namespace):
        ch_dendro.new_sequence(taxon, ch_arr[row, :].tolist())

    # In every branch the timer starts after the reconstructor object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(ch_dendro, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # Remaining names are the "STR + <base>" variants; they differ only
        # in the base reconstruction class (and RAxML's extra arguments).
        base_methods = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            base_methods[method], similarity)
        start_time = time.time()
        # Method and keyword spellings below match the library API as used
        # by this snippet.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
Exemple #3
0
def run_method(method, tree, threshold=None):
    """Simulate HKY85 characters on ``tree``, reconstruct the tree and score it.

    The simulation is fixed to ``simulate_discrete_chars(1000, tree,
    Hky85(kappa=2), mutation_rate=0.1)``.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``, ``"STR + SNJ"``,
            ``"STR + RaXML"``.
        tree: Reference tree. Used for the simulation, supplies the
            ``taxon_namespace`` handed to the reconstructors, and is the
            ground truth passed to ``compare_trees``.
        threshold: Forwarded as ``threshhold`` to the spectral method; only
            used by the ``STR + `` methods. Printed when it is not ``None``.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the wall-clock time of the reconstruction call in
        seconds and ``RF``/``F1`` are the two values returned by
        ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    # Fail fast: without this check an unknown name would only surface after
    # the simulation, as an UnboundLocalError on ``start_time``/``tree_rec``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    data_HKY = simulate_discrete_chars(1000,
                                       tree,
                                       Hky85(kappa=2),
                                       mutation_rate=0.1)
    # Character matrix with one row per taxon of the simulated data.
    ch_arr = np.array(
        [[x.symbol for x in data_HKY[t]] for t in data_HKY.taxon_namespace])

    # In every branch the timer starts after the reconstructor object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(data_HKY, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # Remaining names are the "STR + <base>" variants; they differ only
        # in the base reconstruction class (and RAxML's extra arguments).
        base_methods = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            base_methods[method], similarity)
        start_time = time.time()
        # Method and keyword spellings below match the library API as used
        # by this snippet.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
import generation
from dendropy.interop import raxml
from dendropy.model.discrete import simulate_discrete_chars, Jc69
from dendropy.calculate.treecompare import symmetric_difference
import pandas as pd
import numpy as np

# --- Benchmark configuration -------------------------------------------------
# Alternative (disabled) taxon counts: powers of two.
#num_taxa = [128,256,512,1024,2048,4096]
# Tree sizes to benchmark: 200, 400, ..., 1800 (the stop value 2000 is
# exclusive). Each value is passed to utils.unrooted_pure_kingman_tree below.
num_taxa = np.arange(200, 2000, 200)
# Number of repetitions per tree size.
# NOTE(review): the inner loop below is written `for n_itr in range(n_itr)`,
# which rebinds this name; after the first tree size it holds 9, so every
# later size runs one repetition fewer than the previous one -- confirm
# whether that is intended.
n_itr = 10
# Alternative (disabled) small taxon counts, e.g. for quick runs.
#num_taxa = [8,16,32]
# First positional argument of generation.simulate_sequences below
# (presumably the sequence length -- TODO confirm).
N = 1000
# Sequence model passed as `seq_model` to the simulation.
jc = generation.Jukes_Cantor()
# Mutation rate derived from the model via p2t(0.95); the exact meaning of
# the 0.95 argument is defined by generation.Jukes_Cantor -- TODO confirm.
mutation_rate = jc.p2t(0.95)
# Reconstruction methods under comparison.
nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix)
# NOTE(review): this rebinds `raxml`, shadowing the module imported above
# via `from dendropy.interop import raxml`.
raxml = reconstruct_tree.RAxML()
# Alternative (disabled): pre-built balanced binary trees instead of the
# per-iteration Kingman trees generated in the loop below.
#tree_list = [utils.balanced_binary(m) for m in num_taxa]

# Empty results table with columns method / runtime / RF / m.
df = pd.DataFrame(columns=['method', 'runtime', 'RF', 'm'])
for m in num_taxa:
    for n_itr in range(n_itr):
        reference_tree = utils.unrooted_pure_kingman_tree(m)
        observations, taxa_meta = generation.simulate_sequences(
            N,
            tree_model=reference_tree,
            seq_model=jc,
            mutation_rate=mutation_rate,
            alphabet="DNA")

        # NJ
        time_s = time.time()