Esempio n. 1
0
def run_method(method, tree, m = 300, kappa = 2, mutation_rate=0.05, threshold = None, verbose = False):
    """Simulate sequences on ``tree``, reconstruct a tree with ``method`` and score it.

    Sequences are generated with ``generation.simulate_sequences`` under
    ``generation.HKY(kappa=kappa)`` on a DNA alphabet. The selected
    reconstruction is timed (construction of the reconstructor object is
    excluded from the timing), compared against ``tree`` with
    ``reconstruct_tree.compare_trees`` and the results are printed.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR+NJ"``,
            ``"STR+SNJ"`` or ``"STR+RaXML"`` (case-sensitive, no spaces).
        tree: Reference tree used both as ``tree_model`` for the simulation
            and as the ground truth for the comparison.
        m: First positional argument of ``generation.simulate_sequences``
            (presumably the number of simulated sites -- confirm).
        kappa: ``kappa`` parameter forwarded to ``generation.HKY``.
        mutation_rate: Forwarded to ``generation.simulate_sequences``.
        threshold: Forwarded as ``threshhold`` to the ``STR+...`` methods;
            unused by the other methods.
        verbose: Forwarded as ``verbose`` to the ``STR+...`` methods.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the reconstruction time in seconds and ``RF``/``F1``
        are the two values returned by ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Fail fast: an unrecognised name used to fall through every branch and
    # only crash with UnboundLocalError after the simulation had already run.
    supported = ("RaXML", "SNJ", "NJ", "STR+NJ", "STR+SNJ", "STR+RaXML")
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa = kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA")
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(observations, taxa_meta, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(observations, taxa_meta)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(observations, taxa_meta)
    else:
        # Remaining names are the "STR+<inner>" variants; they differ only in
        # the inner reconstruction class and in the RAxML-specific arguments.
        inner_method, extra_kwargs = {
            "STR+NJ": (reconstruct_tree.NeighborJoining, {}),
            "STR+SNJ": (reconstruct_tree.SpectralNeighborJoining, {}),
            "STR+RaXML": (reconstruct_tree.RAxML, {"raxml_args": raxml_args}),
        }[method]
        spectral_method = reconstruct_tree.STDR(inner_method, similarity)
        start_time = time.time()
        # NOTE: "threshhold" (sic) is the keyword name used by this call;
        # presumably it matches the library signature, so it is kept as is.
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            similarity,
            taxa_metadata = taxa_meta,
            threshhold = threshold,
            min_split = 5,
            verbose = verbose,
            **extra_kwargs)
    runtime = time.time() - start_time

    RF,F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None: print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ",RF)
    print("F1% = ",F1) 
    return([method, str(threshold), runtime, RF, F1])
        ) and node.parent_node.child_nodes()[1].is_leaf():
            cherry_count += 1
    cherry_count = cherry_count / 2
    return cherry_count


# Experiment configuration.
# num_taxa is passed to utils.unrooted_pure_kingman_tree for every reference
# tree; num_reps is the number of iterations of the outer loop below.
num_taxa = 20
num_reps = 10

# Values of n handed to generation.simulate_sequences: 100, 150, ..., 350
# (the stop value 400 is excluded by np.arange).
N_vec = np.arange(100, 400, 50)
#reference_tree = utils.balanced_binary(num_taxa)
jc = generation.Jukes_Cantor(num_classes=2)

# NOTE(review): presumably p2t converts the value 0.9 into the rate/time
# parametrisation used by the model -- confirm against Jukes_Cantor.p2t.
mutation_rate = jc.p2t(0.9)

# Reconstruction methods. `nj` is constructed here but is not included in
# `methods` below; only `snj` and `treesvd` are.
snj = reconstruct_tree.SpectralNeighborJoining(
    reconstruct_tree.JC_similarity_matrix)
nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix)
treesvd = reconstruct_tree.TreeSVD()
methods = [snj, treesvd]
#results = compare_methods.experiment([reference_tree], jc, N_vec, methods=methods,\
#     mutation_rates = [mutation_rate], reps_per_tree=num_reps)
# Empty results table with one column per recorded quantity.
df = pd.DataFrame(
    columns=['method', 'runtime', 'RF', 'n', 'cherries_ref', 'cherries_res'])
for i in np.arange(num_reps):
    print(i)
    reference_tree = utils.unrooted_pure_kingman_tree(num_taxa)
    ch_ref = cherry_count_for_tree(reference_tree)
    for n in N_vec:
        observations, taxa_meta = generation.simulate_sequences(
            n,
            tree_model=reference_tree,
Esempio n. 3
0
def run_method(method, tree, threshold=None):
    """Simulate HKY85 data on ``tree``, reconstruct a tree with ``method`` and score it.

    1000 characters are simulated with ``simulate_discrete_chars`` under
    ``Hky85(kappa=2)`` and ``mutation_rate=0.1``. ``"RaXML"`` receives the
    simulated character matrix directly; every other method receives a NumPy
    array of symbols with one row per taxon of the simulated matrix. The
    reconstruction is timed (construction of the reconstructor object is
    excluded), compared with ``tree`` via ``reconstruct_tree.compare_trees``
    and the results are printed.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``,
            ``"STR + SNJ"`` or ``"STR + RaXML"`` (case-sensitive, with the
            spaces around ``+``).
        tree: Reference tree used for the simulation and as ground truth.
        threshold: Forwarded as ``threshhold`` to the ``STR + ...`` methods;
            unused by the other methods.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the reconstruction time in seconds and ``RF``/``F1``
        are the two values returned by ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Fail fast: an unrecognised name used to fall through every branch and
    # only crash with UnboundLocalError after the simulation had already run.
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    data_HKY = simulate_discrete_chars(1000,
                                       tree,
                                       Hky85(kappa=2),
                                       mutation_rate=0.1)
    # One row of symbols per taxon, in data_HKY.taxon_namespace order.
    # NOTE(review): the reconstructors below are given tree.taxon_namespace;
    # presumably both namespaces list the taxa in the same order -- confirm.
    ch_arr = np.array(
        [[x.symbol for x in data_HKY[t]] for t in data_HKY.taxon_namespace])

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(data_HKY, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # Remaining names are the "STR + <inner>" variants; they differ only
        # in the inner reconstruction class and the RAxML-specific arguments.
        inner_method, extra_kwargs = {
            "STR + NJ": (reconstruct_tree.NeighborJoining, {}),
            "STR + SNJ": (reconstruct_tree.SpectralNeighborJoining, {}),
            "STR + RaXML": (reconstruct_tree.RAxML, {"raxml_args": raxml_args}),
        }[method]
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_method, similarity)
        start_time = time.time()
        # NOTE: the method name and the "threshhold" keyword are spelled
        # exactly as at the original call site; presumably they match the
        # library version in use, so they are kept unchanged.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs)
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None: print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return ([method, str(threshold), runtime, RF, F1])
Esempio n. 4
0
def run_method(method, tree, seqs, threshold=None):
    """Reconstruct a tree from ``seqs`` with ``method`` and score it against ``tree``.

    The symbols of ``seqs`` are collected into a NumPy array with one row per
    taxon label of ``tree.taxon_namespace``; every ``"U"`` symbol is replaced
    by ``"T"`` and a ``dendropy.DnaCharacterMatrix`` is built from the result.
    ``"RaXML"`` receives that character matrix, every other method receives
    the symbol array. The reconstruction is timed (construction of the
    reconstructor object is excluded), compared with ``tree`` via
    ``reconstruct_tree.compare_trees`` and the results are printed.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``,
            ``"STR + SNJ"`` or ``"STR + RaXML"`` (case-sensitive, with the
            spaces around ``+``).
        tree: Reference tree; supplies the taxon namespace and the ground
            truth for the comparison.
        seqs: Mapping-like object indexed by taxon label whose entries
            iterate over objects exposing ``.symbol``.
        threshold: Forwarded as ``threshhold`` to the ``STR + ...`` methods;
            unused by the other methods.

    Returns:
        The list ``[method, str(threshold), runtime, RF, F1]`` where
        ``runtime`` is the reconstruction time in seconds and ``RF``/``F1``
        are the two values returned by ``reconstruct_tree.compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Fail fast: an unrecognised name used to fall through every branch and
    # only crash with UnboundLocalError after the input had been converted.
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    taxons = [x.label for x in tree.taxon_namespace]
    ch_arr = np.array([[x.symbol for x in seqs[t]] for t in taxons])
    # Presumably maps RNA uracil onto DNA thymine so the DNA matrix accepts it.
    ch_arr[ch_arr == "U"] = "T"

    ch_dendro = dendropy.DnaCharacterMatrix()
    ch_dendro.taxon_namespace = tree.taxon_namespace
    # Row idx of ch_arr belongs to the idx-th taxon of the namespace.
    for idx, row in enumerate(ch_arr):
        ch_dendro.new_sequence(tree.taxon_namespace[idx], row.tolist())

    similarity = reconstruct_tree.HKY_similarity_matrix_missing_data
    raxml_args = "-T 2 --HKY85 -c 1"

    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(ch_dendro, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(ch_arr, tree.taxon_namespace)
    else:
        # Remaining names are the "STR + <inner>" variants; they differ only
        # in the inner reconstruction class and the RAxML-specific arguments.
        inner_method, extra_kwargs = {
            "STR + NJ": (reconstruct_tree.NeighborJoining, {}),
            "STR + SNJ": (reconstruct_tree.SpectralNeighborJoining, {}),
            "STR + RaXML": (reconstruct_tree.RAxML, {"raxml_args": raxml_args}),
        }[method]
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_method, similarity)
        start_time = time.time()
        # NOTE: the method name and the "threshhold" keyword are spelled
        # exactly as at the original call site; presumably they match the
        # library version in use, so they are kept unchanged.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs)
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None: print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return ([method, str(threshold), runtime, RF, F1])