Beispiel #1
0
 def evolve_to_current_time(node, finalize=False):
     """Stamp viruses with the current time, or collect sampled trees on finalize.

     When ``finalize`` is false, every virus of ``node`` gets its time set to
     ``GC.time`` (a ``None`` node is ignored). When ``finalize`` is true and
     ``GC.sampled_trees`` does not exist yet, each non-empty Newick line of
     ``GC.tree_file`` is paired with the root virus of the seed its leaves
     descend from, the pairs are stored in ``GC.sampled_trees`` and
     ``GC.PRUNE_TREES`` is set to ``False``.
     """
     if not finalize:
         # not the end yet: nothing to evolve, just move the clocks forward
         if node is not None:
             for virus in node.viruses():
                 virus.set_time(GC.time)
         return
     if hasattr(GC, 'sampled_trees'):
         return

     # seed name -> root virus of that seed (seeds have no infector)
     root_virus_of_seed = {}
     for source, target, _ in GC.transmissions:
         if source is None:
             root_virus_of_seed[target.get_name()] = GC.seed_to_first_virus[target].get_root()

     # infected name -> name of the seed at the start of its transmission chain
     seed_of_infected = {}
     for source, target, _ in GC.transmissions:
         name = target.get_name()
         seed_of_infected[name] = name if source is None else seed_of_infected[source.get_name()]

     # distinct, non-empty tree strings (lines may arrive as bytes or str)
     newick_strings = {
         line.decode().strip() if isinstance(line, bytes) else line.strip()
         for line in GC.tree_file
     }
     newick_strings.discard('')

     GC.sampled_trees = set()
     for newick in newick_strings:
         # the second '|'-separated field of a leaf label names the infected individual
         leaf_seeds = {
             seed_of_infected[str(leaf).split('|')[1]]
             for leaf in read_tree_newick(newick).traverse_leaves()
         }
         assert len(leaf_seeds) == 1, "More than 1 seed in tree: %s" % newick
         (seed,) = leaf_seeds
         GC.sampled_trees.add((root_virus_of_seed[seed], newick))
     GC.PRUNE_TREES = False
def resolve_polytomy_helper(input_tree, output_file, hide_prefix=None):
    """Resolve the polytomies of a Newick tree and write the result.

    Parameters
    ----------
    input_tree : Newick input handed to ``treeswift.read_tree_newick``
    output_file : destination handed to ``write_tree_newick``
    hide_prefix : when true, the tree is written with
        ``hide_rooted_prefix=True``. When left as ``None`` the function falls
        back to a module-level ``hide_prefix`` if one exists (the name the
        original body read without defining), else ``False``.
    """
    if hide_prefix is None:
        # keep working for callers that relied on a module-level flag
        hide_prefix = globals().get("hide_prefix", False)
    full_tree = treeswift.read_tree_newick(input_tree)
    full_tree.resolve_polytomies()
    if hide_prefix:
        full_tree.write_tree_newick(output_file, hide_rooted_prefix=True)
    else:
        full_tree.write_tree_newick(output_file)
Beispiel #3
0
def preorder(m):
    """Return the seconds tool ``m`` needs for one full preorder traversal.

    The tree is parsed from the module-level ``treestr`` / ``treeio`` before
    the clock starts, so only the traversal itself is timed. ``m`` must be
    one of 'dendropy', 'biophylo', 'treeswift' or 'ete3'.
    """
    if m == 'dendropy':
        parsed = dendropy.Tree.get(data=treestr, schema='newick')
        began = time()
        for _ in parsed.preorder_node_iter():
            pass
        elapsed = time() - began
    elif m == 'biophylo':
        parsed = Phylo.read(treeio, 'newick')
        began = time()
        for _ in parsed.find_clades(order='preorder'):
            pass
        elapsed = time() - began
    elif m == 'treeswift':
        parsed = read_tree_newick(treestr)
        began = time()
        for _ in parsed.traverse_preorder():
            pass
        elapsed = time() - began
    elif m == 'ete3':
        parsed = ete3.Tree(treestr,format=1)
        began = time()
        for _ in parsed.traverse(strategy='preorder'):
            pass
        elapsed = time() - began
    else:
        assert False, "Invalid tool: %s"%m
    return elapsed
 def evolve_to_current_time(node, finalize=False):
     """Grow every lagging virus of ``node`` up to ``GC.time`` with dualbirth.

     For each virus whose time is behind ``GC.time``, the external program at
     ``GC.dualbirth_path`` is run for the missing duration, the virus is
     removed from ``node``, its time is advanced by the simulated root edge
     length and the children of the simulated root are attached through
     ``GC.treenode_add_child``. ``finalize`` is accepted but not used.
     """
     if node is None:
         return
     # snapshot first: viruses are removed from the node while we iterate
     for virus in list(node.viruses()):
         elapsed = GC.time - virus.get_time()
         if not elapsed > 0:
             continue
         node.remove_virus(virus)
         try:
             command = [GC.dualbirth_path, str(GC.rate_A), str(GC.rate_B), '-t', str(elapsed)]
             seed = GC.random_number_seed
             if seed is not None:
                 # hand out a fresh seed to every dualbirth invocation
                 command.extend(['-s', str(seed)])
                 GC.random_number_seed = seed + 1
             newick = check_output(command).decode()
         except FileNotFoundError:
             from os import chdir
             chdir(GC.START_DIR)
             assert False, "dualbirth executable was not found: %s" % GC.dualbirth_path
         subtree = read_tree_newick(newick)
         virus.set_time(virus.get_time() + subtree.root.edge_length)
         for child in subtree.root.children:
             GC.treenode_add_child(virus, child, node)
Beispiel #5
0
def mrca(m):
    """Return the seconds tool ``m`` needs to find the MRCA of all leaves.

    The tree is parsed from the module-level ``treestr`` / ``treeio`` before
    the clock starts; collecting the leaves and the MRCA query are both
    inside the timed section. ``m`` must be one of 'dendropy', 'biophylo',
    'treeswift' or 'ete3'.
    """
    if m == 'dendropy':
        parsed = dendropy.Tree.get(data=treestr, schema='newick')
        began = time()
        tips = {leaf.taxon for leaf in parsed.leaf_node_iter()}
        parsed.mrca(taxa=tips)
        elapsed = time() - began
    elif m == 'biophylo':
        parsed = Phylo.read(treeio, 'newick')
        began = time()
        tips = parsed.get_terminals()
        parsed.common_ancestor(tips)
        elapsed = time() - began
    elif m == 'treeswift':
        parsed = read_tree_newick(treestr)
        began = time()
        tips = {str(leaf) for leaf in parsed.traverse_leaves()}
        parsed.mrca(tips)
        elapsed = time() - began
    elif m == 'ete3':
        parsed = ete3.Tree(treestr,format=1)
        began = time()
        tips = parsed.get_leaf_names()
        parsed.get_common_ancestor(tips)
        elapsed = time() - began
    else:
        assert False, "Invalid tool: %s"%m
    return elapsed
def check_mulrf_scores(sfile, gfile, mulrf):
    """
    Verifies that preprocessing gene family trees leaves MulRF scores unchanged

    Every gene tree is scored twice against the species tree, once as read
    and once after preprocessing; the preprocessed score plus the computed
    score shift must equal the plain score. The summed plain scores are
    printed and the process is then terminated with os._exit(0).

    Parameters
    ----------
    sfile : string
            name of file containing species tree
    gfile : string
            name of file containing gene family trees
    mulrf: string
           name including full path of MulRFScorer binary
    """
    # Species tree, stripped of internal labels and unifurcations
    species_tree = treeswift.read_tree(sfile, "newick")
    remove_internal_node_labels(species_tree)
    species_tree.suppress_unifurcations()

    rf_sum = 0

    with open(gfile, 'r') as handle:
        for line_no, raw in enumerate(handle.readlines(), start=1):
            # drop every whitespace character from the line
            newick = "".join(raw.split())

            # Plain MUL-tree
            mul_tree = treeswift.read_tree_newick(newick)
            remove_internal_node_labels(mul_tree)
            unroot(mul_tree)

            # Pre-processed MUL-tree
            pre_tree = treeswift.read_tree(newick, "newick")
            remove_internal_node_labels(pre_tree)
            nEM, nLM, nR, c, nEMX, nLMX = preprocess_multree(pre_tree)
            shift = compute_score_shift(nEM, nLM, nR, c, nEMX, nLMX)

            # Score both variants; output names derive from the gene file name
            prefix = gfile.rsplit('.', 1)[0]
            plain_score = score_with_MulRF(mulrf, species_tree, mul_tree,
                                           prefix + "-scored")
            pre_score = score_with_MulRF(mulrf, species_tree, pre_tree,
                                         prefix + "-preprocessed-and-scored")

            if pre_score + shift != plain_score:
                sys.exit("Gene tree on line %d failed!\n" % line_no)

            rf_sum += plain_score

    sys.stdout.write('%d\n' % rf_sum)
    sys.stdout.flush()
    os._exit(0)  # CRITICAL ON BLUE WATERS LOGIN NODE
Beispiel #7
0
 def time_to_mutation_rate(tree):
     """Rescale every edge of a Newick tree by a ``noncentral_f`` draw.

     On first use NumPy's global RNG is seeded from ``GC.random_number_seed``
     and ``GC.NUMPY_SEEDED`` is set. Each edge length that is not ``None`` is
     multiplied by an independent draw parameterised by
     ``GC.tree_rate_dfnum``, ``GC.tree_rate_dfden`` and
     ``GC.tree_rate_lambda``. Returns the rescaled tree as a string.
     """
     if not hasattr(GC,"NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         # a ``None`` seed (NumPy picks fresh entropy) cannot be incremented
         if GC.random_number_seed is not None:
             GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.edge_length is not None:
             node.edge_length *= noncentral_f(dfnum=GC.tree_rate_dfnum,dfden=GC.tree_rate_dfden,nonc=GC.tree_rate_lambda)
     return str(t)
def read_preprocess_and_write_multrees(ifile, ofile, verbose):
    """
    Creates file with preprocessed MUL-trees for FastRFS

    A line is skipped (nothing is written for it) when it is empty, when its
    tree has fewer than 4 leaves, or when fewer than 4 leaves remain after
    preprocessing.

    Parameters
    ----------
    ifile : string
            name of file containing gene family trees
            (one newick string per line)
    ofile : string
            name of output file (one newick string per line)
    verbose : bool
              when true, report progress and the reason for every skipped
              line on stdout
    """
    # skip code -> explanation appended to "...did not write tree as "
    skip_reasons = {
        1: "line is empty!",
        2: "tree has <4 leaves before preprocessing!",
        3: "tree has <4 leaves after preprocessing!",
    }

    with open(ifile, 'r') as fi, open(ofile, 'w') as fo:
        for g, line in enumerate(fi, start=1):
            if verbose:
                sys.stdout.write("Preprocessing gene tree on line %d...\n" % g)
                sys.stdout.flush()

            # drop every whitespace character from the line
            temp = "".join(line.split())

            donot = 0
            if not temp:
                donot = 1
            else:
                tree = treeswift.read_tree_newick(temp)

                if count_leaves(tree) < 4:
                    donot = 2
                else:
                    # only the post-preprocessing leaf count (nLMX) is needed here
                    [nEM, nLM, nR, c, nEMX, nLMX] = preprocess_multree(tree)

                    if nLMX < 4:
                        donot = 3
                    else:
                        fo.write(tree.newick() + '\n')

            # reported for every skip reason, including empty lines
            if donot and verbose:
                sys.stdout.write("...did not write tree as %s\n"
                                 % skip_reasons[donot])
                sys.stdout.flush()
Beispiel #9
0
def _read_phylogeny(phylogeny_fp):
    """Parse the first line of the file at ``phylogeny_fp`` as a Newick tree.

    The line is decoded first if it happens to be ``bytes``, stripped of
    surrounding whitespace and handed to ``read_tree_newick``.
    """
    with open(str(phylogeny_fp)) as handle:
        first_line = handle.readline()
        text = first_line.decode() if isinstance(first_line, bytes) else first_line
        return read_tree_newick(text.strip())
Beispiel #10
0
 def time_to_mutation_rate(tree):
     """Rescale every edge of a Newick tree by a ``pareto`` draw.

     On first use NumPy's global RNG is seeded from ``GC.random_number_seed``
     and ``GC.NUMPY_SEEDED`` is set. Each edge length that is not ``None`` is
     multiplied by an independent ``pareto(a=GC.tree_rate_shape)`` draw.
     Returns the rescaled tree as a string.
     """
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         # a ``None`` seed (NumPy picks fresh entropy) cannot be incremented
         if GC.random_number_seed is not None:
             GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.edge_length is not None:
             node.edge_length *= pareto(a=GC.tree_rate_shape)
     return str(t)
Beispiel #11
0
def induce_tree_helper(input_tree, input_data, output_file, hide_prefix, input_type, resolve_polytomies):
    """Write the subtree of ``input_tree`` induced by a set of leaf labels.

    Parameters
    ----------
    input_tree : Newick input handed to ``treeswift.read_tree_newick``
    input_data : source of the labels to keep; a FASTA file path when
        ``input_type`` is "fasta", a Newick input when it is "newick"
    output_file : destination handed to ``write_tree_newick``
    hide_prefix : when true, write with ``hide_rooted_prefix=True``
    input_type : "fasta" (keep the sequence ids) or "newick" (keep the leaf
        labels); any other value leaves the set of labels empty
    resolve_polytomies : when true, resolve polytomies in the induced tree
        before writing
    """
    full_tree = treeswift.read_tree_newick(input_tree)

    to_keep_node_labels = set()
    if input_type == "fasta":
        # context manager so the FASTA handle is closed once parsing is done
        with open(input_data) as fasta_handle:
            to_keep_node_labels.update(
                sequence.id for sequence in SeqIO.parse(fasta_handle, "fasta"))
    elif input_type == "newick":
        to_keep_tree = treeswift.read_tree_newick(input_data)
        to_keep_node_labels.update(
            leaf.label for leaf in to_keep_tree.traverse_leaves())

    induced_tree = full_tree.extract_tree_with(to_keep_node_labels)
    if resolve_polytomies:
        induced_tree.resolve_polytomies()
    if hide_prefix:
        induced_tree.write_tree_newick(output_file, hide_rooted_prefix=True)
    else:
        induced_tree.write_tree_newick(output_file)
Beispiel #12
0
def run_TreeCluster(threshold, tree_file, threshold_free, method, support):
    """Cluster the tree read from ``tree_file`` and return the clusters.

    The clustering function is looked up in ``METHODS`` by the lower-cased
    ``method`` name. With ``threshold_free`` set, the matching entry of
    ``THRESHOLDFREE`` is called with that clustering function as its first
    argument instead of calling the function directly.
    """
    tree = read_tree_newick(tree_file)
    if threshold_free is None:
        return METHODS[method.lower()](tree, threshold, support)
    threshold_free_runner = THRESHOLDFREE[threshold_free]
    return threshold_free_runner(METHODS[method.lower()], tree, threshold,
                                 support)
def remove_outgroups_newick(tree_filename, outgroups_filename):
    """Write a copy of a Newick tree without its outgroup leaves.

    Returns ``None`` when ``tree_filename`` is ``None`` and ``tree_filename``
    unchanged when ``outgroups_filename`` is ``None``. Otherwise the labels
    listed in ``outgroups_filename`` (one per line) are removed, the root
    edge length is cleared, and the tree is written without a leading
    ``[&R]`` marker to ``<name>.no_outgroup.<ext>``, whose path is returned.

    Raises ``ValueError`` if ``tree_filename`` is not an existing file.
    """
    if tree_filename is None:
        return None
    if outgroups_filename is None:
        return tree_filename
    if not isfile(tree_filename):
        raise ValueError("Invalid tree file: %s" % tree_filename)
    outgroups = {l.strip() for l in read_file(outgroups_filename)}
    tree = read_tree_newick(tree_filename)
    # insert ".no_outgroup" before the last extension (ignoring a gz suffix)
    name_parts = rstrip_gz(tree_filename).split('.')
    out_filename = '%s.no_outgroup.%s' % ('.'.join(name_parts[:-1]), name_parts[-1])
    tree_no_og = tree.extract_tree_without(outgroups)
    tree_no_og.root.edge_length = None
    # Remove the rooted marker as a prefix; str.lstrip('[&R] ') would treat it
    # as a character set and could also eat leading label characters.
    newick = tree_no_og.newick().lstrip(' ')
    if newick.startswith('[&R]'):
        newick = newick[len('[&R]'):].lstrip(' ')
    write_file('%s\n' % newick, out_filename)
    return out_filename
Beispiel #14
0
def relabel_tree_helper(input_tree, tax_list, output_file, hide_prefix):
    """Rename tree nodes from line numbers to the names listed in a file.

    Line ``i`` (0-based) of ``tax_list`` supplies the new name for the node
    currently labelled ``str(i)``. The mapping is printed, applied through
    ``rename_nodes`` and the tree is written to ``output_file``, with
    ``hide_rooted_prefix=True`` when ``hide_prefix`` is true.
    """
    full_tree = treeswift.read_tree_newick(input_tree)
    with open(tax_list, "r") as handle:
        tax_map = {
            str(index): name.strip() for index, name in enumerate(handle)
        }
    print(tax_map)
    full_tree.rename_nodes(tax_map)
    # only pass the keyword when the prefix has to be hidden
    write_options = {"hide_rooted_prefix": True} if hide_prefix else {}
    full_tree.write_tree_newick(output_file, **write_options)
 def time_to_mutation_rate(tree):
     """Rescale edges with rates drawn from an exponential around the parent rate.

     On first use NumPy's global RNG is seeded from ``GC.random_number_seed``
     and ``GC.NUMPY_SEEDED`` is set. In preorder, the root gets the rate
     ``GC.tree_rate_R0`` and every other node draws its rate from
     ``exponential(scale=<parent rate>)``; each edge length that is not
     ``None`` is multiplied by its node's rate. Returns the tree as a string.
     """
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         # a ``None`` seed (NumPy picks fresh entropy) cannot be incremented
         if GC.random_number_seed is not None:
             GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.is_root():
             node.rate = GC.tree_rate_R0
         else:
             node.rate = exponential(scale=node.parent.rate)
         if node.edge_length is not None:
             node.edge_length *= node.rate
     return str(t)
Beispiel #16
0
    def test_leaf_dijkstra(self):
        # Checks leaf_dijkstra() called from individual nodes of small trees:
        # the expected values are lists of (distance, leaf label) pairs.
        # Nodes are picked by their position in the preorder traversal list,
        # so the indices below depend on the library's traversal order.
        tree = ts.read_tree_newick("(A:3.2,(B:2.1,(C:1,D:1)));")
        nodes = [n for n in tree.traverse_preorder()]

        # no argument: all three other leaves are expected, nearest first
        b = nodes[5]
        obs = b.leaf_dijkstra()
        exp = [(3.1,"C"), (3.1,"D"), (5.300000000000001,"A")]
        self.assertEqual(obs, exp)
        
        # argument 2: only two pairs are expected
        # (presumably the argument caps the number of leaves returned -- confirm)
        a = nodes[6]
        obs = a.leaf_dijkstra(2)
        exp = [(4.2,"C"), (4.2,"D")]
        self.assertEqual(obs, exp)
        
        # NOTE(review): this Newick string has no comma between the two
        # top-level subtrees "(A:2.3,E:3)" and "(B:2,...)" -- confirm intended.
        tree = ts.read_tree_newick("((A:2.3,E:3)(B:2,(C:1.2,D:1)));")
        nodes = [n for n in tree.traverse_preorder()]

        # argument 3: three pairs are expected
        a = nodes[7]
        obs = a.leaf_dijkstra(3)
        exp = [(3.3,"D"), (3.5,"C"), (4.3,"B")]
        self.assertEqual(obs, exp)

        # passing a node object as the argument must be rejected
        with self.assertRaises(TypeError):
          obs = a.leaf_dijkstra(nodes[0])
 def time_to_mutation_rate(tree):
     """Rescale every edge of a Newick tree by a ``truncnorm`` draw.

     On first use NumPy's global RNG is seeded from ``GC.random_number_seed``
     and ``GC.NUMPY_SEEDED`` is set. Each edge length that is not ``None`` is
     multiplied by one ``truncnorm.rvs`` sample parameterised by
     ``GC.tree_rate_min``, ``GC.tree_rate_max``, ``GC.tree_rate_loc`` and
     ``GC.tree_rate_scale``. Returns the rescaled tree as a string.
     """
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         # a ``None`` seed (NumPy picks fresh entropy) cannot be incremented
         if GC.random_number_seed is not None:
             GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.edge_length is not None:
             # size=1 yields a one-element array; take the scalar out of it
             node.edge_length *= truncnorm.rvs(a=GC.tree_rate_min,
                                               b=GC.tree_rate_max,
                                               loc=GC.tree_rate_loc,
                                               scale=GC.tree_rate_scale,
                                               size=1)[0]
     return str(t)
def estimate_mutation_rate(rooted_tree_filename, dates_filename):
    """Estimate the mutation rate as the slope of root-to-tip distance over time.

    Sample dates are loaded with ``load_dates_ViReport`` and converted with
    ``date_to_days``. For every leaf, its date (x) is paired with its summed
    edge length from the root (y); the slope of the linear regression through
    those points is returned.
    """
    tree = read_tree_newick(rooted_tree_filename)
    days_by_label = {
        label: date_to_days(date)
        for label, date in load_dates_ViReport(dates_filename)
    }
    root_to_tip = {}
    sample_times = []
    tip_distances = []
    for node in tree.traverse_preorder():
        if node.is_root():
            distance = 0
        else:
            # preorder guarantees the parent's distance is already known
            distance = root_to_tip[node.parent]
            if node.edge_length is not None:
                distance += node.edge_length
        root_to_tip[node] = distance
        if node.is_leaf():
            sample_times.append(days_by_label[node.label])
            tip_distances.append(distance)
    # slope is mutations/site/time, x-intercept is tMRCA
    slope = linregress(sample_times, tip_distances)[0]
    return slope
Beispiel #19
0
 def time_to_mutation_rate(tree):
     """Rescale edges with log-normally autocorrelated rates.

     On first use NumPy's global RNG is seeded from ``GC.random_number_seed``
     and ``GC.NUMPY_SEEDED`` is set. In preorder, the root gets the rate
     ``GC.tree_rate_R0``; every other node must have a positive edge length
     and draws its rate from ``lognormal`` with ``mean`` set to the parent
     rate and ``sigma`` set to ``GC.tree_rate_v`` times the edge length. Each
     edge length that is not ``None`` is multiplied by its node's rate.
     Returns the rescaled tree as a string.
     """
     if not hasattr(GC, "NUMPY_SEEDED"):
         from numpy.random import seed as numpy_seed
         numpy_seed(seed=GC.random_number_seed)
         # a ``None`` seed (NumPy picks fresh entropy) cannot be incremented
         if GC.random_number_seed is not None:
             GC.random_number_seed += 1
         GC.NUMPY_SEEDED = True
     t = read_tree_newick(tree)
     for node in t.traverse_preorder():
         if node.is_root():
             node.rate = GC.tree_rate_R0
         else:
             assert node.edge_length is not None and node.edge_length > 0, "All edges must have positive lengths for TreeUnit_AutocorrelatedLogNormal"
             node.rate = lognormal(mean=node.parent.rate,
                                   sigma=GC.tree_rate_v * node.edge_length)
         if node.edge_length is not None:  # root node might not have incident edge
             node.edge_length *= node.rate
     return str(t)
Beispiel #20
0
def read_peroba_database(f_prefix, trust_global_sequences=False):
    """Load the files of a peroba database that share the prefix ``f_prefix``.

    File names are ``f_prefix`` (a single trailing "." is dropped) plus the
    suffixes in ``common.suffix``. Reads the metadata and subsample tables
    (indexed by ``peroba_seq_uid``; subsample columns are coerced to
    numeric), the tree (first line of the tree file with quotes and the
    ``[&R]`` marker removed), the alignment and, unless
    ``trust_global_sequences`` is true, the unaligned sequences.

    Returns ``[metadata, sequences, tree, subsample, unaligned]``;
    ``unaligned`` is an empty list when ``trust_global_sequences`` is true.
    """
    # both `perobaDB.0621` and `perobaDB.0621.` are valid
    # (endswith also copes with an empty prefix, unlike f_prefix[-1])
    if f_prefix.endswith("."):
        f_prefix = f_prefix[:-1]
    fname = f_prefix + common.suffix["metadata"]
    logger.info(f"Reading database metadata from '{fname}'")
    metadata = pd.read_csv(fname,
                           compression="infer",
                           index_col="peroba_seq_uid",
                           dtype="unicode")
    metadata = common.df_finalise_metadata(metadata)

    fname = f_prefix + common.suffix["subsample"]
    logger.info(f"Reading subsampling information from '{fname}'")
    subsample = pd.read_csv(fname,
                            compression="infer",
                            index_col="peroba_seq_uid",
                            dtype="unicode")
    # everything was read as text; values that are not numbers become NaN
    for col in subsample.columns:
        subsample[col] = pd.to_numeric(subsample[col], errors='coerce')

    fname = f_prefix + common.suffix["tree"]
    logger.info(f"Reading database tree from '{fname}'")
    # context manager so the tree file handle is closed after reading
    with open(fname) as tree_handle:
        first_line = tree_handle.readline()
    treestring = first_line.rstrip().replace("'", "").replace(
        "\"", "").replace("[&R]", "")
    tree = treeswift.read_tree_newick(treestring)

    fname = f_prefix + common.suffix["alignment"]
    logger.info(f"Reading database alignment from '{fname}'")
    sequences = common.read_fasta(fname, check_name=False)

    unaligned = []
    if trust_global_sequences:
        logger.info(
            "Will assume global sequences are 'better' than local when duplicates exist"
        )
    else:
        fname = f_prefix + common.suffix["sequences"]
        logger.info(f"Reading database unaligned sequences from '{fname}'")
        unaligned = common.read_fasta(fname, check_name=False)

    logger.info(
        "Finished loading the database; dataframe has dimensions %s and it's assumed we have the same number of sequences; the tree may be smaller",
        metadata.shape)
    return [metadata, sequences, tree, subsample, unaligned]
def sample(tree, sampling_method):
    """
    Samples from a tagged tree, by taking clades at random at duplication vertices

    NOTE: must be run after 'tag()'

    Parameters
    ----------
    tree: tagged treeswift tree
    sampling_method: defines the number of samples
                "linear" - number of samples = number of duplication nodes + 1
                "exp" - number of samples = 2^number of duplication nodes
                string of digits - that exact number of samples
                custom method - a callable that takes the number of
                                duplication nodes and returns the number of samples

    Returns samples as a list of trees
    """
    random.seed(0)  # set fixed seed for reproducibility
    out = []
    if sampling_method == 'linear':
        n_sample = tree.n_dup + 1
    elif sampling_method == 'exp':
        n_sample = 2**tree.n_dup
    elif isinstance(sampling_method, str) and sampling_method.isdigit():
        # only strings have isdigit(); a callable must reach the branch below
        n_sample = int(sampling_method)
    else:
        n_sample = sampling_method(tree.n_dup)

    for i in range(n_sample):
        for node in tree.traverse_postorder(leaves=False):
            if node.tag == 'D':
                # deletes one randomly
                [left, right] = node.child_nodes()
                # we want to keep sections with more duplicates more often
                # otherwise we can end up getting the same small tree repeatedly
                bias = (left.n_dup + 0.5) / node.n_dup
                node.delete = left if random.random() > bias else right
                node.remove_child(node.delete)
        # snapshot the pruned tree by round-tripping it through Newick
        out.append(treeswift.read_tree_newick(tree.newick()))
        # restore the removed children so the next sample starts from the full tree
        for node in tree.traverse_preorder(leaves=False):
            if node.tag == 'D':
                node.add_child(node.delete)
    return out
Beispiel #22
0
 def time_to_mutation_rate(tree):
     """Rescale edges with rates that step up or down along the tree.

     In preorder, the root gets the rate ``GC.tree_rate_R0``. Every other
     node starts from its parent's rate and, based on one ``random()`` draw,
     moves up by ``GC.tree_rate_delta`` (capped at ``GC.tree_rate_max``),
     moves down by it (floored at ``GC.tree_rate_min``), or stays unchanged;
     the up and down moves each cover half of ``GC.tree_rate_p``. Each edge
     length that is not ``None`` is multiplied by its node's rate. Returns
     the rescaled tree as a string.
     """
     rescaled = read_tree_newick(tree)
     for node in rescaled.traverse_preorder():
         if node.is_root():
             node.rate = GC.tree_rate_R0
         else:
             node.rate = node.parent.rate
             draw = random()
             if draw < GC.tree_rate_p / 2:
                 # step up, never beyond the maximum
                 node.rate = min(node.rate + GC.tree_rate_delta, GC.tree_rate_max)
             elif draw < GC.tree_rate_p:
                 # step down, never below the minimum
                 node.rate = max(node.rate - GC.tree_rate_delta, GC.tree_rate_min)
         if node.edge_length is not None:
             node.edge_length *= node.rate
     return str(rescaled)
Beispiel #23
0
def main(args):
    """Write the maximal clades of every tree in ``args.input``, one per line.

    The output path is ``args.output`` or, when that is ``None``, the input
    path with "-mclades" inserted before its last extension. Every input line
    is parsed as a Newick tree and unrooted; each clade returned by
    ``find_max_clades`` is unrooted, has its unifurcations suppressed and is
    written unless it is trivial and ``args.trivial`` is false.
    """
    if args.output is not None:
        output = args.output
    else:
        parts = args.input.rsplit('.', 1)
        output = parts[0] + '-mclades.' + parts[1]

    with open(args.input, 'r') as fi, open(output, 'w') as fo:
        for line in fi:
            tree = treeswift.read_tree_newick(line)
            unroot(tree)
            for clade in find_max_clades(tree, args.delimiter):
                unroot(clade)
                clade.suppress_unifurcations()
                newick = clade.newick()
                if not args.trivial and trivial(newick):
                    continue
                fo.write(newick + '\n')
 def pairwise_distances(tree_filename):
     """Write all pairwise leaf distances of a tree as CSV and return the path.

     The output goes to ``pairwise_distances_phylogeny.csv`` (plus ``.gz``
     when ``GC.GZIP_OUTPUT`` is set) inside ``GC.OUT_DIR_OUTFILES``. If that
     file, or the same name with an extra ``.gz``, already exists, nothing is
     recomputed. Each unordered pair of labels appears once, in sorted order.

     Raises ``ValueError`` if ``tree_filename`` is not an existing file.
     """
     if not isfile(tree_filename):
         raise ValueError("Invalid tree file: %s" % tree_filename)
     out_filename = '%s/pairwise_distances_phylogeny.csv' % GC.OUT_DIR_OUTFILES
     if GC.GZIP_OUTPUT:
         out_filename += '.gz'
     if isfile(out_filename) or isfile('%s.gz' % out_filename):
         GC.SELECTED['Logging'].writeln("Pairwise phylogenetic distances exist. Skipping recomputation.")
         return out_filename
     distances = read_tree_newick(tree_filename).distance_matrix(leaf_labels=True)
     labels = sorted(distances.keys())
     rows = ['ID1,ID2,Distance']
     for position, first in enumerate(labels):
         # pair each label only with the labels that sort after it
         for second in labels[position + 1:]:
             rows.append('%s,%s,%s' % (first, second, GC.num_str(distances[first][second])))
     GC.write_file('\n'.join(rows), out_filename)
     return out_filename
Beispiel #25
0
 def reconstruct(rooted_tree_filename, aln_filename):
     """Run ``treetime ancestral`` and return the path of the ancestral sequences.

     The result is ``ancestral_sequences.fas`` (plus ``.gz`` when
     ``GC.GZIP_OUTPUT`` is set) inside ``GC.OUT_DIR_OUTFILES``; if it already
     exists nothing is recomputed. Otherwise the tree is rewritten with
     labelled internal nodes, a gzipped alignment is unpacked, the command is
     recorded in ``command.txt`` and executed with its stdout sent to
     ``log.txt`` (all inside the TreeTime temporary directory), and TreeTime's
     ``ancestral_sequences.fasta`` is copied to the result path.

     Raises ``ValueError`` if either input file does not exist.
     """
     if not isfile(rooted_tree_filename):
         raise ValueError("Invalid tree file: %s" % rooted_tree_filename)
     if not isfile(aln_filename):
         raise ValueError("Invalid alignment file: %s" % aln_filename)
     treetime_dir = '%s/TreeTime_AncestralSequenceReconstruction' % GC.OUT_DIR_TMPFILES
     out_filename = '%s/ancestral_sequences.fas' % GC.OUT_DIR_OUTFILES
     if GC.GZIP_OUTPUT:
         out_filename += '.gz'
     if isfile(out_filename):
         GC.SELECTED['Logging'].writeln(
             "Ancestral sequences exist. Skipping recomputation.")
         return out_filename

     makedirs(treetime_dir, exist_ok=True)
     tree_with_internal_labels_filename = '%s/tree_with_internal_labels.tre' % treetime_dir
     # context managers close the log and command files even if a step fails
     with open('%s/log.txt' % treetime_dir, 'w') as log:
         tmp = read_tree_newick(rooted_tree_filename)
         # label the internal nodes: "ROOT" for the root, "I<index>" otherwise
         for i, node in enumerate(tmp.traverse_levelorder(leaves=False)):
             if node.is_root():
                 node.label = "ROOT"
             else:
                 node.label = "I%d" % i
         GC.write_file('%s\n' % tmp.newick(),
                       tree_with_internal_labels_filename)
         if aln_filename.endswith('.gz'):
             # hand treetime a plain-text copy of the gzipped alignment
             unzipped_filename = '%s/aln_unzipped.fas' % treetime_dir
             GC.write_file('\n'.join(GC.read_file(aln_filename)),
                           unzipped_filename)
             aln_filename = unzipped_filename
         command = [
             'treetime', 'ancestral', '--aln', aln_filename, '--tree',
             tree_with_internal_labels_filename, '--outdir', treetime_dir
         ]
         with open('%s/command.txt' % treetime_dir, 'w') as f:
             f.write('%s\n' % ' '.join(command))
         call(command, stdout=log)
     GC.write_file(
         '\n'.join(
             GC.read_file('%s/ancestral_sequences.fasta' %
                          treetime_dir)), out_filename)
     return out_filename
Beispiel #26
0
def inorder(m):
    """Return the seconds tool ``m`` needs for one full inorder traversal.

    Returns ``NA`` for 'biophylo' and 'ete3', for which no traversal is
    attempted here. For 'dendropy' and 'treeswift' the tree is parsed from
    the module-level ``treestr`` before the clock starts.
    """
    if m == 'biophylo' or m == 'ete3':
        return NA
    if m == 'dendropy':
        parsed = dendropy.Tree.get(data=treestr, schema='newick')
        began = time()
        for _ in parsed.inorder_node_iter():
            pass
        elapsed = time() - began
    elif m == 'treeswift':
        parsed = read_tree_newick(treestr)
        began = time()
        for _ in parsed.traverse_inorder():
            pass
        elapsed = time() - began
    else:
        assert False, "Invalid tool: %s"%m
    return elapsed
Beispiel #27
0
def load_tree(m):
    """Return the seconds tool ``m`` needs to parse the benchmark tree.

    The input is the module-level ``treestr`` (``treeio`` for 'biophylo').
    ``m`` must be one of 'dendropy', 'biophylo', 'treeswift' or 'ete3'.
    """
    if m == 'dendropy':
        began = time()
        dendropy.Tree.get(data=treestr, schema='newick')
        elapsed = time() - began
    elif m == 'biophylo':
        began = time()
        Phylo.read(treeio, 'newick')
        elapsed = time() - began
    elif m == 'treeswift':
        began = time()
        read_tree_newick(treestr)
        elapsed = time() - began
    elif m == 'ete3':
        began = time()
        ete3.Tree(treestr,format=1)
        elapsed = time() - began
    else:
        assert False, "Invalid tool: %s"%m
    return elapsed
Beispiel #28
0
def measure_memory(m):
    """Return the growth of ``memory()`` caused by loading the tree with ``m``.

    ``memory()`` is sampled right before and right after parsing the
    module-level ``treestr`` (``treeio`` for 'biophylo'); for 'dendropy' the
    bipartition encoding is included in the measured section. ``m`` must be
    one of 'dendropy', 'biophylo', 'treeswift' or 'ete3'.
    """
    if m == 'dendropy':
        before = memory()
        loaded = dendropy.Tree.get(data=treestr, schema='newick')
        loaded.encode_bipartitions()
        growth = memory() - before
    elif m == 'biophylo':
        before = memory()
        loaded = Phylo.read(treeio, 'newick')
        growth = memory() - before
    elif m == 'treeswift':
        before = memory()
        loaded = read_tree_newick(treestr)
        growth = memory() - before
    elif m == 'ete3':
        before = memory()
        loaded = ete3.Tree(treestr,format=1)
        growth = memory() - before
    else:
        assert False, "Invalid tool: %s"%m
    return growth
Beispiel #29
0
def rootdistorder(m):
    """Return the seconds tool ``m`` needs to visit nodes by root distance.

    Returns ``NA`` for 'biophylo' and 'ete3', for which no traversal is
    attempted here. For 'dendropy' the timed section covers both the node-age
    computation and the age-ordered iteration. The tree is parsed from the
    module-level ``treestr`` before the clock starts.
    """
    if m == 'biophylo' or m == 'ete3':
        return NA
    if m == 'dendropy':
        parsed = dendropy.Tree.get(data=treestr, schema='newick')
        began = time()
        parsed.calc_node_ages(is_force_max_age=True)
        for _ in parsed.ageorder_node_iter(descending=True):
            pass
        elapsed = time() - began
    elif m == 'treeswift':
        parsed = read_tree_newick(treestr)
        began = time()
        for _ in parsed.traverse_rootdistorder():
            pass
        elapsed = time() - began
    else:
        assert False, "Invalid tool: %s"%m
    return elapsed
 def finalize():
     """Give every leaf of the pruned trees a copy of its root's sequence.

     Resets ``GC.final_sequences`` and, for each ``(root, newick)`` pair in
     ``GC.pruned_newick_trees``, creates one ``TreeNode`` per leaf carrying
     the root's sequence. Leaf labels are read as
     ``virus name|contact network label|sample time``. Every
     ``(leaf, sequence)`` pair is recorded under
     ``GC.final_sequences[label][time]`` and the new leaves are attached to
     the root through ``set_leaves``.
     """
     GC.final_sequences = {}
     TreeNode = MF.modules['TreeNode']
     for root, treestr in GC.pruned_newick_trees:
         seq = root.get_seq()
         leaves = []
         for tip in read_tree_newick(treestr).traverse_leaves():
             _virus_name, cn_label, t_str = (
                 part.strip() for part in tip.label.split('|')
             )
             sample_time = float(t_str)
             # make sure the per-node / per-time bucket exists before filling it
             bucket = GC.final_sequences.setdefault(cn_label, {}).setdefault(
                 sample_time, [])
             leaf = TreeNode(
                 time=sample_time,
                 seq=seq,
                 contact_network_node=GC.contact_network.get_node(cn_label))
             leaves.append(leaf)
             bucket.append((leaf, seq))
         root.set_leaves(leaves)