Exemple #1
0
def get_td(t1trees, t1lnls, t2trees, t2lnls):
    """Find the last position at which two parallel tree traces still agree.

    The two traces are walked in lock-step.  While the symmetric difference
    between the i-th trees is 0, the i-th tree of the first trace is
    remembered together with its index; the walk stops at the first pair
    whose distance is non-zero.

    Parameters:
        t1trees, t2trees: indexable sequences of tree objects exposing
            ``symmetric_difference``; ``t2trees`` is indexed with the
            positions of ``t1trees``.
        t1lnls, t2lnls: per-tree values, only echoed to stdout here.

    Returns:
        ``[dindex, td, umtd1, mmtd1]`` where ``dindex``/``td`` are the index
        and tree of the last agreeing pair (``0`` and an empty ``Tree`` if
        none), and ``umtd1``/``mmtd1`` are the first disagreeing pair (empty
        ``Tree`` objects if the traces never diverge).
    """
    dindex = 0
    td = Tree()
    umtd1 = Tree()
    mmtd1 = Tree()
    for i in range(len(t1trees)):
        t1 = t1trees[i]
        t2 = t2trees[i]
        # NOTE(review): the distance is computed in both directions and only
        # the second result is used.  Kept as-is in case the first call is
        # relied upon for a side effect inside the tree library - confirm.
        d = t1.symmetric_difference(t2)
        d = t2.symmetric_difference(t1)
        # Single-argument print() emits the same text on Python 2 and 3.
        print("d %s" % (d,))
        if d != 0:
            # First topological disagreement: record the pair and stop.
            umtd1 = t1
            mmtd1 = t2
            break
        td = t1
        l_umtd = t1lnls[i]
        l_mmtd = t2lnls[i]
        dindex = i
        print("ML td %s %s" % (l_umtd, l_mmtd))

    return [dindex, td, umtd1, mmtd1]
Exemple #2
0
    def bipartition_by_edge(self, e):
        """Split the tree at edge `e` and return the two resulting trees.

        The subtree below ``e.head_node`` is detached from its parent and
        wrapped in a new PhylogeneticTree (``t1``).  The root of what remains
        is located by walking up parent links and wrapped in a second
        PhylogeneticTree (``t2``).  Nodes are reused rather than copied, so the
        node structure reachable from ``self._tree`` is modified.

        If `e` has a ``num_leaves_below`` attribute, the ``num_leaves_below``
        counters of the edges between the cut point and the old root are
        decremented by the number of leaves in the detached subtree.

        Returns:
            The tuple ``(t1, t2)``.
        """

        t = self._tree
        nr = e.head_node
        assert e.tail_node is not None
        assert e.head_node is not None
        assert nr.parent_node is e.tail_node
        is_valid_tree(t)

        # NOTE(review): `n` is not used anywhere below in this method.
        n = self.n_leaves
        # The tail node may be spliced out by suppress_unifurcations=True below,
        # so its parent is captured before the cut.
        potentially_deleted_nd = e.tail_node
        grandparent_nd = potentially_deleted_nd.parent_node
        e.tail_node.remove_child(nr, suppress_unifurcations=True)

        nr.edge.length = None # the bisected edge no longer has a length
        nr.parent_node = None
        convert_node_to_root_polytomy(nr)
        t1 = PhylogeneticTree(Tree(seed_node=nr))
        n1 = t1.n_leaves # temp we could speed this up, by telling the Phylogenetic tree how many leaves it has

        if hasattr(e, "num_leaves_below"):
            # Leaf counts are being tracked on edges: subtract the detached
            # leaves from every edge on the way up to the root.
            if grandparent_nd is None:
                # The tail node had no parent, i.e. it was the root.
                old_root = potentially_deleted_nd
                if old_root.edge:
                    old_root.edge.num_leaves_below -= n1
            else:
                # Only adjust the tail node's own edge if the node is still
                # attached to its parent after the cut.
                if potentially_deleted_nd in grandparent_nd.child_nodes():
                    potentially_deleted_nd.edge.num_leaves_below -= n1
                old_root = grandparent_nd
                if old_root.edge:
                    old_root.edge.num_leaves_below -= n1
                while old_root.parent_node:
                    old_root = old_root.parent_node
                    if old_root.edge:
                        old_root.edge.num_leaves_below -= n1
        else:
            # No counters to maintain: just climb to the root.
            old_root = grandparent_nd or potentially_deleted_nd
            while old_root.parent_node:
                old_root = old_root.parent_node

        # uym2 added (April 2019): suppress unifurcation at root node
        if len(old_root.child_nodes()) == 1:
            old_root = old_root.child_nodes()[0]

        t2 = PhylogeneticTree(Tree(seed_node=old_root))

        is_valid_tree(t1._tree)
        is_valid_tree(t2._tree)
        return t1, t2
Exemple #3
0
def simulateTreeTopology(n):
    """Grow a random binary topology with n leaves.

    Starting from a single node, a uniformly chosen current leaf is split
    into two new children, n-1 times.  The leaves are then labelled with a
    random permutation of 1..n and given ``time = 0``.

    Returns:
        ``(tree, split_order)`` where ``split_order`` lists the nodes in the
        order in which they were split.
    """
    root = Node()
    tips = [root]
    split_order = []
    myTree = Tree(seed_node=root)

    for step in range(n - 1):
        # Exactly step+1 tips exist at this point; pick one to split.
        pick = randint(0, step)
        left = Node()
        right = Node()
        parent = tips[pick]
        parent.add_child(left)
        parent.add_child(right)
        # The split node stops being a tip: `left` takes its slot and
        # `right` is appended.
        tips[pick] = left
        tips.append(right)
        split_order.append(parent)

    labels = list(range(1, n + 1))
    shuffle(labels)
    for idx, tip in enumerate(tips):
        tip.taxon = Taxon(label=str(labels[idx]))
        tip.time = 0

    return myTree, split_order
Exemple #4
0
def min_cluster_brlen_bisect(a_tree, max_sum_branches):
    """Cut off one subtree as soon as a clade's total branch length is too big.

    Nodes are visited in post-order.  Each node gets a ``sum_brlen``
    attribute: 0 for leaves, otherwise the sum over its children of
    ``child.sum_brlen + child.edge_length``.  At the first internal node whose
    ``sum_brlen`` exceeds `max_sum_branches`, the child contributing the
    largest share is detached and returned as its own tree.  If that leaves
    the node with a single child, the node is spliced out (its edge length is
    added to the remaining child's, or the child becomes the new seed node).

    Only internal nodes are candidates for a cut.  The original code also
    tested leaves, which raised UnboundLocalError (or reused a stale
    ``max_child``) when `max_sum_branches` was negative.

    Returns:
        ``(a_tree, t1)`` where ``t1`` is the detached subtree, or
        ``(a_tree, None)`` when no node exceeds the threshold.  `a_tree` is
        modified in place.
    """
    for node in a_tree.postorder_node_iter():
        node.sum_brlen = 0
        if node.is_leaf():
            continue

        max_child = None
        max_sum_brlen = 0
        for ch in node.child_node_iter():
            s = ch.sum_brlen + ch.edge_length
            node.sum_brlen += s
            if s > max_sum_brlen:
                max_sum_brlen = s
                max_child = ch

        if max_child is None or not node.sum_brlen > max_sum_branches:
            continue

        node.remove_child(max_child)
        t1 = Tree(seed_node=max_child)
        # adjust the remaining tree after cutting out
        children = node.child_nodes()
        if len(children) == 1:
            ch = children[0]
            if node is a_tree.seed_node:
                # A root with one child: promote the child to root.
                node.remove_child(ch)
                a_tree.seed_node = ch
            else:
                # Splice the unary node out, merging the two edge lengths.
                e = ch.edge_length + node.edge_length
                p = node.parent_node
                node = p.remove_child(node)
                node.remove_child(ch)
                p.add_child(ch)
                ch.edge_length = e

        return a_tree, t1
    return a_tree, None
Exemple #5
0
def resolve_node(node):
    """Return Newick strings for two-clade groupings of `node`'s children.

    For every group returned by ``list_bipartitions`` that has more than one
    member, a fresh root with two children is built: one holding the group,
    the other holding the remaining children of `node`.  The tree is
    serialised to Newick, after which the children are added back to `node`.

    Returns:
        A list of Newick strings, one per accepted group.
    """
    children = node.child_nodes()
    newicks = []

    for group in list_bipartitions(children):
        if len(group) <= 1:
            continue

        rest = [nd for nd in children if nd not in group]
        top = Node()
        side_a = Node()
        side_b = Node()
        for nd in group:
            side_a.add_child(nd)
        for nd in rest:
            side_b.add_child(nd)
        top.add_child(side_a)
        top.add_child(side_b)

        newicks.append(Tree(seed_node=top).as_string("newick"))

        # Hand the children back to the original node before the next group.
        for nd in children:
            node.add_child(nd)

    return newicks
Exemple #6
0
def extract_tree_with_taxa(tree, taxa, suppress_unifurcations=True):
    """Build a new tree containing only the lineages leading to `taxa`.

    Every node of `tree` gets a boolean ``keep`` attribute: True for the
    leaves carrying one of `taxa` and for all of their ancestors.  The kept
    nodes are then copied breadth-first (taxon, label and edge length) into a
    fresh Tree.  Unifurcations in the copy are suppressed unless
    `suppress_unifurcations` is False.

    Raises:
        KeyError: if a taxon in `taxa` is not on any leaf of `tree`.
    """
    leaf_by_taxon = {}
    for nd in tree.preorder_node_iter():
        nd.keep = False
        if nd.is_leaf():
            leaf_by_taxon[nd.taxon] = nd

    for wanted in taxa:
        for anc in leaf_by_taxon[wanted].ancestor_iter(inclusive=True):
            anc.keep = True

    out = Tree()
    # Two queues advance in lock-step: a source node and its copy.
    pending_src = Queue()
    pending_dst = Queue()
    pending_src.put(tree.seed_node)
    pending_dst.put(out.seed_node)
    while not pending_src.empty():
        src = pending_src.get()
        dst = pending_dst.get()
        for src_child in src.child_node_iter():
            if not src_child.keep:
                continue
            dst_child = Node(taxon=src_child.taxon,
                             label=src_child.label,
                             edge_length=src_child.edge_length)
            dst.add_child(dst_child)
            pending_src.put(src_child)
            pending_dst.put(dst_child)

    if suppress_unifurcations:
        out.suppress_unifurcations()
    return out
Exemple #7
0
    def __bisect__(t,e):
        """Cut tree `t` at edge `e` and return ``(t, t1)``.

        The subtree below ``e.head_node`` is detached and becomes ``t1``.  If
        the tail node is left with a single child it is spliced out: the
        child is attached to the grandparent with the two edge lengths added
        together, or becomes the new seed node of `t` when the tail node was
        the root.

        A missing (None) edge length is treated as 0 when the two lengths are
        merged; the original expression ``l_u + l_v`` raised TypeError in
        that case.

        Unless the root-collapse early return is taken, ``__updateNode__`` is
        called on every node from the cut point up to the root and both trees
        are flagged ``annotated = True``.
        """
        u = e.tail_node
        v = e.head_node

        u.remove_child(v)
        t1 = Tree(seed_node=v)

        if u.num_child_nodes() == 1:
            p = u.parent_node
            v = u.child_nodes()[0]
            l_v = v.edge_length if v.edge_length is not None else 0
            u.remove_child(v)
            if p is None:
                # u is the seed_node; this means the tree runs out of all but
                # one side.  Returns without the update/annotate step below.
                t.seed_node = v
                return t, t1
            l_u = u.edge_length if u.edge_length is not None else 0
            p.remove_child(u)
            p.add_child(v)
            v.edge_length = l_u + l_v
            u = p

        # Refresh the per-node data on the path from the cut up to the root.
        while u is not None:
            __updateNode__(u)
            u = u.parent_node

        t.annotated = True
        t1.annotated = True

        return t, t1
Exemple #8
0
    def __bisect__(tre, edg):
        """Detach the subtree below edge `edg` from `tre`.

        The head node of `edg` becomes the seed of a new tree.  When the
        tail node is left with exactly one child, that node is removed and
        its child takes its place, with the two edge lengths summed (falsy
        lengths count as 0).  If the removed node was the root, its child
        becomes the new seed node and the function returns immediately.
        Otherwise ``__update_node__`` is applied to every node from the cut
        point up to the root.

        Returns ``(tre, tr1)`` with ``tr1`` the detached subtree.
        """
        upper = edg.tail_node
        lower = edg.head_node

        upper.remove_child(lower)
        tr1 = Tree(seed_node=lower)

        if upper.num_child_nodes() == 1:
            grand = upper.parent_node
            survivor = upper.child_nodes()[0]
            len_survivor = survivor.edge_length or 0
            upper.remove_child(survivor)
            if grand is None:
                # `upper` was the seed node: the lone child is the new root.
                tre.seed_node = survivor
                return tre, tr1
            len_upper = upper.edge_length or 0
            grand.remove_child(upper)
            grand.add_child(survivor)
            survivor.edge_length = len_upper + len_survivor
            upper = grand

        cursor = upper
        while cursor is not None:
            __update_node__(cursor)
            cursor = cursor.parent_node

        return tre, tr1
Exemple #9
0
def graph2tree(G, root=0, names=None):
    """Turn an adjacency-list graph into a tree rooted at vertex `root`.

    Parameters:
        G: indexable by vertex number; ``G[v]`` yields ``(neighbour, length)``
            pairs.  The graph is assumed to be acyclic.
        root: vertex that becomes the seed node.
        names: optional sequence of labels indexed by vertex; when empty or
            None the vertex number (as a string) is used as the label.

    Returns:
        A Tree in which each visited vertex is a node labelled as above,
        each edge carries its ``length``, and every leaf has a taxon created
        from its label in the tree's taxon namespace.
    """
    def label_of(vertex):
        return names[vertex] if names else str(vertex)

    seed_node = Node()
    seed_node.label = label_of(root)
    T = Tree(seed_node=seed_node)
    # node_refs doubles as the "visited" set: None means not reached yet.
    node_refs = [None] * len(G)
    node_refs[root] = seed_node

    stk = [root]
    while stk:
        curr_v = stk.pop()
        for v, length in G[curr_v]:
            if node_refs[v] is None:
                stk.append(v)
                new_node = Node()
                new_node.label = label_of(v)
                node_refs[v] = new_node
                node_refs[curr_v].add_child(new_node)
                new_node.edge_length = length

    for node in T.leaf_node_iter():
        node.taxon = T.taxon_namespace.new_taxon(label=node.label)

    return T
Exemple #10
0
def report_taxa(tree_file, scheme='newick', listing=True, counting=True):
    """Print the leaf taxon labels and/or the leaf count of a tree file.

    Parameters:
        tree_file: path of the tree file to read.
        scheme: format name handed to ``read_from_path``.
        listing: when true, print one taxon label per leaf.
        counting: when true, print a final ``Taxa #: N`` line.
    """
    a_tree = Tree()
    a_tree.read_from_path(tree_file, scheme)

    if listing:
        for tip in a_tree.leaf_nodes():
            print(tip.taxon.label)

    if not counting:
        return
    n_tips = len(a_tree.leaf_nodes())
    print('Taxa #: ' + str(n_tips))
Exemple #11
0
 def get_subtree(self, taxa):
     """Return a PhylogeneticTree built from a pruned copy of this tree.

     A copy of ``self._tree`` is made and `taxa` is passed to
     ``prune_taxa_with_labels`` (when the first element is a ``str``) or to
     ``prune_taxa`` (when it is a ``Taxon``).  For any other element type
     the copy is returned unpruned.  Returns None when `taxa` is empty.
     """
     if len(taxa) == 0:
         return None

     pruned = Tree(self._tree)
     head = taxa[0]
     if isinstance(head, str):
         pruned.prune_taxa_with_labels(taxa)
     elif isinstance(head, Taxon):
         pruned.prune_taxa(taxa)
     return PhylogeneticTree(pruned)
Exemple #12
0
 def _read_tree_from_path(path, taxon_namespace):
     """
     Read the Newick file at `path` into a dendropy tree that uses
     `taxon_namespace`, and return that tree.
     """
     return Tree().get_from_path(path,
                                 "newick",
                                 taxon_namespace=taxon_namespace)
Exemple #13
0
def gamma_deviation_from_clock(tree, shape, rate):
    """Return a copy of `tree` with gamma-distributed rate variation applied.

    Every non-root edge length of the copy is multiplied by `rate` and by an
    independent draw from ``Gamma(shape, scale=1/shape)``.  The input tree is
    not modified.

    Parameters:
        tree: tree to copy (passed to the ``Tree`` constructor).
        shape: gamma shape parameter; the scale is its reciprocal.
        rate: constant multiplier applied to every edge.
    """
    tree1 = Tree(tree)

    for node in tree1.postorder_node_iter():
        if node is tree1.seed_node:
            continue
        # 1.0 forces true division: under Python 2 an integer `shape` made
        # `1 / shape` evaluate to 0 for any shape > 1.
        f = np.random.gamma(shape, scale=1.0 / shape)
        node.edge_length = node.edge_length * f * rate
    return tree1
Exemple #14
0
def exp_deviation_from_clock(tree, rate):
    """Return a copy of `tree` with exponential rate variation applied.

    Each non-root edge length of the copy is multiplied by `rate` and by an
    independent draw from ``np.random.exponential()`` called with its
    defaults, so the distribution has no free parameter here.
    """
    tree1 = Tree(tree)
    root = tree1.seed_node

    for node in tree1.postorder_node_iter():
        if node is not root:
            multiplier = np.random.exponential()
            node.edge_length = node.edge_length * multiplier * rate
    return tree1
Exemple #15
0
 def bipartition_by_root(self):
     """Split the tree at the root's first child.

     The first child of the seed node is pruned from ``self._tree``.  The
     remainder is wrapped as ``t1`` and the pruned part as ``t2``; ``t2`` is
     rerooted at the pruned node when it has more than one leaf.

     Returns ``(t1, t2, root)``, or ``(None, None, None)`` for a tree with a
     single leaf.
     """
     if self.n_leaves == 1:
         return (None, None, None)

     t = self._tree
     root = t.seed_node
     t1_root = root._child_nodes[0]
     t.prune_subtree(t1_root, update_splits=True, delete_outdegree_one=True)

     t1 = PhylogeneticTree(t)
     t2 = PhylogeneticTree(Tree(t1_root))
     # Reroot only when more than one leaf is left in the pruned part.
     if t2.n_leaves > 1:
         t2._tree.reroot_at_node(t1_root)
     return t1, t2, root
Exemple #16
0
def sample_with_outgroups(a_tree, n_ingroups, n_outgroups=1, n_reps=1):
    """Draw `n_reps` pruned samples from a large tree.

    For each replicate a copy of `a_tree` is handed to ``sample_and_prune``
    together with the requested number of ingroup and outgroup taxa.

    Returns:
        ``(True, samples)`` when every replicate succeeds, or
        ``(False, samples)`` with the samples collected so far as soon as one
        replicate fails.  Each sample is ``(tree, ingroups, outgroups)``.
    """
    samples = []
    for _ in range(n_reps):
        replicate = Tree(a_tree)
        ok, ingroups, outgroups = sample_and_prune(replicate,
                                                   n_ingroups,
                                                   n_outgroups=n_outgroups)
        if ok:
            samples.append((replicate, ingroups, outgroups))
            continue
        return False, samples

    return True, samples
Exemple #17
0
def is_valid_newick(path, source_sequence_names = None):
    """Check whether the file at 'path' parses as a newick-formatted tree.

    Returns the tuple (True, "") when parsing succeeds, or
    (False, error message) when any exception is raised while reading.

    NOTE: `source_sequence_names` is accepted but not used by this
    implementation; the taxa of the tree are not checked against it.
    """
    try:
        test_tree = Tree()
        test_tree.read_from_path(path, "newick")
    except Exception as e:
        # Any failure counts as "not valid"; the message goes to the caller.
        return (False, str(e))
    return (True, "")
def main(arguments):
    """Load a Newick tree, optionally rename it, collapse polytomies, write it.

    `arguments` must provide ``url``, ``path``, ``rename``, ``limit`` and
    ``output`` attributes.  The tree is fetched from ``url`` if set, else
    from ``path``.

    Raises:
        ValueError: when neither ``url`` nor ``path`` is given.
    """
    # NOTE(review): `prune` is never set to True in this function, so the
    # PareTree_wrapper branch below cannot run as written - confirm intent.
    prune = False

    if arguments.url:
        source = {"url": arguments.url}
    elif arguments.path:
        source = {"path": arguments.path}
    else:
        raise ValueError(
            "No input argument found. Please specify either --url or --path")
    tree = Tree().get(schema="newick", preserve_underscores=True, **source)

    if arguments.rename is not None:
        tree, prune_list = rename_tree(tree, arguments.rename)

    tree = collapse_polytomies(tree, arguments.limit)

    if not prune:
        tree.write(path=arguments.output, schema="newick")
    else:
        PareTree_wrapper(tree, prune_list, arguments.output)
Exemple #19
0
def main(argv):
    """Print one constraint tree per non-root internal node of a focal tree.

    ``argv[0]`` is a Newick string.  For every internal node other than the
    root, a line of the form ``((a,b,...),x,y,...)`` is printed: the inner
    group lists the taxa of the leaves below that node, followed by the taxa
    of all remaining leaves, both in the tree's leaf order.
    """
    # Taxon set object shared by the focal tree
    taxa = TaxonSet()

    # Reads in tree string from the command line
    focalTree = Tree(stream=StringIO(argv[0]),
                     schema="newick",
                     rooted=False,
                     taxon_set=taxa)

    all_leaves = focalTree.leaf_nodes()
    root = focalTree.seed_node

    for i in focalTree.internal_nodes():
        if i is root:
            continue

        clade_leaves = i.leaf_nodes()
        # Identity-based membership replaces the repeated list.remove() calls.
        clade_ids = set(id(j) for j in clade_leaves)

        inside = ",".join(str(j.taxon) for j in clade_leaves)
        outside = "".join("," + str(j.taxon)
                          for j in all_leaves if id(j) not in clade_ids)

        # Single-argument print() behaves the same on Python 2 and 3.
        print("((" + inside + ")" + outside + ")")
Exemple #20
0
def lnorm_deviation_from_clock(tree, sd, rate):
    """Return a copy of `tree` with lognormal rate variation applied.

    The multiplier distribution is forced to have mean 1, so `sd` is the only
    free parameter.  `sd` is the standard deviation of the lognormal
    distribution itself, NOT of its underlying normal distribution; the
    parameters of the underlying normal are derived from it below.  Each
    non-root edge length of the copy is multiplied by one draw and by `rate`.
    """
    tree1 = Tree(tree)
    log_term = log(sd * sd + 1)
    mu = -0.5 * log_term
    sigma = sqrt(log_term)
    root = tree1.seed_node

    for node in tree1.postorder_node_iter():
        if node is not root:
            multiplier = np.random.lognormal(mean=mu, sigma=sigma)
            node.edge_length = node.edge_length * multiplier * rate
    return tree1
def get_bls(tree_path):
    """Return the branch lengths of the Newick tree stored at `tree_path`.

    Edges are visited in level order; edges without a length (None) are
    skipped, so only actual branch lengths end up in the returned list.
    """
    t = Tree()
    t.read_from_path(tree_path, "newick")

    # A plain for-loop over the iterator works on Python 2 and 3, unlike the
    # Python 2-only ``i.next()`` call it replaces.
    return [e.length for e in t.level_order_edge_iter()
            if e.length is not None]
Exemple #22
0
def min_cluster_size_bisect(a_tree,max_size):
    """Cut off one subtree as soon as a clade has more than `max_size` leaves.

    Nodes are visited in post-order and annotated with ``nleaf``, the number
    of leaves below them (1 for a leaf).  At the first internal node whose
    ``nleaf`` exceeds `max_size`, the child with the most leaves is detached
    and returned as its own tree.

    Only internal nodes are candidates for a cut.  The original code also
    tested leaves, which raised UnboundLocalError (or reused a stale
    ``max_child``) when `max_size` was below 1.

    Returns:
        ``(a_tree, t1)`` with ``t1`` the detached subtree, or
        ``(a_tree, None)`` when no clade is too large.  `a_tree` is modified
        in place.
    """
    for node in a_tree.postorder_node_iter():
        if node.is_leaf():
            node.nleaf = 1
            continue

        node.nleaf = 0
        max_child = None
        max_nleaf = 0
        for ch in node.child_node_iter():
            node.nleaf += ch.nleaf
            if ch.nleaf > max_nleaf:
                max_nleaf = ch.nleaf
                max_child = ch

        if max_child is not None and node.nleaf > max_size:
            node.remove_child(max_child)
            t1 = Tree(seed_node=max_child)
            return a_tree, t1
    return a_tree, None
Exemple #23
0
def return_trees_from_trace(path):
    """Parse a trace file of ``<lnL> ... (newick)`` lines.

    For every line, the log-likelihood is the text before the first ``(``
    with any ``[`` / ``]`` characters removed, and the whole line is parsed
    as a Newick tree.  A tree is appended when its log-likelihood is higher
    than that of the previous line; otherwise it overwrites the most recently
    stored tree.  After the last line, the final tree is appended once more
    (as the original code did).

    Returns:
        ``[trees, lnls]`` where ``lnls`` holds the log-likelihoods formatted
        with two decimals.  An empty file yields ``[[], []]`` (the original
        raised NameError in that case).

    Raises:
        ValueError: if the text before the first ``(`` is not a number.
    """
    print("Parsing trace: %s" % (path,))
    trees = []
    lnls = []
    last_tree = None
    last_lnl = 0.0
    count_unique_trees = 0
    # `with` guarantees the file is closed even if a line fails to parse.
    with open(path, "r") as fin:
        for line in fin:
            prefix = line.split("(", 1)[0]
            lnl = float(prefix.replace("[", "").replace("]", ""))
            t = Tree()
            # NOTE(review): the full line, not just the part from "(" on, is
            # handed to the parser, exactly as before - confirm it is intended.
            t.read_from_string(line, "newick")
            if last_tree is None or last_lnl < lnl:
                # First tree, or an improvement over the previous line.
                trees.append(t)
                lnls.append("%.2f" % lnl)
                count_unique_trees += 1
            else:
                trees[-1] = t
                lnls[-1] = "%.2f" % lnl
            last_tree = t
            last_lnl = lnl
            print("%s %s" % (count_unique_trees, lnl))
    if last_tree is not None:
        trees.append(last_tree)
        lnls.append("%.2f" % last_lnl)
    return [trees, lnls]
Exemple #24
0
def min_cluster_diam_bisect(a_tree,max_diam):
    """Cut off one subtree as soon as a clade's diameter exceeds `max_diam`.

    Nodes are visited in post-order.  Each node gets ``maxdepth`` (0 for a
    leaf, otherwise the largest ``child.maxdepth + child.edge_length``).  The
    diameter at a node is taken as the sum of its two largest child depths,
    both of which start from the sentinel -1.  At the first node where that
    sum exceeds `max_diam`, the deepest child is detached.

    Returns ``(a_tree, t1)`` with the detached subtree, or ``(a_tree, None)``
    when nothing was cut.
    """
    for node in a_tree.postorder_node_iter():
        if node.is_leaf():
            node.maxdepth = 0
            continue

        deepest = -1
        runner_up = -1
        deepest_child = None
        for child in node.child_node_iter():
            depth = child.maxdepth + child.edge_length
            if depth > deepest:
                deepest, runner_up = depth, deepest
                deepest_child = child
            elif depth > runner_up:
                runner_up = depth

        node.maxdepth = deepest
        if deepest + runner_up > max_diam:
            node.remove_child(deepest_child)
            return a_tree, Tree(seed_node=deepest_child)
    return a_tree, None
Exemple #25
0
    def get_super_tree(self, superTree_method, **args):
        """Build a summary tree from ``self.data['trees']`` and wrap it in a MetaTree.

        `superTree_method` selects how the list of branches is obtained:
        one of the names 'MCC' / 'consensus' (looked up among the nested
        helpers below), the path of an existing tree file (``load_tree``), or
        otherwise the label of one of the loaded trees (``load_subtree``).

        Each branch is a list ``[barcode, posterior, nodes]``, optionally
        followed by ``age`` and ``edge_length``.  The branches are then
        assembled into a new Tree, whose nodes receive ``barcode``,
        ``posterior``, ``age``, ``contain`` and ``id`` attributes.

        NOTE(review): 'ASTRID' is accepted by the dispatch below but no helper
        with that name is defined in this method (it is only present as a
        commented-out stub), so selecting it looks like it would fail in the
        ``locals()`` lookup - confirm.
        """
        def parse_trees(**args):
            """Index every node of every tree by its ``barcode``.

            Returns ``(n_tree, n_branch)``: the number of trees as a float and
            a dict mapping barcode -> list of ``[w, tree index, node]``, where
            ``w`` is the squared fraction of all taxa present in that tree.
            """
            n_tree, n_branch = float(len(self.data['trees'])), {}
            for mt_id, mt in enumerate(self.data['trees']):
                w = (float(len(mt.tre.leaf_nodes())) /
                     len(self.data['taxa']))**2
                for node in mt.tre.preorder_node_iter():
                    if node.barcode not in n_branch:
                        n_branch[node.barcode] = [[w, mt_id, node]]
                    else:
                        n_branch[node.barcode].append([w, mt_id, node])
            return n_tree, n_branch

        def consensus(self, **args):
            """Greedily pick mutually compatible branches, most frequent first.

            NOTE: uses ``dict.iteritems``, which only exists on Python 2.
            """
            n_tree, n_branch = parse_trees(**args)
            n_branch = sorted([[len(v) / n_tree, k, v]
                               for k, v in n_branch.iteritems()],
                              reverse=True)
            consensus_tree = []
            for posterior, branch, nodes in n_branch:
                for cbr, _, _ in consensus_tree:
                    b1, b2 = sorted([branch, cbr])
                    # Accept only if b1 is contained in b2 or shares no bit
                    # with it; otherwise mark the branch as rejected (0).
                    if not (((b1 & b2) == b1) or ((b1 & (~b2)) == b1)):
                        branch = 0
                        break
                if branch:
                    consensus_tree.append([branch, posterior, nodes])
            return sorted(consensus_tree, reverse=True)

        def MCC(self, **args):
            """Return the branches of the highest-scoring tree.

            A tree's score is the summed occurrence count of its node
            barcodes.  Only trees containing every taxon get a ``score``
            assigned here.
            NOTE(review): ``max`` reads ``x.score`` on all trees; presumably
            trees with missing taxa already carry a score from elsewhere -
            confirm.
            """
            n_tree, n_branch = parse_trees(**args)
            for mt_id, mt in enumerate(self.data['trees']):
                if len(mt.tre.leaf_nodes()) == len(self.data['taxa']):
                    mt.score = np.sum([
                        len(n_branch[node.barcode])
                        for node in mt.tre.preorder_node_iter()
                    ])
            tre = max(self.data['trees'], key=lambda x: x.score).tre
            return [[
                n.barcode,
                len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode]
            ] for n in tre.preorder_node_iter()]

        def load_subtree(self, treeLabel, **args):
            """Return the branches (with age and edge length) of the tree labelled `treeLabel`.

            NOTE(review): if no tree has that label, ``tre`` is never bound and
            the return statement fails - confirm callers guarantee a match.
            """
            n_tree, n_branch = parse_trees(**args)
            for mt_id, mt in enumerate(self.data['trees']):
                if mt.tre.label == treeLabel:
                    tre = mt.tre
                    break
            return [[
                n.barcode,
                len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode], n.age,
                n.edge_length
            ] for n in tre.preorder_node_iter()]

        #def ASTRID(self, **args) :
        #from dendropy import PhylogeneticDistanceMatrix

        def load_tree(self, consFile=None, **args):
            """Read the first tree of `consFile` and return its branches.

            The schema is 'nexus' when the first line starts with '#NEXUS'
            (case-insensitive), else 'newick'.  Nodes are given ids and
            barcodes, and ages derived from the root's distance from tip.
            """
            n_tree, n_branch = parse_trees(**args)

            with open(consFile) as fin:
                schema = 'nexus' if fin.readline().upper().startswith(
                    '#NEXUS') else 'newick'
            # Only the first tree yielded from the file is used.
            for tre in Tree.yield_from_files([consFile], schema=schema):
                break

            internal_id = n_taxa = len(self.data['taxa'])
            # One distinct power of two per taxon id; object dtype keeps
            # arbitrary-precision Python integers.
            digit_code = np.power(2, np.arange(n_taxa, dtype='object'))

            for node in tre.postorder_node_iter():
                if node.is_leaf():
                    node.id = self.data['taxa'][node.taxon.label]
                    node.barcode = digit_code[node.id]
                else:
                    # Internal nodes are numbered after the taxa; their
                    # barcode is the sum of their children's barcodes.
                    node.id, internal_id = internal_id, internal_id + 1
                    node.barcode = sum([c.barcode for c in node.child_nodes()])

            tre.seed_node.age = tre.seed_node.distance_from_tip()
            for node in tre.preorder_node_iter():
                if node.parent_node:
                    node.age = node.parent_node.age - node.edge_length
            return [[
                n.barcode,
                len(n_branch.get(n.barcode, [])) / n_tree,
                n_branch.get(n.barcode, []), n.age, n.edge_length
            ] for n in tre.preorder_node_iter()]

        # Dispatch: named method, tree file on disk, or label of a loaded tree.
        if superTree_method in ('MCC', 'ASTRID', 'consensus'):
            branches = locals()[superTree_method](self, **args)
        elif os.path.isfile(superTree_method):
            branches = load_tree(self, consFile=superTree_method, **args)
        else:
            branches = load_subtree(self, treeLabel=superTree_method, **args)
        supertree = Tree()
        sn = supertree.seed_node
        # The first branch becomes the root of the supertree.
        sn.barcode, sn.posterior = branches[0][0], branches[0][1]
        # Use the supplied age when present, else the weighted mean age of
        # the source nodes.
        sn.age = branches[0][3] if len(branches[0]) > 3 else np.sum(
            [n[2].age * n[0]
             for n in branches[0][2]]) / np.sum([n[0] for n in branches[0][2]])
        sn.contain = [[b[0], b[1], b[2].id] for b in branches[0][2]]
        for br in branches[1:]:
            cbr, posterior, nodes = br[:3]
            # Climb until the current node's barcode contains this branch.
            while (sn.barcode & cbr) != cbr:
                sn = sn.parent_node
            # Carry over the taxon label when the first source node has one.
            new_node = Node() if len(nodes) == 0 or (
                not nodes[0][2].taxon) else Node(taxon=Taxon(
                    label=nodes[0][2].taxon.label))
            sn.add_child(new_node)
            sn = new_node
            sn.barcode, sn.posterior = cbr, posterior
            sn.contain = [[b[0], b[1], b[2].id] for b in nodes]
            if len(br) <= 3:
                # No explicit age/length: use weighted means over the source
                # nodes (or 0.0 / the parent's age when there are none).
                sn.edge_length = 0.0 if len(nodes) == 0 else np.sum(
                    [n[2].edge_length * n[0]
                     for n in nodes]) / np.sum([n[0] for n in nodes])
                sn.age = sn.parent_node.age if len(nodes) == 0 else np.sum(
                    [n[2].age * n[0]
                     for n in nodes]) / np.sum([n[0] for n in nodes])
            else:
                sn.age, sn.edge_length = br[3:]
        # Number the nodes: leaves by taxon id, internal nodes after the taxa.
        internal_id = len(self.data['taxa'])
        for node in supertree.postorder_node_iter():
            if node.is_leaf():
                node.id = self.data['taxa'][node.taxon.label]
            else:
                node.id = internal_id
                internal_id += 1
        return MetaTree(supertree)
Exemple #26
0
            algorithms.estimate_branch_lengths_lp(tree, extant_genomes)

    # Fix multibranching trees:
    changed = resolve_tree(tree)
    # if the tree changed, it might be a good idea to rerun the branch length detection?
    if changed and param.estimate_lenghts is not None:
        if param.estimate_lenghts == "lp":
            algorithms.estimate_branch_lengths_lp(tree, extant_genomes)
        elif param.estimate_lenghts == "least_squares":
            algorithms.estimate_branch_lengths_least_squares(
                tree, extant_genomes)

    # if no weights file is given, use default weighting scheme:
    if param.adj_weights_file is None:
        internalAdjWeight = algorithms.ancestral_adjacency_weights(
            Tree(tree), extant_genomes)
    else:
        # if weights are given, use: (usually DeClone weights):
        internalAdjWeight = file_ops.open_ancestral_weights(
            param.adj_weights_file, cutoff=param.weight_filter)

    # main alg:
    reconstructed = algorithms.ig_indel_small_phylogeny(
        extant_genomes,
        tree,
        internalAdjWeight,
        perfect_matching=param.perfect,
        random_repeat=param.random_repeat,
        add_open_2_cycles=param.add_open_2_cycles)

    # output:
Exemple #27
0
                        phylum] != node.nleaf:
                    #if phylum not in convergence or c.phylCount[phylum] > convergence[phylum][0]:
                    if phylum not in groupings:
                        groupings[phylum] = set([c])
                    else:
                        groupings[phylum].add(c)

#for phylum in global_phylCount:
#     if global_phylCount[phylum] > 1 and not ('Candi' in phylum or 'candi' in phylum):
#         print(phylum + " " + str(global_phylCount[phylum]) + " " + str(purity[phylum][0]) + " " + str(convergence[phylum][0]))
#    print(phylum,global_phylCount[phylum])

#print(global_phylCount['Firmicutes'])
#print(purity['Firmicutes'])

# For every phylum counted at least twice, print "<taxon label> <phylum>" for
# each leaf of each grouped clade.  When a phylum has more than one clade the
# phylum name gets a running "_<n>" suffix.
for phylum in groupings:
    if global_phylCount[phylum] >= 2:
        ID = 1 if len(groupings[phylum]) != 1 else None
        for c in groupings[phylum]:
            subTree = Tree(seed_node=c)
            if ID:
                suffix = "_" + str(ID)
                ID += 1
            else:
                suffix = ''
            for node in subTree.leaf_node_iter():
                print(node.taxon.label + " " + phylum + suffix)
import os
import sys
from dendropy import Tree

# Script: compare two Newick trees given as the first and second command-line
# arguments; print their symmetric difference and each tree's length().
t1path = sys.argv[1]
t2path = sys.argv[2]

t1 = Tree()
t1.read_from_path(t1path, "newick")
t2 = Tree()
t2.read_from_path(t2path, "newick")

# NOTE(review): the distance is computed in both directions and only the
# second result is printed; kept unchanged in case the first call matters.
s = t1.symmetric_difference(t2)
s = t2.symmetric_difference(t1)
# Single-argument print() produces the same output on Python 2 and 3.
print("symmetric diff. =  %s" % (s,))

print(t1.length())
print(t2.length())
Exemple #29
0
    speciesList = sys.argv[2]
    taxonomyTree = sys.argv[3]

    species = {}
    lines = open(speciesList, 'r')
    for line in lines:
        species[line.strip()] = line.strip()

    lines = open(taxonomyFile, 'r')
    header = lines.readline()
    nodes_dict = {}

    #Read first line, root node
    line = lines.readline()
    results = line.strip().split(',')
    tree = Tree()
    root = Node()
    root.__dict__['label'] = results[0].replace("\"", "")
    nodes_dict[results[0].replace("\"", "")] = root

    prune = ['1']

    #Add root node to tree
    tree.__dict__['_seed_node'].add_child(root)
    for line in lines:
        results = line.strip().split(',')
        node = Node()
        node.__dict__['label'] = results[0].replace("\"", "")
        node.taxon = Taxon(results[0].replace("\"", ""))
        nodes_dict[results[0].replace("\"", "")] = node
        nodes_dict[results[1].replace("\"", "")].add_child(node)
Exemple #30
0
def rdf2dendropyTree(file_obj=None, data=None):
    '''
    Parses RDF content (a `file_obj` file object, or `data` parsed with
    format 'xml' when no file object is given) into a dendropy Tree.

    Every triple whose predicate is OBO[HAS_PARENT_PREDICATE] is read as
    "subject has parent object" and turned into a parent/child link between
    dendropy Nodes.  A node receives a Taxon when its resource has an
    OBO[REPRESENTS_TU_PREDICATE] value; the taxon label is that value's
    rdfs:label.

    Relies on rdflib and dendropy.

    Returns a rooted dendropy.Tree when exactly one node ends up without a
    parent.  As written, the function returns None when no parentless node is
    found and raises ValueError when more than one is found.
    '''

    from dendropy import Node, Tree, Edge, TaxonSet, Taxon
    graph = rdflib.Graph()
    if file_obj:
        graph.parse(file=file_obj)
    else:
        graph.parse(data=data, format='xml')
    # Maps id() of an rdflib term to the dendropy Node created for it.
    nd_dict = {}
    # NOTE(review): has_parent_predicate is not used below.
    has_parent_predicate = OBO_PREFIX + HAS_PARENT_PREDICATE
    if _DEBUGGING:
        out = open('parse_rdf.txt', 'w')
    taxon_set = TaxonSet()
    OBO = rdflib.Namespace(u"http://purl.obolibrary.org/obo/")
    # Nodes that have been seen as a parent but not (yet) as a child.
    parentless = set()
    for s, p, o in graph.triples((None, OBO[HAS_PARENT_PREDICATE], None)):
        parent = nd_dict.get(id(o))

        if parent is None:
            #print 'Parent o.value = ', o.value(rdflib.RDF.nodeID)

            # Keep the raw term for the dict key; `o` is rebound to a Resource.
            raw_o = o
            o = rdflib.resource.Resource(graph, o)
            o_tu = o.value(OBO[REPRESENTS_TU_PREDICATE])
            if o_tu:
                o_label = o_tu.value(rdflib.RDFS.label)
                t = Taxon(label=o_label)
                taxon_set.append(t)
                parent = Node(taxon=t)
            else:
                parent = Node()

            nd_dict[id(raw_o)] = parent
            # A newly created parent is a root candidate until it shows up
            # as somebody's child.
            parentless.add(parent)
        child = nd_dict.get(id(s))
        if child is None:
            raw_s = s
            s = rdflib.resource.Resource(graph, s)
            s_tu = s.value(OBO[REPRESENTS_TU_PREDICATE])
            if s_tu:
                s_label = s_tu.value(rdflib.RDFS.label)
                t = Taxon(label=s_label)
                taxon_set.append(t)
                child = Node(taxon=t)
            else:
                child = Node()
            nd_dict[id(raw_s)] = child
        else:
            # Already known node now has a parent: no longer a root candidate.
            if child in parentless:
                parentless.remove(child)
        parent.add_child(child)

        if _DEBUGGING:
            out.write('%s %s %s\n' % (str(s), p, o))
            out.write('%s\n' % (str(parentless)))
    if _DEBUGGING:
        out.close()
    if len(parentless) != 1:
        message = "Expecting to find exactly Node (an object of a has_Parent triple) in the graph without a parent. Found %d" % len(
            parentless)
        # With the cutoff set to 1 + len(parentless), the "fewer than cutoff"
        # test below is always true, so only the "> 0" part decides.
        CUTOFF_FOR_LISTING_PARENTLESS_NODES = 1 + len(
            parentless
        )  # we might want to put in a magic number here to suppress really long output
        if len(parentless) > 0 and len(
                parentless) < CUTOFF_FOR_LISTING_PARENTLESS_NODES:
            message += ":\n  "
            for i in parentless:
                if i.label:
                    message += "\n  " + i.label
                else:
                    message += "\n  <unlabeled>" + str(id(i))
            raise ValueError(message)
        else:
            # Reached only when no parentless node was found at all.
            return None
    tree = Tree(taxon_set=taxon_set)
    tree.seed_node = list(parentless)[0]
    tree.is_rooted = True
    return tree