Ejemplo n.º 1
0
            "right_value < ? AND name_class == 'scientific name' AND node_rank LIKE ?", (left_value, right_value, rank.strip()))
        return [row[0] for row in db_cursor.fetchall()]
    #    for row in db_cursor.fetchall():
    #        print row
    
    #    return []

    # All three positional arguments are required before any work is done.
    if len(sys.argv) < 4:
        print("usage: fill_tips_from_taxonomy.py <treefile> <taxonomydb> <outfile>")
        sys.exit(0)

    # Only the first line of the tree file is parsed; the file is released
    # as soon as it has been read.
    with open(sys.argv[1], "r") as intree_file:
        tree = newick3.parse(intree_file.readline())

    # For every leaf of the parsed tree, attach one new tip per name returned
    # by get_child_taxa(<leaf label>, "genus", <cursor>).
    conn = sqlite3.connect(sys.argv[2])
    try:
        c = conn.cursor()

        for tip in tree.leaves():
            for child_name in get_child_taxa(tip.label, "genus", c):
                child = phylo3.Node()
                child.label = child_name
                child.istip = True
                tip.add_child(child)
    finally:
        # Release the database handle even when a lookup raises.
        conn.close()

    # Write the expanded tree as a single ';'-terminated newick string.
    with open(sys.argv[3], "w") as outfile:
        outfile.write(newick3.to_string(tree) + ";")
                                all_found_l = False
                            else:
                                any_found_l = True

                        # not sure if having two options here should have any effect... i think they are the same for practical purposes
                        if (all_found_r and not any_found_l):
#                            result["identical_side"] = "R"
                            r_tree_string = "((" + ",".join(result["seq_labels"]["R"]) + "),(" + ",".join(result["seq_labels"]["L"]) + "));"
                        elif (all_found_l and not any_found_r):
#                            result["identical_side"] = "L"
                            r_tree_string = "((" + ",".join(result["seq_labels"]["L"]) + "),(" + ",".join(result["seq_labels"]["R"]) + "));"
                        
                    result_tree = newick3.parse(r_tree_string)

                # write the result topology to the set of observed topologies for this node
                topo_file.write(newick3.to_string(result_tree)+";\n")

        # get the ICA score from phyx
        pxbp_outfile = "temp_pxbp_out." + node.label
        pxbp_args = ["pxbp", "-t", topo_file_name, ">", pxbp_outfile]

        subprocess.call(" ".join(pxbp_args),shell=True)
        
        # set default values: if we don't find a score in the pxbp output then this bipart is never observed
        ica = "-1"
        freq = "0"

        with open(pxbp_outfile,"r") as pxbp_result:
            for line in pxbp_result:
                parts = line.split("\t")
                if len(parts) > 1:
Ejemplo n.º 3
0
        # Strip knuckles (single-child nodes) sitting at the root: the only
        # child is promoted to root until the root no longer has exactly one
        # child. Testing for exactly one child (rather than "fewer than two")
        # keeps a childless root from being indexed into.
        while len(tree.children) == 1:
            only_child = tree.children[0]
            only_child.parent = None
            only_child.isroot = True
            tree = only_child

        # The tree cannot be edited while it is being traversed, so the
        # knuckles are only recorded during this pass.
        knuckles = []
        for parent in tree.iternodes(phylo3.PREORDER):

            if parent.istip:
                continue

            # Unlabelled nodes are logged using their str() representation.
            node_name = str(parent.label) if parent.label is not None else str(parent)
            logfile.write("node '" + node_name + "' has " +
                          str(len(parent.children)) + " children\n")

            if len(parent.children) == 1:
                logfile.write("\tthis node will be pruned\n")
                knuckles.append(parent)

        # Graft each knuckle out: its single child is attached to the
        # knuckle's parent, then the knuckle itself is detached.
        for k in knuckles:
            k.parent.add_child(k.children[0])
            k.parent.remove_child(k)

        # print() with a single argument behaves the same on Python 2 and 3.
        print(newick3.to_string(tree) + ";")
Ejemplo n.º 4
0
            "error: mrca in master tree must define a biparition, not a multifurcation!"
        )

    # All target taxa are contained by the master mrca, so the bipartition to
    # match is the one taken from its first child (i.e. *below* the mrca).
    master_bipart = get_bipart(master_mrca.children[0])

    # NOTE: a shortcut that simply re-printed the input when its current root
    # was already compatible used to live here. It was disabled because it
    # fails when there is a polytomy at the root of the input tree.

    # Reroot on the first descendant whose bipartition is compatible with the
    # master bipartition. Nothing but the tree may be printed here, otherwise
    # the output will not be valid newick.
    for candidate in target.descendants():
        if not is_compatible(get_bipart(candidate), master_bipart):
            continue

        rerooted = phylo3.get_tree_rooted_on(candidate)
        print(newick3.to_string(rerooted) + ";\n")
        sys.exit(0)

    # Reaching this point means no bipartition in the input tree was
    # compatible with the root bipartition of the master tree (barring bugs
    # elsewhere in the script).
    sys.exit(
        "error: could not root tree. there may not be a bipartition in the target that is compatible with the root bipart in master!"
    )
Ejemplo n.º 5
0
#!/usr/bin/env python

if __name__ == '__main__':
    # Re-print every tree in the input file (one newick string per line)
    # with node labels switched off.

    import newick3
    import phylo3
    import sys

    if len(sys.argv) < 2:
        # print() call form so the script also parses under Python 3.
        print("usage: print_tip_names <treefile>")
        sys.exit()

    treefname = sys.argv[1]

    # The context manager guarantees the tree file is closed when done.
    with open(treefname, "r") as treefile:
        for line in treefile:
            tree = newick3.parse(line)
            print(newick3.to_string(tree, use_node_labels=False))
Ejemplo n.º 6
0
        new_node.add_child(nodes.pop())
        if make_brlens:
            new_node.length = random.random() * max_brlen
        nodes.append(new_node)
    
    return nodes[0]

if __name__ == "__main__":
    # Command-line entry point: read the tip specification and the optional
    # branch-length bound, then print the resulting tree as newick.

    description = "make a random tree with X tips"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-x", "--number-of-tips", type=int, nargs=1, required=False,
        help="The number of tips. If this is specified, tip labels will be numbers. Either this or a set of input tip labels must be specified.")

    parser.add_argument(
        "-l", "--tip-labels", nargs='*', required=False,
        help="A set of tip labels for the random tree. If this is not set, then the number of tips must be specified, and tip labels will be numbers.")

    parser.add_argument(
        "-m", "--max-branch-length", type=float, nargs=1, required=False,
        help="The upper bound for creating randomized branch lengths. If not set, branch lengths will be zero. This argument cannot be used in conjuction with the TREE_AGE argument.")

    args = parser.parse_args()

    # Exactly one of --number-of-tips / --tip-labels must be supplied.
    have_count = args.number_of_tips is not None
    have_labels = args.tip_labels is not None
    if have_count == have_labels:
        sys.exit("You must specify either the number of tips or the tip labels, but not both")

    # Explicit labels win; otherwise tips are labelled "1".."N".
    if have_labels:
        tip_labels = args.tip_labels
    else:
        tip_labels = [str(i) for i in range(1, args.number_of_tips[0] + 1)]

    # nargs=1 wraps the parsed value in a one-element list.
    if args.max_branch_length is not None:
        max_brlen = args.max_branch_length[0]
    else:
        max_brlen = 0
    make_brlens = max_brlen > 0

    # NOTE(review): `nodes` is not assigned anywhere in the visible part of
    # this block - confirm where it is defined.
    print(newick3.to_string(nodes[0], use_branch_lengths=make_brlens) + ";")
Ejemplo n.º 7
0
        tree = newick3.parse(line)

        # Strip knuckles (single-child nodes) sitting at the root: the only
        # child is promoted to root until the root no longer has exactly one
        # child. Testing for exactly one child (rather than "fewer than two")
        # keeps a childless root from being indexed into.
        while len(tree.children) == 1:
            only_child = tree.children[0]
            only_child.parent = None
            only_child.isroot = True
            tree = only_child

        # The tree cannot be edited while it is being traversed, so the
        # knuckles are only recorded during this pass.
        knuckles = []
        for parent in tree.iternodes(phylo3.PREORDER):

            if parent.istip:
                continue

            # Unlabelled nodes are logged using their str() representation.
            node_name = str(parent.label) if parent.label is not None else str(parent)
            logfile.write("node '" + node_name + "' has " + str(len(parent.children)) + " children\n")

            if len(parent.children) == 1:
                logfile.write("\tthis node will be pruned\n")
                knuckles.append(parent)

        # Graft each knuckle out: its single child is attached to the
        # knuckle's parent, then the knuckle itself is detached.
        for k in knuckles:
            k.parent.add_child(k.children[0])
            k.parent.remove_child(k)

        # print() with a single argument behaves the same on Python 2 and 3.
        print(newick3.to_string(tree) + ";")
Ejemplo n.º 8
0
    )

    parser.add_argument(
        "-m", "--max-branch-length", type=float, nargs=1, required=False,
        help="The upper bound for creating randomized branch lengths. If not set, branch lengths will be zero. This argument cannot be used in conjuction with the TREE_AGE argument.")

    args = parser.parse_args()

    # Exactly one of --number-of-tips / --tip-labels must be supplied.
    count_given = args.number_of_tips is not None
    labels_given = args.tip_labels is not None
    if count_given == labels_given:
        sys.exit("You must specify either the number of tips or the tip labels, but not both")

    # Explicit labels win; otherwise tips are labelled "1".."N".
    if labels_given:
        tip_labels = args.tip_labels
    else:
        tip_labels = [str(i) for i in range(1, args.number_of_tips[0] + 1)]

    # nargs=1 wraps the parsed value in a one-element list.
    if args.max_branch_length is not None:
        max_brlen = args.max_branch_length[0]
    else:
        max_brlen = 0
    make_brlens = max_brlen > 0

    # NOTE(review): `nodes` is not assigned anywhere in the visible part of
    # this block - confirm where it is defined.
    print(newick3.to_string(nodes[0], use_branch_lengths=make_brlens) + ";")
        # Insert one new tip, labelled `name`, as the sibling of a randomly
        # chosen existing node: a new internal node is placed above the chosen
        # node and receives both the chosen node and the new tip as children.

        # get one node at random from the tree
        n = random.sample(list(get_nodes(tree)), 1)[0]

        # create a new internal node to be the parent of the tip we're about to add
        new_parent = phylo3.Node()

        if n.parent == None: # root case: new parent becomes the new root of the tree
            tree = new_parent

        else: # non-root case: randomly place new parent on the branch leading to n
            p = n.parent
            p.remove_child(n)
            p.add_child(new_parent)

        # reattach n to the new parent
        new_parent.add_child(n)

        # update branch lengths: new_parent takes a uniformly drawn share of
        # n's current length (between min_branch_length and
        # n.length - min_branch_length) and n keeps the remainder, so the two
        # lengths sum to n's previous length.
        # NOTE(review): this also runs in the root case - confirm the root
        # node carries a usable `length`.
        new_parent.length = random.uniform(min_branch_length, n.length - min_branch_length)
        n.length = n.length - new_parent.length

        # add the new tip as a child of the new parent
        new_tip = phylo3.Node()
        new_tip.label = name
        new_tip.istip = True
        # NOTE(review): `depth` is not assigned in the visible code; presumably
        # phylo3.Node provides it - confirm what it measures before relying on
        # this length.
        new_tip.length = new_parent.depth - new_parent.length
        new_parent.add_child(new_tip)
            
    print(newick3.to_string(tree))
Ejemplo n.º 10
0
#!/usr/bin/env python

import sys, newick3, phylo3

# Three positional arguments are read below (sys.argv[1..3]), so argv must
# hold at least four entries including the script name.
if len(sys.argv) < 4:
    sys.exit("usage: extract_subtree.py <treefile> <mrca1> <mrca2>")

tree_file_name = sys.argv[1]
mrca1 = sys.argv[2].strip()
mrca2 = sys.argv[3].strip()

# Only the first line of the tree file is parsed.
with open(tree_file_name) as treefile:
    tree = newick3.parse(treefile.readline())

# Print the node phylo3.get_mrca returns for the two labels, as newick.
# print() with a single argument behaves the same on Python 2 and 3.
print(newick3.to_string(phylo3.get_mrca(tree, [mrca1, mrca2])))
Ejemplo n.º 11
0
def validate_tree_type(t):
    """Translate a tree-type name into the matching generator function.

    Returns get_pectinate_tree for 'pectinate' and get_balanced_tree for
    'balanced'. Any other value ends the program through sys.exit with an
    'invalid tree type: ...' message.
    """
    # Reject unknown names up front.
    if t not in ('pectinate', 'balanced'):
        sys.exit('invalid tree type: ' + t)

    return get_pectinate_tree if t == 'pectinate' else get_balanced_tree

if __name__ == '__main__':
    # Command-line entry point: build a tree with the requested generator
    # and print it as a ';'-terminated newick string.

    parser = argparse.ArgumentParser('')

    parser.add_argument('-n', '--number-of-tips', type=int, required=True,
        help='The number of tips to be in the final tree')

    parser.add_argument('-b', '--mean-branch-length', type=float, required=True,
        help='The length to be used for internal branches')

    # validate_tree_type converts the option value into the generator function.
    parser.add_argument('-t', '--tree-generator-method', type=validate_tree_type, required=True,
        help='The type of tree to generate. Must be either "pectinate" or "balanced".')

    args = parser.parse_args()

    # Raise the recursion limit for large trees, but never lower it below the
    # interpreter's current setting (a small tip count would otherwise shrink
    # it to barely over 100 frames).
    sys.setrecursionlimit(max(sys.getrecursionlimit(), args.number_of_tips + 100))

    # currently just supports constant rate
    t = args.tree_generator_method(args.number_of_tips, lambda x: x, args.mean_branch_length)

    print(newick3.to_string(t) + ';')
Ejemplo n.º 12
0
                        # not sure if having two options here should have any effect... i think they are the same for practical purposes
                        if (all_found_r and not any_found_l):
                            #                            result["identical_side"] = "R"
                            r_tree_string = "((" + ",".join(
                                result["seq_labels"]["R"]) + "),(" + ",".join(
                                    result["seq_labels"]["L"]) + "));"
                        elif (all_found_l and not any_found_r):
                            #                            result["identical_side"] = "L"
                            r_tree_string = "((" + ",".join(
                                result["seq_labels"]["L"]) + "),(" + ",".join(
                                    result["seq_labels"]["R"]) + "));"

                    result_tree = newick3.parse(r_tree_string)

                # write the result topology to the set of observed topologies for this node
                topo_file.write(newick3.to_string(result_tree) + ";\n")

        # get the ICA score from phyx
        pxbp_outfile = "temp_pxbp_out." + node.label
        pxbp_args = ["pxbp", "-t", topo_file_name, ">", pxbp_outfile]

        subprocess.call(" ".join(pxbp_args), shell=True)

        # set default values: if we don't find a score in the pxbp output then this bipart is never observed
        ica = "-1"
        freq = "0"

        with open(pxbp_outfile, "r") as pxbp_result:
            for line in pxbp_result:
                parts = line.split("\t")
                if len(parts) > 1:
Ejemplo n.º 13
0
        # Insert one new tip, labelled `name`, as the sibling of a randomly
        # chosen existing node: a new internal node is placed above the chosen
        # node and receives both the chosen node and the new tip as children.

        # get one node at random from the tree
        n = random.sample(list(get_nodes(tree)), 1)[0]

        # create a new internal node to be the parent of the tip we're about to add
        new_parent = phylo3.Node()

        if n.parent == None:  # root case: new parent becomes the new root of the tree
            tree = new_parent

        else:  # non-root case: randomly place new parent on the branch leading to n
            p = n.parent
            p.remove_child(n)
            p.add_child(new_parent)

        # reattach n to the new parent
        new_parent.add_child(n)

        # update branch lengths: new_parent takes a uniformly drawn share of
        # n's current length (between min_branch_length and
        # n.length - min_branch_length) and n keeps the remainder, so the two
        # lengths sum to n's previous length.
        # NOTE(review): this also runs in the root case - confirm the root
        # node carries a usable `length`.
        new_parent.length = random.uniform(min_branch_length,
                                           n.length - min_branch_length)
        n.length = n.length - new_parent.length

        # add the new tip as a child of the new parent
        new_tip = phylo3.Node()
        new_tip.label = name
        new_tip.istip = True
        # NOTE(review): `depth` is not assigned in the visible code; presumably
        # phylo3.Node provides it - confirm what it measures before relying on
        # this length.
        new_tip.length = new_parent.depth - new_parent.length
        new_parent.add_child(new_tip)

    print(newick3.to_string(tree))