예제 #1
0
    def root_tree(self):
        """Root this family's tree on its outgroup and write the rooted tree.

        The tree is read from
        ``BasePath.outpath + "/" + fam_id + "/" + fam_id +
        BasePath.fasttree_fileextension`` (loaded with ``format=1``).

        The outgroup sequences are obtained from
        ``get_regex_matching_sequence_list_from_node`` using the outgroup
        regex read from the species profile file.  Two cases follow:

        * If ``check_monophyly`` reports those sequences (compared by leaf
          ``name``) as monophyletic, the tree is rooted on their common
          ancestor.
        * Otherwise the tree is rooted on the first element returned by
          ``arrange_outgroup_sequence_ids``.

        In both cases the rooted tree is handed to ``write_rooted_tree``.
        """
        # The profile file yields four values; only the outgroup regex and the
        # outgroup id array are needed for rooting.
        (outgrp_regex_str, _species_dict, _ingroup_regex_str,
         outgroup_id_arr) = read_profile_file(BasePath.species_profile_filename)

        fam_tree_filename = (BasePath.outpath + "/" + self.fam_id + "/" +
                             self.fam_id + BasePath.fasttree_fileextension)
        fam_tree = PhyloTree(fam_tree_filename, format=1)

        outgrp_re = re.compile(outgrp_regex_str)
        outgroup_sequence_list = self.get_regex_matching_sequence_list_from_node(
            fam_tree, outgrp_re)

        # check_monophyly returns a tuple; its first element is the boolean
        # monophyly flag.
        outgroup_monophyly_check = fam_tree.check_monophyly(
            values=outgroup_sequence_list, target_attr="name")

        # Single-argument print(...) emits the same text under Python 2 and 3.
        if outgroup_monophyly_check[0]:
            print("Outgroups are monophyletic")
            root_node = fam_tree.get_common_ancestor(outgroup_sequence_list)
        else:
            print("Outgroups are not monophyletic")
            outgroup_sequence_list_from_seqlist = (
                self.get_outgroup_sequences_from_seqlist())
            arranged_outgroup_sequence_list = self.arrange_outgroup_sequence_ids(
                outgroup_sequence_list_from_seqlist, outgroup_id_arr)
            # Fall back to the first sequence in the arranged list.
            root_node = arranged_outgroup_sequence_list[0]
            print("Rooting using sequence {0}".format(root_node))

        fam_tree.set_outgroup(root_node)
        self.write_rooted_tree(fam_tree)
    parser.add_argument("-g",
                        "--gene_tree",
                        help="Homolog tree to be assessed.",
                        required=True)
    parser.add_argument("-og",
                        "--outgroupf",
                        help="Outgroup taxon names, one per line.",
                        required=True)

    if len(sys.argv[1:]) == 0:
        sys.argv.append("-h")

    args = parser.parse_args()

    og_list = []
    with open(args.outgroupf, "r") as ogf:
        for line in ogf:
            og_list.append(line.strip())

    tr = PhyloTree(args.gene_tree,
                   sp_naming_function=lambda node: node.name.split("@")[0])

    og_in_tr = []
    for l in tr.iter_leaves():
        if l.species in og_list:
            og_in_tr.append(l.species)

    print(args.gene_tree + "\t" +
          str(tr.check_monophyly(values=og_in_tr, target_attr="species")[0]))
예제 #3
0
# Reference tree given as an inline Newick string.
t = PhyloTree(
    "((((rivulorum_F790:0.25862,((vaneedeni_KwaF773:0.0,vaneedeni_KwaF774:0.0,vaneedeni_KwaF775:0.0,vaneedeni_KwaF780:0.0,vaneedeni_KwaF782:0.0,vaneedeni_KwaF783:0.0,vaneedeni_KwaF786:0.0):0.00055,vaneedeni_KwaF784:0.00055)0.982:0.02123)0.176:0.00899,((funestus_TanF561:0.00055,((funestus_MozF804:0.00055,funestus_Ugf401:0.00055)0.000:0.00055,(funestus_GhaF265:0.00055,(funestus_GhaF264:0.0,funestus_Ken4590:0.0,funestus_MozF123:0.0,funestus_MozF260:0.0,funestus_MozF29:0.0,funestus_MozF35:0.0,funestus_TanF601:0.0,funestus_Ugf399:0.0,funestus_Ugf403:0.0,funestus_Zam281:0.0):0.00055)0.000:0.00055)0.856:0.00120)0.891:0.00241,(funestuscf_MALAF105_7:0.00055,(funestuscf_MALAF99_4:0.0,funestuscf_MALF98_2:0.0):0.00055)0.993:0.00860)0.854:0.00228)0.966:0.00732,((parensis_KwaF761:0.0,parensis_KwaF762:0.0,parensis_KwaF766:0.0,parensis_KwaF767:0.0,parensis_KwaF768:0.0,parensis_KwaF769:0.0,parensis_KwaF835:0.0):0.00053,parensis_KwaF851:0.00055)0.982:0.00089)0.948:0.00384,((longipalpusC_11:0.0,longipalpusC_13:0.0):0.00055,longipalpusC_551_12533:0.00076)1.000:0.00051,((longipalpusC_15:0.0,longipalpusC_16:0.0,longipalpusC_551_12634:0.0):0.00055,(longipalpusC_4:0.00055,longipalpusC_12:0.00055)1.000:0.00055)0.709:0.00055);"
)
# The species label is the part of the leaf name before the first "_".
t.set_species_naming_function(
    lambda node: node.name.split("_")[0])  # n.species, n.name
t.set_outgroup(t & outgroup)

# Species groups to test for monophyly in every tree of `treelist`.
taxon = [["parensis", "longipalpusC", "vaneedeni"],
         ["funestus", "funestuscf", "vaneedeni"]]

# Maps the index of each group in `taxon` to a pair of lists
# (ancestor `dist` values, ancestor `support` values), with one entry per tree
# in which the group is monophyletic.
taxdict = {}
for i, tax in enumerate(taxon):
    nodesupport = []
    nodeage = []
    # NOTE(review): the loop variable rebinds the module-level `t` created
    # above; kept as-is in case later code relies on it -- confirm and rename.
    for t in treelist:
        # check_monophyly returns a tuple (flag, clade type, offending
        # leaves).  A non-empty tuple is always truthy, so the boolean flag
        # at index 0 must be tested explicitly.
        if t.check_monophyly(values=tax, target_attr="species")[0]:
            samples = []
            for sp in tax:
                samples.extend(t.search_nodes(species=sp))
            ancnode = t.get_common_ancestor(samples)
            nodeage.append(ancnode.dist)
            nodesupport.append(ancnode.support)
    taxdict[i] = (nodeage, nodesupport)

    if not winlist:
        winarray = np.ones(len(treelist), dtype=bool)
    mtreelist, winarray = getMonophyletic(treelist, quart, winarray)
    btreelist, winarray = supportFilt(mtreelist, quart, winarray)
    if nodes:
        nh1, nh2 = nodeHeights(btreelist, quart)
    else: