Beispiel #1
0
def sink_tree(filen, outfn, cutoffa=80, cutoffb=95):
    """Collapse weakly supported nodes of a tree and write the result.

    Reads the first tree from ``filen``. Every node with at least two
    children whose label has the form ``"a/b"`` is parsed as two numeric
    support values; the node is collapsed (its children are re-attached to
    its parent) when ``a < cutoffa`` or ``b < cutoffb``. All parsed
    ``"a/b"`` labels are cleared, and the tree is written to ``outfn`` as
    its newick representation followed by ``";"``.

    Args:
        filen: path of the input tree file.
        outfn: path of the output file (overwritten).
        cutoffa: minimum accepted value for the first support number.
        cutoffb: minimum accepted value for the second support number.

    Raises:
        ValueError: if an ``"a/b"`` label does not hold two numbers.
    """
    t = next(tree_reader.read_tree_file_iter(filen))
    weak = set()
    for nd in t.iternodes():
        # Test the node being visited (not the root ``t``) so that tips and
        # single-child nodes are really skipped.
        if len(nd.children) < 2:
            continue
        if "/" not in nd.label:
            continue
        parts = nd.label.split("/")
        a = float(parts[0])
        b = float(parts[1])
        if a < cutoffa or b < cutoffb:
            weak.add(nd)
        nd.label = ""
    for nd in weak:
        par = nd.parent
        if par is None:
            # The root has no parent to receive its children; leave it.
            continue
        par.remove_child(nd)
        # Iterate over a copy in case re-parenting touches nd.children.
        for ch in list(nd.children):
            ch.parent = par
            par.add_child(ch)
    # The context manager closes the file even if serialisation fails.
    with open(outfn, "w") as outf:
        outf.write(t.get_newick_repr(False) + ";")
import sys
import tree_reader
import os
from utils import newick_name

if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("python " + sys.argv[0] + " all_names.txt infile outfile")
        sys.exit(0)
    # Build the lookup from the tab-separated table: the first column is the
    # label to replace and the fourth column is its replacement.
    idn = {}
    with open(sys.argv[1], "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            idn[spls[0]] = spls[3]
    # Relabel every node that has a table entry and write each tree out.
    # The context manager closes the output even if a tree fails to process.
    with open(sys.argv[3], "w") as outf:
        for tre in tree_reader.read_tree_file_iter(sys.argv[2]):
            for nd in tre.iternodes():
                if nd.label in idn:
                    nd.label = newick_name(idn[nd.label])
            outf.write(tre.get_newick_repr(True) + ";")
Beispiel #3
0
    for i in tre.iternodes():
        if len(i.children) > 0:
            if i.label in nms:
                toremove.append(i)
    for i in toremove:
        sys.stderr.write("remove internal: "+i.get_newick_repr(False)+"\n")
        par = i.parent
        par.remove_child(i)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        # print() with one argument and next() behave the same on Python 2
        # and 3, unlike the print statement and iterator.next().
        print("python "+sys.argv[0]+ " addtree bigtre")
        sys.exit(0)

    # Only the first tree of each input file is used.
    tree1 = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    bigtree = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    if EDITLEN:
        tree_utils.set_heights(tree1)
        tree_utils.set_heights(bigtree)

    rootnms = set(tree1.lvsnms())

    # Prune bigtree using the tip names of the tree that is being added.
    remove_int_ext_nodes(rootnms, bigtree)

    othernms = set(bigtree.lvsnms())
    if VERBOSE:
        # Report tree1 tip names that are not tips of the pruned bigtree.
        ddifs = rootnms.difference(othernms)
        for nm in ddifs:
            sys.stderr.write(nm+"\n")
    diffnms = []
import os
import seq
import networkx as nx
import tree_reader
from networkx.drawing.nx_agraph import write_dot
"""
This will make a graph of the connectivity of the genes and taxa
given some tree (probably taxonomy) and a set of genes
"""

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("python " + sys.argv[0] + " tree files...")
        sys.exit(0)

    tree = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    badseqs = []
    # Arguments starting with "_" go to goodseqs (prefix stripped); every
    # other argument is treated as a fasta file to read.
    seqfiles, goodseqs = [], []
    for arg in sys.argv[2:]:
        if arg[0] == "_":
            goodseqs.append(arg[1:])
        else:
            seqfiles.append(arg)
    # Map each sequence file to the names of the records it contains.
    genes = {
        fn: [rec.name for rec in seq.read_fasta_file_iter(fn)]
        for fn in seqfiles
    }

    print(seqfiles)
Beispiel #5
0
def run_bp_window(infn, tsegfiles, mltr, segc, outf):
    """Run ``bp`` for every segment tree and record conflicts with ``mltr``.

    For each file in ``tsegfiles`` the external ``bp`` program is run against
    the reference tree file ``mltr``. Two trees taken from fixed positions
    near the end of its output are walked in parallel to count conflicting
    nodes and to mark, per bipartition of the reference tree, whether the
    segment conflicts (-1), is concordant (1) or neither (0).

    Outputs:
        * one line on stdout: ``infn`` and the conflict count per segment;
        * ``outf + ".details.gz"``: per-segment conflicts, preferred splits
          and the first line of the segment tree file;
        * ``outf``: the plotting matrix (header of segment labels, then one
          row per reference bipartition), rendered with ``Rscript rplot.r``
          into ``outf + ".png"``.

    Args:
        infn: name printed in the summary line; its first 15 characters are
            also passed to the R script.
        tsegfiles: paths of the per-segment tree files. They are deleted
            afterwards when ``remove_intermediate_files`` is true.
        mltr: path of the reference tree file given to ``bp -c``.
        segc: one iterable per segment; its items are joined with "-" to
            label that segment in both output files.
        outf: path of the plotting-matrix file and prefix of the details
            and PNG file names.

    Raises:
        subprocess.CalledProcessError: if ``bp`` exits with a non-zero status.
    """
    write_r()
    mlto = next(tree_reader.read_tree_file_iter(mltr))
    mlbps = get_biparts(mlto)
    segs = {}
    segslong = {}
    segstree = {}
    plotsegs = []  # each row is a seg, each column is a node
    conflictsegscount = []
    for count, tsegfile in enumerate(tsegfiles):
        conflictcount = 0
        segs[count] = []
        segslong[count] = set()
        plotsegs.append([0] * len(mlbps))
        cmd = "bp -c " + mltr + " -t " + tsegfile + " -tv"
        # Close the segment file right after reading its first line instead
        # of leaking one open handle per segment.
        with open(tsegfile, "r") as segfh:
            segstree[count] = segfh.readline()
        o = subprocess.check_output(cmd.split(" "), stderr=subprocess.STDOUT)
        # str() of the bytes output keeps each newline as the two characters
        # backslash + "n", hence the split on the escaped form.
        keepo = str(o).split("\\n")
        # NOTE(review): the two trees are picked by fixed offsets from the
        # end of the bp output -- presumably the conflict-labelled and the
        # concordance-labelled tree; confirm against bp's -tv output format.
        cf = keepo[-8]
        cc = keepo[-7]
        cft = tree_reader.read_tree_string(cf)
        cct = tree_reader.read_tree_string(cc)
        for j, k in zip(cft.iternodes(), cct.iternodes()):
            if len(j.children) > 1:
                sbp = None
                inde = None
                if j.label != "" or k.label != "":
                    sbp = get_bipart(j, cft)
                    inde = get_bp_ind(mlbps, sbp)
                # conflict one
                if j.label != "":
                    if int(j.label) > 0:
                        conflictcount += 1
                        segs[count].append(sbp)
                        plotsegs[count][inde] = -1
                        # process the bp out from above to record the actual split that conflicts
                        start = False
                        for l in keepo:
                            if start:
                                if "  (" == l[0:3]:
                                    tttt = tree_reader.read_tree_string(
                                        l.strip().split(" ")[-1])
                                    segslong[count] = add_bp(
                                        segslong[count],
                                        get_biparts(tttt)[0])
                            if "read " == l[0:5]:
                                start = True
                            if "TREES " == l[0:6]:
                                break
                # concordant one
                if k.label != "":
                    if int(k.label) > 0:
                        plotsegs[count][inde] = 1
        if remove_intermediate_files:
            if os.path.exists(tsegfile):
                os.remove(tsegfile)
        # just a running tally of the conflicts per segment
        conflictsegscount.append(conflictcount)

    # print the number of conflicts per segment
    print(infn + " " + " ".join([str(k) for k in conflictsegscount]))

    # print the verbose stuff to the gzip
    with gzip.open(outf + ".details.gz", "wt") as detfile:
        # segs, segslong and segstree share the same keys, so one index
        # serves all three; zip stops at the shorter of segs and segc.
        for i, sc in zip(segs, segc):
            detfile.write(
                str(i) + " " + "-".join([str(k) for k in sc]) + "\n")
            for j in segs[i]:
                detfile.write(" conflicts with:" + str(j) + "\n")
            for j in segslong[i]:
                detfile.write(" prefers:" + str(j) + "\n")
            detfile.write(" tree:" + segstree[i] + "\n")

    # write the plotting information
    with open(outf, "w") as ouf:
        # Header: the quoted segment labels separated by single spaces.
        ouf.write(" ".join(
            "\"" + "-".join([str(k) for k in sc]) + "\"" for sc in segc))
        ouf.write("\n")
        # One row per reference bipartition, one column per segment.
        for i in range(len(mlbps)):
            ouf.write(" ".join(str(row[i]) for row in plotsegs) + "\n")
    # NOTE(review): this is a concatenated shell string; an outf or infn
    # containing spaces or shell metacharacters would break the command.
    cmd = ("Rscript rplot.r " + outf + " " + outf + ".png " + infn[:15] +
           " > rlog 2>&1")
    os.system(cmd)
Beispiel #6
0
import tree_reader
import sys
import os
"""
right now this just chooses the longest

BEWARE, this writes over the file
"""

if __name__ == "__main__":
    if len(sys.argv) != 3:
        # print() with one argument works on both Python 2 and 3.
        print("python " + sys.argv[0] + " table treefile")
        sys.exit(0)
    # Build the lookup from the tab-separated table: the fourth column is
    # the label to replace and the fifth column is its replacement.
    idn = {}
    with open(sys.argv[1], "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            idn[spls[3]] = spls[4]
    # Only the first tree in the file is relabelled and printed.
    tf = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    for nd in tf.iternodes():
        if nd.label in idn:
            nd.label = idn[nd.label].replace(" ", "_")
    print(tf.get_newick_repr() + ";")
    # Optional seventh command-line argument: a taxa list that is appended to
    # the populate_dirs_first.py command below to limit the taxa.
    # NOTE(review): dirl, taxon, py, DI, db and baitdir are assigned before
    # this excerpt; their exact meaning should be checked there.
    taxalistf = None
    if len(sys.argv) == 7:
        taxalistf = sys.argv[6]
        print(colored.yellow("LIMITING TO TAXA IN"), sys.argv[6])

    # Log file: taken from argv[5]; ".md.gz" is appended when it is missing.
    logfile = sys.argv[5]
    if logfile[-len(".md.gz"):] != ".md.gz":
        logfile += ".md.gz"

    # Step 1: run get_ncbi_tax_tree_no_species.py and redirect its stdout to
    # <dirl>/<taxon>.tre. Every command in this section is a concatenated
    # shell string, so values with spaces or shell metacharacters would
    # break it.
    tname = dirl + "/" + taxon + ".tre"
    cmd = py + " " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " > " + tname
    print(colored.yellow("MAKING TREE"), taxon)
    os.system(cmd)
    # Label of the first tree read back from that file (presumably its root
    # node -- confirm); it is used below as a directory name under dirl.
    trn = tree_reader.read_tree_file_iter(tname).__next__().label
    # Step 2: run make_dirs.py on the tree file and dirl.
    cmd = py + " " + DI + "make_dirs.py " + tname + " " + dirl
    print(colored.yellow("MAKING DIRS IN"), dirl)
    os.system(cmd)
    # Step 3: run populate_dirs_first.py, passing the taxa list when given.
    cmd = py + " " + DI + "populate_dirs_first.py " + tname + " " + dirl + " " + db
    if taxalistf != None:
        cmd += " " + taxalistf
    print(colored.yellow("POPULATING DIRS"), dirl)
    os.system(cmd)

    # Remove a leftover log.md.gz from the current directory.
    # NOTE(review): this targets the literal name "log.md.gz", not `logfile`,
    # which is what bait_tree.py receives below -- confirm this is intended.
    if os.path.isfile("log.md.gz"):
        os.remove("log.md.gz")
    # Step 4: run bait_tree.py on <dirl>/<trn>/ with the bait directory and
    # the log file name.
    cmd = py + " " + DI + "bait_tree.py " + dirl + "/" + trn + "/ " + baitdir + " " + logfile
    os.system(cmd)

    print(colored.blue("PYPHLAWD DONE " + emoticons.get_ran_emot("excited")))
Beispiel #8
0
            nd.data["names"] = set()
            tf = open(root + "/" + root.split("/")[-1] + ".table", "r")
            for i in tf:
                spls = i.strip().split("\t")
                nd.data["names"].add(spls[4].replace(" ", "_"))
            tf.close()
            for j in dirs:
                if "clusters" not in j:
                    cnd = node.Node()
                    cnd.label = j
                    cnd.parent = nd
                    nd.add_child(cnd)
                    nodes[j] = cnd
            count += 1

    intree = tree_reader.read_tree_file_iter(sys.argv[3]).next()
    for i in intree.iternodes(order="POSTORDER"):
        lvsnms = set(i.lvsnms())
        for j in tree.iternodes(order="POSTORDER"):
            if lvsnms.issubset(j.data["names"]):
                j.set_dist_root()
                i.data["h"] = j.droot
                if len(i.children) > 0:
                    i.label = str(j.droot)
                break

    for i in intree.iternodes(order="POSTORDER"):
        if len(i.children) > 0:
            try:
                if max(i.children[0].data["h"], i.children[1].data["h"]) - min(
                        i.children[0].data["h"], i.children[1].data["h"]) > 1:
    tr = open(sys.argv[1],"r")
    for i in tr:
        spls = i.strip().split(":")
        if "taxon" in spls[0]:
            removetaxa.add(spls[1])
        elif "constraint" in spls[0]:
            remove_mrca.append(spls[1].split(","))
    tr.close()

    tr = open(sys.argv[3]+".postrem","w")
    for i in seq.read_fasta_file_iter(sys.argv[3]):
        if i.name not in removetaxa:
            tr.write(i.get_fasta())
    tr.close()

    tree = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    nodes = {}
    mrca_pars = [] #vector of the parents of each remove constraint
    child_constraints = [] #vector of child_constraints for each remove constraint
    for i in tree.leaves():
        nodes[i.label] = i
    for i in remove_mrca:
        tnods = []
        for j in i:
            tnods.append(nodes[j])
        mr = tree_utils.get_mrca(tnods,tree)
        par = mr.parent
        tchild_constraints = []
        for j in  mr.children:
            if len(j.children) > 0:
                tchild_constraints.append(j)
import argparse as ap

def generate_argparser():
    """Build the command-line parser for change_wcid_to_name_tre.py.

    Returns:
        An ``ArgumentParser`` with three required string options:
        ``-t/--table``, ``-i/--infile`` and ``-o/--outfile``.
    """
    # Build the parser exactly once. A second bare ArgumentParser() used to
    # replace this one and silently dropped the program name and the
    # defaults-showing help formatter.
    parser = ap.ArgumentParser(
        prog="change_wcid_to_name_tre.py",
        formatter_class=ap.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-t", "--table", type=str, help="WCID translation table", required=True)
    parser.add_argument("-i", "--infile", type=str, help="Input tree", required=True)
    parser.add_argument("-o", "--outfile", type=str, help="Output tree", required=True)
    return parser

if __name__ == "__main__":
    parser = generate_argparser()
    # With no arguments at all, show the help text instead of an error.
    if len(sys.argv[1:]) == 0:
        sys.argv.append("-h")
    args = parser.parse_args(sys.argv[1:])

    # Build the lookup from the tab-separated table: the third-from-last
    # column is the label to replace and the last column is its replacement.
    idn = {}
    with open(args.table, "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            idn[spls[-3]] = spls[-1]
    # Relabel every node that has a table entry and write each tree out.
    # The context manager closes the output even if a tree fails to process.
    with open(args.outfile, "w") as outf:
        for tre in tree_reader.read_tree_file_iter(args.infile):
            for nd in tre.iternodes():
                if nd.label in idn:
                    nd.label = newick_name(idn[nd.label])
            outf.write(tre.get_newick_repr(True)+";")
        count += 1
        spls = i.split("\t|\t")
        try:
            idn["ott"+spls[0]] = spls[2].replace(" ","_")
        except:
            continue
    tab.close()
    return idn

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("python "+sys.argv[0]+" ott_taxonomy.tsv db infile outfile")
        sys.exit(0)
    # Sort every non-empty node label of the input trees into one of two
    # lists: labels containing "ott" (with that text removed) and the rest.
    ottids, ncbis = [], []
    for tre in tree_reader.read_tree_file_iter(sys.argv[3]):
        for nd in tre.iternodes():
            label = nd.label
            if len(label) == 0:
                continue
            if "ott" in label:
                ottids.append(label.replace("ott", ""))
            else:
                ncbis.append(label)

    # Resolve the ott ids first, then the remaining labels against the db.
    oidn = get_ott_names(sys.argv[1], ottids)
    nidn = get_ncbi_names(sys.argv[2], ncbis)

    names_done = set()
import argparse
import tree_reader

if __name__ == "__main__":
    if len(sys.argv[1:]) == 0:
        sys.argv.append("-h")

    parser = argparse.ArgumentParser()
    parser.add_argument("speciesTree", help="species tree in newick")
    parser.add_argument("geneTree",
                        help="gene tree in newick format, \
                        with taxon names formatted species@sequence, \
                        where species matches species tree.")
    args = parser.parse_args()

    sTree = [x for x in tree_reader.read_tree_file_iter(args.speciesTree)][0]
    gTree = [x for x in tree_reader.read_tree_file_iter(args.geneTree)][0]

    species = sTree.lvsnms()
    genes = []
    for i in gTree.lvsnms():
        if "@" in i:
            i = i.split("@")
            genes.append((i[0], "@" + i[1]))
        else:
            genes.append((i, ""))

    mapping = {}
    for s in species:
        mapping[s] = []
        for g in genes:
import sys
import os
import tree_reader
import tree_utils
"""
assuming that the datedtre just has ott ids as the tip names

assuming the labelled_supertree has ott in front of the ott ids
"""

if __name__ == "__main__":
    if len(sys.argv) != 3:
        # print() with one argument and next() work on both Python 2 and 3,
        # unlike the print statement and iterator.next().
        print("python " + sys.argv[0] + " datedtree labelled_superrtree")
        sys.exit(0)
    # Only the first tree of each input file is used.
    dated = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    ott = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    tree_utils.set_heights(dated)
    # key is name and value is node (a later duplicate label wins)
    ottlvsd = {nd.label: nd for nd in ott.iternodes()}

    # Attach the matching supertree node to every tip of the dated tree.
    # NOTE(review): labels are compared as-is; an earlier variant looked up
    # "ott" + label instead -- confirm which naming the inputs really use.
    for tip in dated.leaves():
        try:
            match = ottlvsd[tip.label]
        except KeyError:
            # Only a missing label is "not matched"; any other failure is a
            # real error and is no longer swallowed.
            sys.stderr.write("not matched " + str(tip.label) + "\n")
            continue
        tip.data["node"] = match

    done = set()
    dates = {}  # key is node, value is date
    dates_names = {}  # key is node, value is mrca string
Beispiel #14
0
    # NOTE(review): dirl, taxon and DI are assigned before this excerpt;
    # their exact meaning should be checked there. This section uses
    # Python 2 syntax (print statement, iterator.next()).
    db = sys.argv[2]
    # Optional fifth command-line argument: a taxa list that is passed to
    # the tree-building and directory-populating helper scripts below.
    taxalistf = None
    if len(sys.argv) == 5:
        taxalistf = sys.argv[4]
        print colored.yellow("LIMITING TO TAXA IN"), sys.argv[4]

    # Step 1: run get_ncbi_tax_tree_no_species.py (with the taxa list when
    # given) and redirect its stdout to <dirl>/<taxon>.tre. Every command in
    # this section is a concatenated shell string, so values with spaces or
    # shell metacharacters would break it.
    tname = dirl + "/" + taxon + ".tre"
    if taxalistf != None:
        cmd = "python " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " " + taxalistf + " > " + tname
    else:
        cmd = "python " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " > " + tname
    print colored.yellow("MAKING TREE"), taxon, colored.yellow(
        emoticons.get_ran_emot("excited"))
    os.system(cmd)
    # Label of the first tree read back from that file (presumably its root
    # node -- confirm); it is used below as a directory name under dirl.
    trn = tree_reader.read_tree_file_iter(tname).next().label
    # Step 2: run make_dirs.py on the tree file and dirl.
    cmd = "python " + DI + "make_dirs.py " + tname + " " + dirl
    print colored.yellow("MAKING DIRS IN"), dirl, colored.yellow(
        emoticons.get_ran_emot("excited"))
    os.system(cmd)
    # Step 3: run populate_dirs_first.py, passing the taxa list when given.
    cmd = "python " + DI + "populate_dirs_first.py " + tname + " " + dirl + " " + db
    if taxalistf != None:
        cmd += " " + taxalistf
    print colored.yellow("POPULATING DIRS"), dirl, colored.yellow(
        emoticons.get_ran_emot("excited"))
    os.system(cmd)

    # Remove a leftover log.md.gz from the current directory, then run
    # cluster_tree.py on <dirl>/<trn>/ with that log file name.
    if os.path.isfile("log.md.gz"):
        os.remove("log.md.gz")
    cmd = "python " + DI + "cluster_tree.py " + dirl + "/" + trn + "/ log.md.gz"
    os.system(cmd)
Beispiel #15
0
#! /usr/bin/python3

import sys
import argparse
import tree_reader

if __name__ == "__main__":
    # With no arguments at all, show the help text instead of an error.
    if not sys.argv[1:]:
        sys.argv.append("-h")

    parser = argparse.ArgumentParser()
    parser.add_argument("tree",
                        help="gophy-formatted output tree, with model \
                        label per node")
    args = parser.parse_args()

    # Every tree in the file is read, but only the first one is reported.
    t = list(tree_reader.read_tree_file_iter(args.tree))[0]

    # CSV output: one line per tip with the label of the tip's parent node.
    print("tip,model")
    for n in t.iternodes():
        if not n.istip:
            continue
        model = n.parent.label
        print(",".join((n.label, str(model))))