def sink_tree(filen, outfn):
    """Collapse weakly supported internal nodes and write the resulting tree.

    The first tree in ``filen`` is read. Every node with at least two
    children whose label has the form ``"a/b"`` has both parts parsed as
    floats; when ``a < 80`` or ``b < 95`` the node is collapsed, i.e. removed
    and its children re-attached to its parent. The ``"a/b"`` labels are
    blanked. The tree is then written to ``outfn`` as
    ``get_newick_repr(False)`` followed by ``";"``.

    Args:
        filen: path of the tree file to read.
        outfn: path of the file to write (overwritten if it exists).
    """
    t = next(tree_reader.read_tree_file_iter(filen))
    cutoffa = 80
    cutoffb = 95
    nds = set()
    for i in t.iternodes():
        # Test the node being visited. The previous check looked at the root
        # (``t.children``), which is the same on every iteration and so never
        # skipped tips or single-child nodes.
        if len(i.children) < 2:
            continue
        l = i.label
        if "/" in l:
            s = l.split("/")
            a = float(s[0])
            b = float(s[1])
            if a < cutoffa or b < cutoffb:
                nds.add(i)
            # NOTE(review): only support-style labels are cleared here so that
            # other labels are never lost - confirm against the intended
            # output format.
            i.label = ""
    for j in nds:
        par = j.parent
        if par is None:
            # The root has no parent to receive its children; leave it alone
            # instead of failing on ``None.remove_child``.
            continue
        chs = j.children
        par.remove_child(j)
        for k in chs:
            k.parent = par
            par.add_child(k)
    # ``with`` closes the file even if serialising the tree raises.
    with open(outfn, "w") as outf:
        outf.write(t.get_newick_repr(False) + ";")
import sys
import tree_reader
import os
from utils import newick_name

if __name__ == "__main__":
    # Usage: <script> all_names.txt infile outfile
    # Relabels every node whose label appears in column 1 of the table with
    # newick_name() applied to the value found in column 4 of the same row.
    if len(sys.argv) != 4:
        print("python " + sys.argv[0] + " all_names.txt infile outfile")
        sys.exit(0)

    # Build the id -> name lookup from the tab-separated table.
    idn = {}
    with open(sys.argv[1], "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            if len(spls) < 4:
                # Blank or short rows have no name column to map to; skip
                # them rather than failing with IndexError.
                continue
            idn[spls[0]] = spls[3]

    # ``with`` guarantees the output is flushed and closed even if a tree
    # fails to parse part-way through the input.
    with open(sys.argv[3], "w") as outf:
        for tree in tree_reader.read_tree_file_iter(sys.argv[2]):
            for nd in tree.iternodes():
                if nd.label in idn:
                    nd.label = newick_name(idn[nd.label])
            # Trees are written back to back, each terminated by ";".
            outf.write(tree.get_newick_repr(True) + ";")
for i in tre.iternodes(): if len(i.children) > 0: if i.label in nms: toremove.append(i) for i in toremove: sys.stderr.write("remove internal: "+i.get_newick_repr(False)+"\n") par = i.parent par.remove_child(i) if __name__ == "__main__": if len(sys.argv) != 3: print "python "+sys.argv[0]+ " addtree bigtre" sys.exit(0) tree1 = tree_reader.read_tree_file_iter(sys.argv[1]).next() bigtree = tree_reader.read_tree_file_iter(sys.argv[2]).next() if EDITLEN: tree_utils.set_heights(tree1) tree_utils.set_heights(bigtree) rootnms = set(tree1.lvsnms()) remove_int_ext_nodes(rootnms,bigtree) othernms = set(bigtree.lvsnms()) if VERBOSE: ddifs = rootnms.difference(othernms) for i in ddifs: sys.stderr.write(i+"\n") diffnms = []
import os
import seq
import networkx as nx
import tree_reader
from networkx.drawing.nx_agraph import write_dot
"""
This will make a graph of the connectivity of the genes and taxa
given some tree (probably taxonomy) and a set of genes
"""
if __name__ == "__main__":
    # At least a tree file and one further argument are required.
    if len(sys.argv) < 3:
        print("python " + sys.argv[0] + " tree files...")
        sys.exit(0)

    # Only the first tree in the file is used.
    tree = next(tree_reader.read_tree_file_iter(sys.argv[1]))

    seqfiles = []
    goodseqs = []
    badseqs = []
    # Arguments starting with "_" are recorded in goodseqs with the leading
    # underscore removed; every other argument is a sequence file to read.
    for i in sys.argv[2:]:
        if i[0] == "_":
            goodseqs.append(i[1:])
        else:
            seqfiles.append(i)

    # Map each sequence file to the names of the records it contains.
    genes = {}
    for i in seqfiles:
        genes[i] = [rec.name for rec in seq.read_fasta_file_iter(i)]
    print(seqfiles)
def run_bp_window(infn, tsegfiles, mltr, segc, outf):
    """Compare each segment tree with the ML tree using ``bp`` and report.

    For every file in ``tsegfiles`` the external command
    ``bp -c <mltr> -t <segment file> -tv`` is run and its output parsed. For
    each bipartition of the ML tree a segment is scored -1 (conflict),
    1 (concordant) or 0 (neither).

    Side effects:
      * prints ``infn`` followed by the conflict count of every segment;
      * writes ``outf + ".details.gz"`` with, per segment, the conflicting
        bipartitions, the preferred bipartitions and the first line of the
        segment tree file;
      * writes ``outf``: a header of quoted segment names, then one row per
        ML bipartition with one score per segment;
      * runs ``Rscript rplot.r`` to produce ``outf + ".png"`` (shell output is
        redirected to ``rlog``);
      * deletes each segment file when the module-level
        ``remove_intermediate_files`` is true.

    Args:
        infn: input name; used in the printed summary and, truncated to 15
            characters, as an argument to the R script.
        tsegfiles: sequence of segment tree file paths.
        mltr: path of the ML tree file.
        segc: iterable of segment descriptors; the items of each descriptor
            are joined with "-" to name the segment.
        outf: path of the plot-data file; also the prefix of the other
            outputs.
    """
    write_r()  # presumably emits rplot.r used at the end - confirm
    mlto = next(tree_reader.read_tree_file_iter(mltr))
    mlbps = get_biparts(mlto)
    segs = {}  # segment index -> conflicting bipartitions
    segslong = {}  # segment index -> bipartitions collected from bp's output
    segstree = {}  # segment index -> first line of the segment tree file
    plotsegs = []  # each row is a seg, each column is a node
    conflictsegscount = []
    for count, segfile in enumerate(tsegfiles):
        conflictcount = 0
        segs[count] = []
        segslong[count] = set()
        plotsegs.append([0] * len(mlbps))
        cmd = "bp -c " + mltr + " -t " + segfile + " -tv"
        # Close the handle straight away; the previous bare open().readline()
        # leaked one file descriptor per segment.
        with open(segfile, "r") as segfh:
            segstree[count] = segfh.readline()
        o = subprocess.check_output(cmd.split(" "), stderr=subprocess.STDOUT)
        # ``o`` is bytes; str() renders newlines as the two characters "\n",
        # which is what the split below relies on.
        keepo = str(o).split("\\n")
        # The conflict-labelled and concordance-labelled trees are read from
        # fixed positions counted from the end of bp's output.
        cf = keepo[-8]
        cc = keepo[-7]
        cft = tree_reader.read_tree_string(cf)
        cct = tree_reader.read_tree_string(cc)
        for j, k in zip(cft.iternodes(), cct.iternodes()):
            if len(j.children) > 1:
                sbp = None
                inde = None
                if j.label != "" or k.label != "":
                    sbp = get_bipart(j, cft)
                    inde = get_bp_ind(mlbps, sbp)
                # conflict one
                if j.label != "":
                    if int(j.label) > 0:
                        conflictcount += 1
                        segs[count].append(sbp)
                        plotsegs[count][inde] = -1
                        # process the bp out from above to record the actual
                        # split that conflicts
                        start = False
                        for l in keepo:
                            if start:
                                # NOTE(review): a 2-character literal is
                                # compared with a 3-character slice; the
                                # literal may have lost whitespace - verify
                                # against bp's real output.
                                if " (" == l[0:3]:
                                    tttt = tree_reader.read_tree_string(
                                        l.strip().split(" ")[-1])
                                    segslong[count] = add_bp(
                                        segslong[count],
                                        get_biparts(tttt)[0])
                            if "read " == l[0:5]:
                                start = True
                            if "TREES " == l[0:6]:
                                break
                # concordant one
                if k.label != "":
                    if int(k.label) > 0:
                        plotsegs[count][inde] = 1
        if remove_intermediate_files:
            if os.path.exists(segfile):
                os.remove(segfile)
        # just a running tally of the conflicts per segment
        conflictsegscount.append(conflictcount)

    # print the number of conflicts per segment
    print(infn + " " + " ".join([str(k) for k in conflictsegscount]))

    # print the verbose stuff to the gzip
    with gzip.open(outf + ".details.gz", "wt") as detfile:
        # segs, segslong and segstree share the same keys in the same order,
        # so one index addresses all three; zip() still stops at the shorter
        # of segs and segc.
        for i, sc in zip(segs, segc):
            detfile.write(
                str(i) + " " + "-".join([str(k) for k in list(sc)]) + "\n")
            for j in segs[i]:
                detfile.write(" conflicts with:" + str(j) + "\n")
            for j in segslong[i]:
                detfile.write(" prefers:" + str(j) + "\n")
            detfile.write(" tree:" + segstree[i] + "\n")

    # write the plotting information
    with open(outf, "w") as ouf:
        header = ["\"" + "-".join([str(k) for k in list(sc)]) + "\""
                  for sc in segc]
        ouf.write(" ".join(header))
        ouf.write("\n")
        for col in range(len(mlbps)):
            ouf.write(" ".join([str(row[col]) for row in plotsegs]) + "\n")

    cmd = ("Rscript rplot.r " + outf + " " + outf + ".png " + infn[:15] +
           " > rlog 2>&1")
    os.system(cmd)
import tree_reader
import sys
import os
"""
Relabel the nodes of the first tree in a tree file using a tab-separated
table: a node whose label equals column 4 of a row is renamed to column 5 of
that row, with spaces replaced by underscores. The relabelled tree is printed
to stdout.

NOTE(review): the previous header said this "just chooses the longest" and
"writes over the file"; the code below does neither - confirm which is
intended.
"""
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("python " + sys.argv[0] + " table treefile")
        sys.exit(0)

    idn = {}
    with open(sys.argv[1], "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            if len(spls) < 5:
                # Blank or short rows carry no mapping; skip them rather
                # than failing with IndexError.
                continue
            idn[spls[3]] = spls[4]

    # next() works on both Python 2 and 3, unlike the .next() method.
    tf = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    for nd in tf.iternodes():
        if nd.label in idn:
            nd.label = idn[nd.label].replace(" ", "_")
    print(tf.get_newick_repr() + ";")
# This will be used to limit the taxa taxalistf = None if len(sys.argv) == 7: taxalistf = sys.argv[6] print(colored.yellow("LIMITING TO TAXA IN"), sys.argv[6]) # Log file logfile = sys.argv[5] if logfile[-len(".md.gz"):] != ".md.gz": logfile += ".md.gz" tname = dirl + "/" + taxon + ".tre" cmd = py + " " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " > " + tname print(colored.yellow("MAKING TREE"), taxon) os.system(cmd) trn = tree_reader.read_tree_file_iter(tname).__next__().label cmd = py + " " + DI + "make_dirs.py " + tname + " " + dirl print(colored.yellow("MAKING DIRS IN"), dirl) os.system(cmd) cmd = py + " " + DI + "populate_dirs_first.py " + tname + " " + dirl + " " + db if taxalistf != None: cmd += " " + taxalistf print(colored.yellow("POPULATING DIRS"), dirl) os.system(cmd) if os.path.isfile("log.md.gz"): os.remove("log.md.gz") cmd = py + " " + DI + "bait_tree.py " + dirl + "/" + trn + "/ " + baitdir + " " + logfile os.system(cmd) print(colored.blue("PYPHLAWD DONE " + emoticons.get_ran_emot("excited")))
nd.data["names"] = set() tf = open(root + "/" + root.split("/")[-1] + ".table", "r") for i in tf: spls = i.strip().split("\t") nd.data["names"].add(spls[4].replace(" ", "_")) tf.close() for j in dirs: if "clusters" not in j: cnd = node.Node() cnd.label = j cnd.parent = nd nd.add_child(cnd) nodes[j] = cnd count += 1 intree = tree_reader.read_tree_file_iter(sys.argv[3]).next() for i in intree.iternodes(order="POSTORDER"): lvsnms = set(i.lvsnms()) for j in tree.iternodes(order="POSTORDER"): if lvsnms.issubset(j.data["names"]): j.set_dist_root() i.data["h"] = j.droot if len(i.children) > 0: i.label = str(j.droot) break for i in intree.iternodes(order="POSTORDER"): if len(i.children) > 0: try: if max(i.children[0].data["h"], i.children[1].data["h"]) - min( i.children[0].data["h"], i.children[1].data["h"]) > 1:
tr = open(sys.argv[1],"r") for i in tr: spls = i.strip().split(":") if "taxon" in spls[0]: removetaxa.add(spls[1]) elif "constraint" in spls[0]: remove_mrca.append(spls[1].split(",")) tr.close() tr = open(sys.argv[3]+".postrem","w") for i in seq.read_fasta_file_iter(sys.argv[3]): if i.name not in removetaxa: tr.write(i.get_fasta()) tr.close() tree = next(tree_reader.read_tree_file_iter(sys.argv[2])) nodes = {} mrca_pars = [] #vector of the parents of each remove constraint child_constraints = [] #vector of child_constraints for each remove constraint for i in tree.leaves(): nodes[i.label] = i for i in remove_mrca: tnods = [] for j in i: tnods.append(nodes[j]) mr = tree_utils.get_mrca(tnods,tree) par = mr.parent tchild_constraints = [] for j in mr.children: if len(j.children) > 0: tchild_constraints.append(j)
import argparse as ap


def generate_argparser():
    """Build the command-line parser for the WCID-to-name tree relabeller.

    Returns:
        An ``argparse.ArgumentParser`` with three required options:
        ``-t/--table``, ``-i/--infile`` and ``-o/--outfile``.
    """
    # Build the parser once. A second bare ``ap.ArgumentParser()`` call used
    # to overwrite this one, silently dropping ``prog`` and the formatter.
    parser = ap.ArgumentParser(
        prog="change_wcid_to_name_tre.py",
        formatter_class=ap.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-t", "--table", type=str,
                        help="WCID translation table", required=True)
    parser.add_argument("-i", "--infile", type=str,
                        help="Input tree", required=True)
    parser.add_argument("-o", "--outfile", type=str,
                        help="Output tree", required=True)
    return parser


if __name__ == "__main__":
    parser = generate_argparser()
    # With no arguments at all, show the help text instead of an error.
    if len(sys.argv[1:]) == 0:
        sys.argv.append("-h")
    args = parser.parse_args(sys.argv[1:])

    # Map the third-from-last column of each row to its last column.
    idn = {}
    with open(args.table, "r") as tab:
        for line in tab:
            spls = line.strip().split("\t")
            if len(spls) < 3:
                # Blank or short rows have no id column; skip them rather
                # than failing with IndexError.
                continue
            idn[spls[-3]] = spls[-1]

    # ``with`` closes the output even if a tree fails to parse.
    with open(args.outfile, "w") as outf:
        for tree in tree_reader.read_tree_file_iter(args.infile):
            for nd in tree.iternodes():
                if nd.label in idn:
                    nd.label = newick_name(idn[nd.label])
            # Trees are written back to back, each terminated by ";".
            outf.write(tree.get_newick_repr(True) + ";")
count += 1 spls = i.split("\t|\t") try: idn["ott"+spls[0]] = spls[2].replace(" ","_") except: continue tab.close() return idn if __name__ == "__main__": if len(sys.argv) != 5: print("python "+sys.argv[0]+" ott_taxonomy.tsv db infile outfile") sys.exit(0) ottids = [] ncbis = [] for i in tree_reader.read_tree_file_iter(sys.argv[3]): for j in i.iternodes(): if len(j.label) > 0: if "ott" in j.label: ottids.append(j.label.replace("ott","")) else: ncbis.append(j.label) # first do ott oidn = get_ott_names(sys.argv[1], ottids) # now do ncbi nidn = get_ncbi_names(sys.argv[2],ncbis) names_done = set()
import argparse import tree_reader if __name__ == "__main__": if len(sys.argv[1:]) == 0: sys.argv.append("-h") parser = argparse.ArgumentParser() parser.add_argument("speciesTree", help="species tree in newick") parser.add_argument("geneTree", help="gene tree in newick format, \ with taxon names formatted species@sequence, \ where species matches species tree.") args = parser.parse_args() sTree = [x for x in tree_reader.read_tree_file_iter(args.speciesTree)][0] gTree = [x for x in tree_reader.read_tree_file_iter(args.geneTree)][0] species = sTree.lvsnms() genes = [] for i in gTree.lvsnms(): if "@" in i: i = i.split("@") genes.append((i[0], "@" + i[1])) else: genes.append((i, "")) mapping = {} for s in species: mapping[s] = [] for g in genes:
import sys
import os
import tree_reader
import tree_utils
"""
assuming that the datedtre just has ott ids as the tip names
assuming the labelled_supertree has ott in front of the ott ids
"""
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("python " + sys.argv[0] + " datedtree labelled_superrtree")
        sys.exit(0)
    # next() works on both Python 2 and 3, unlike the .next() method.
    dated = next(tree_reader.read_tree_file_iter(sys.argv[1]))
    ott = next(tree_reader.read_tree_file_iter(sys.argv[2]))
    tree_utils.set_heights(dated)
    ottlvsd = {}  #key is name and value is node
    for i in ott.iternodes():
        ottlvsd[i.label] = i
    for i in dated.leaves():
        # NOTE(review): the lookup uses the raw tip label; the "ott"-prefixed
        # form mentioned in the header is commented out - confirm which one
        # the input trees need.
        try:
            i.data["node"] = ottlvsd[i.label]  #ottlvsd["ott"+i.label]
        except KeyError:
            # Only a missing label means "not matched"; any other error is a
            # real bug and should surface.
            sys.stderr.write("not matched " + str(i.label) + "\n")
            continue
    done = set()
    dates = {}  # key is node, value is date
    dates_names = {}  #key is node, value is mrca string
db = sys.argv[2] # This will be used to limit the taxa taxalistf = None if len(sys.argv) == 5: taxalistf = sys.argv[4] print colored.yellow("LIMITING TO TAXA IN"), sys.argv[4] tname = dirl + "/" + taxon + ".tre" if taxalistf != None: cmd = "python " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " " + taxalistf + " > " + tname else: cmd = "python " + DI + "get_ncbi_tax_tree_no_species.py " + taxon + " " + db + " > " + tname print colored.yellow("MAKING TREE"), taxon, colored.yellow( emoticons.get_ran_emot("excited")) os.system(cmd) trn = tree_reader.read_tree_file_iter(tname).next().label cmd = "python " + DI + "make_dirs.py " + tname + " " + dirl print colored.yellow("MAKING DIRS IN"), dirl, colored.yellow( emoticons.get_ran_emot("excited")) os.system(cmd) cmd = "python " + DI + "populate_dirs_first.py " + tname + " " + dirl + " " + db if taxalistf != None: cmd += " " + taxalistf print colored.yellow("POPULATING DIRS"), dirl, colored.yellow( emoticons.get_ran_emot("excited")) os.system(cmd) if os.path.isfile("log.md.gz"): os.remove("log.md.gz") cmd = "python " + DI + "cluster_tree.py " + dirl + "/" + trn + "/ log.md.gz" os.system(cmd)
#! /usr/bin/python3
import sys
import argparse
import tree_reader

if __name__ == "__main__":
    # Print a "tip,model" CSV: for every tip of the first tree in the file,
    # the tip label and the label of its parent node.
    # With no arguments at all, show the help text instead of an error.
    if len(sys.argv[1:]) == 0:
        sys.argv.append("-h")
    parser = argparse.ArgumentParser()
    # Adjacent literals replace the backslash continuation, which embedded
    # the source indentation in the help string.
    parser.add_argument("tree", help="gophy-formatted output tree, with model "
                                     "label per node")
    args = parser.parse_args()

    # Only the first tree is used, so stop reading after it instead of
    # parsing every tree in the file into a list.
    t = next(tree_reader.read_tree_file_iter(args.tree), None)
    if t is None:
        sys.stderr.write("no tree found in " + args.tree + "\n")
        sys.exit(1)

    print("tip,model")
    for n in t.iternodes():
        if n.istip:
            model = n.parent.label
            print(n.label + "," + str(model))