def main(inDIR, file_ending, branch_len_cutoff, min_taxa, outDIR, log):
    """Cut long internal branches and write the surviving subtrees.

    Every file in inDIR whose name ends with file_ending is read (first
    line only, one tree per file) and handed to
    cut_long_internal_branches. Each returned subtree that still has at
    least min_taxa leaves is written to
    outDIR/<file name up to its first '.'>_<n>.subtree.

    inDIR -- directory with the input trees (a trailing "/" is appended)
    file_ending -- suffix selecting which files to process
    branch_len_cutoff -- cutoff passed to the cutter (converted to float)
    min_taxa -- minimum number of leaves to keep a tree (converted to int)
    outDIR -- directory receiving the .subtree files
    log -- accepted for interface compatibility; not used in this function

    NOTE(review): trees that end up uncut are not copied to outDIR here.
    """
    if inDIR[-1] != "/":
        inDIR += "/"
    min_taxa = int(min_taxa)
    cutoff = float(branch_len_cutoff)
    print("cutting branches longer than %s" % cutoff)
    for fname in os.listdir(inDIR):
        if not fname.endswith(file_ending):
            continue
        with open(inDIR + fname, "r") as infile:  # only 1 tree in each file
            intree = tree_reader.read_tree_string(infile.readline())
        try:
            # the original .tre
            raw_name = inDIR + fname[:fname.find(".tre")] + ".tre"
            with open(raw_name, "r") as infile:
                raw_tree_size = len(get_front_labels(
                    tree_reader.read_tree_string(infile.readline())))
        except Exception:
            # did not refine this round. Use the .tre.tt.mm tree
            # (Exception rather than a bare except, so Ctrl-C and
            # sys.exit() are no longer swallowed)
            raw_tree_size = len(get_front_labels(intree))
        # raw_tree_size is only of use for debugging output at the moment
        num_taxa = len(intree.leaves())
        if num_taxa < min_taxa:
            print("Tree has %s less than %s taxa" % (num_taxa, min_taxa))
            continue
        subtrees = cut_long_internal_branches(intree, cutoff, min_taxa)
        if len(subtrees) == 0:
            print("No tree with at least %s taxa" % min_taxa)
            continue
        count = 0
        sizes = []
        for subtree in subtrees:
            if len(subtree.leaves()) < min_taxa:
                continue
            if len(subtree.children) == 2:
                # fix bifurcating roots from cutting
                _, subtree = remove_kink(subtree, subtree)
            count += 1
            outname = (outDIR + "/" + fname.split(".")[0] + "_" +
                       str(count) + ".subtree")
            print(outname)
            with open(outname, "w") as outfile:
                outfile.write(subtree.get_newick_repr(True) + ";\n")
            sizes.append(str(len(subtree.leaves())))
        # every size keeps its trailing ", " so the report line is unchanged
        outsizes = "".join(size + ", " for size in sizes)
        print("%s tree(s) written. Sizes: %s" % (count, outsizes))
def main(treefile, relative_cut, absolute_cut):
    """Trim the tree found on the first line of treefile and print it.

    treefile -- path of a file whose first line is the tree string
    relative_cut, absolute_cut -- cutoffs, converted to float and passed
        to trim

    Prints the trimmed tree (newick followed by ";") and returns the pair
    (outtree, removed) produced by trim.
    """
    with open(treefile, "r") as handle:
        first_line = handle.readline()
    intree = tree_reader.read_tree_string(first_line)
    rel = float(relative_cut)
    absolute = float(absolute_cut)
    outtree, removed = trim(intree, rel, absolute)
    newick = outtree.get_newick_repr(True)
    print(newick + ";")
    return outtree, removed
def load_one_study(studyloc,study_treeid,javapre,treemloc,dload,outfile,treeoutfile,append):
    """Run load_nexson and source_explorer, then check the exported tree.

    After both calls the first line of treeoutfile is parsed with
    read_tree_string and the label of the resulting root is printed.
    All parameters are passed through to the two helpers unchanged.
    """
    load_nexson(studyloc,study_treeid,javapre,treemloc,dload,outfile,append)
    source_explorer(study_treeid,javapre,treemloc,dload,treeoutfile,append)
    #attempt to read the tree; the with block closes the file even when
    #parsing fails
    with open(treeoutfile,"r") as tf:
        tree = read_tree_string(tf.readline())
    print("root name:"+tree.label)
def load_one_study_inf_mono(studyloc,study_treeid,javapre,treemloc,dload,outfile,treeoutfile,infmonofile,append):
    """Load a study, check its tree, then run the inf-mono steps.

    Order of operations: load_nexson, source_explorer, parse the first
    line of treeoutfile and print the root label, mapcompat_one_study
    (outfile is passed twice, followed by True), and finally
    source_explorer_inf_mono writing to infmonofile.
    """
    load_nexson(studyloc,study_treeid,javapre,treemloc,dload,outfile,append)
    source_explorer(study_treeid,javapre,treemloc,dload,treeoutfile,append)
    #the with block closes the file even when parsing fails
    with open(treeoutfile,"r") as tf:
        tree = read_tree_string(tf.readline())
    print("root name:"+tree.label)
    mapcompat_one_study(studyloc,study_treeid,javapre,treemloc,dload,outfile,outfile,True)
    source_explorer_inf_mono(study_treeid,javapre,treemloc,dload,infmonofile,append)
def read_tree(in_tree,nwk=False):
    """Parse a tree and remember every node's current branch length.

    in_tree -- path of a file whose first line is the tree string, or the
               tree string itself when nwk is set
    nwk -- False (default): in_tree is a file path; any other value:
           in_tree is parsed directly

    Returns the parsed tree; each node gets old_length set to its length.
    Raises IndexError if the file is empty.
    """
    # equality (not identity) is kept on purpose so that 0 still selects
    # the read-from-file branch, exactly as before
    if nwk == False:
        # read inside a with block so the handle is closed again
        with open(in_tree,"r") as infile:
            newick = infile.readlines()[0].strip()
    else:
        newick = in_tree
    tree = tree_reader.read_tree_string(newick)
    for node in tree.iternodes():
        node.old_length = node.length
    return tree
def load_one_study_inf_mono(studyloc,study_treeid,javapre,treemloc,dload,outfile,treeoutfile,infmonofile,append):
    """Load a study under a SHA-suffixed source id and run the inf-mono steps.

    The study id is study_treeid without its last "_"-separated field; the
    SHA comes from get_git_SHA_from_json(studyloc+"/"+studyid). Both
    source_explorer and source_explorer_inf_mono are called with
    study_treeid+"_"+sha. Between them the first line of treeoutfile is
    parsed, the root label printed, and mapcompat_one_study is run
    (outfile is passed twice, followed by True).
    """
    load_nexson(studyloc,study_treeid,javapre,treemloc,dload,outfile,append)
    studyid = "_".join(study_treeid.split("_")[:-1])
    sha = get_git_SHA_from_json(studyloc+"/"+studyid)
    source_id = study_treeid+"_"+sha
    source_explorer(source_id,javapre,treemloc,dload,treeoutfile,append)
    #the with block closes the file even when parsing fails
    with open(treeoutfile,"r") as tf:
        tree = read_tree_string(tf.readline())
    print("root name:"+tree.label)
    mapcompat_one_study(studyloc,study_treeid,javapre,treemloc,dload,outfile,outfile,True)
    source_explorer_inf_mono(source_id,javapre,treemloc,dload,infmonofile,append)
def load_one_study(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                   treeoutfile, append):
    """Run load_nexson and source_explorer, then check the exported tree.

    source_explorer is called with study_treeid + "_" + the value returned
    by get_git_SHA(studyloc). Afterwards the first line of treeoutfile is
    parsed with read_tree_string and the root label is printed.
    """
    load_nexson(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                append)
    sha = get_git_SHA(studyloc)
    source_explorer(study_treeid + "_" + sha, javapre, treemloc, dload,
                    treeoutfile, append)
    # attempt to read the tree; the with block closes the file even when
    # parsing fails
    with open(treeoutfile, "r") as tf:
        tree = read_tree_string(tf.readline())
    print("root name:" + tree.label)
def load_one_study_newick(treeid,javapre,treemloc,dload,outfile,treeoutfile,append):
    """Run load_newick and source_explorer, then check the exported tree.

    Prints treeid, parses the LAST line of treeoutfile with
    read_tree_string, prints the root label and returns treeid.
    """
    load_newick(treeid,javapre,treemloc,dload,outfile,append)
    studyid = treeid
    print(studyid)
    source_explorer(treeid,javapre,treemloc,dload,treeoutfile,append)
    #attempt to read the tree; only the last line of the file is kept
    #(ts stays None when the file is empty)
    ts = None
    with open(treeoutfile,"r") as tf:
        for line in tf:
            ts = line
    tree = read_tree_string(ts)
    print("root name:"+tree.label)
    return treeid
def load_one_study_inf_mono(studyloc, study_treeid, javapre, treemloc, dload,
                            outfile, treeoutfile, infmonofile, append):
    """Load a study, check its tree, then run the inf-mono steps.

    Calls, in order: load_nexson, source_explorer, a parse of the first
    line of treeoutfile (the root label is printed), mapcompat_one_study
    (outfile is passed twice, followed by True) and
    source_explorer_inf_mono writing to infmonofile.
    """
    load_nexson(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                append)
    source_explorer(study_treeid, javapre, treemloc, dload, treeoutfile,
                    append)
    # the with block closes the file even when parsing fails
    with open(treeoutfile, "r") as tf:
        tree = read_tree_string(tf.readline())
    print("root name:" + tree.label)
    mapcompat_one_study(studyloc, study_treeid, javapre, treemloc, dload,
                        outfile, outfile, True)
    source_explorer_inf_mono(study_treeid, javapre, treemloc, dload,
                             infmonofile, append)
def load_one_study(studyloc,study_treeid,javapre,treemloc,dload,outfile,treeoutfile,append):
    """Load a study under a SHA-suffixed source id and check its tree.

    The study id (study_treeid without its last "_"-separated field) is
    printed and used to look up the SHA via
    get_git_SHA_from_json(studyloc+"/"+studyid). source_explorer is called
    with study_treeid+"_"+sha, then the LAST line of treeoutfile is parsed
    and the root label printed.

    Returns the SHA string.
    """
    load_nexson(studyloc,study_treeid,javapre,treemloc,dload,outfile,append)
    studyid = "_".join(study_treeid.split("_")[:-1])
    print(studyid)
    sha = get_git_SHA_from_json(studyloc+"/"+studyid)
    source_explorer(study_treeid+"_"+sha,javapre,treemloc,dload,treeoutfile,append)
    #attempt to read the tree; only the last line of the file is kept
    #(ts stays None when the file is empty)
    ts = None
    with open(treeoutfile,"r") as tf:
        for line in tf:
            ts = line
    tree = read_tree_string(ts)
    print("root name:"+tree.label)
    return sha
def load_one_study_newick(treeid, javapre, treemloc, dload, outfile,
                          treeoutfile, append):
    """Run load_newick and source_explorer, then check the exported tree.

    Prints treeid, parses the LAST line of treeoutfile with
    read_tree_string, prints the root label and returns treeid.
    """
    load_newick(treeid, javapre, treemloc, dload, outfile, append)
    studyid = treeid
    print(studyid)
    source_explorer(treeid, javapre, treemloc, dload, treeoutfile, append)
    # attempt to read the tree; only the last line of the file is kept
    # (ts stays None when the file is empty)
    ts = None
    with open(treeoutfile, "r") as tf:
        for line in tf:
            ts = line
    tree = read_tree_string(ts)
    print("root name:" + tree.label)
    return treeid
def load_one_study(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                   treeoutfile, append):
    """Load a study under a SHA-suffixed source id and check its tree.

    The study id (study_treeid without its last "_"-separated field) is
    printed and used to look up the SHA via
    get_git_SHA_from_json(studyloc + "/" + studyid). source_explorer is
    called with study_treeid + "_" + sha, then the LAST line of
    treeoutfile is parsed and the root label printed.

    Returns the SHA string.
    """
    load_nexson(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                append)
    studyid = "_".join(study_treeid.split("_")[:-1])
    print(studyid)
    sha = get_git_SHA_from_json(studyloc + "/" + studyid)
    source_explorer(study_treeid + "_" + sha, javapre, treemloc, dload,
                    treeoutfile, append)
    # attempt to read the tree; only the last line of the file is kept
    # (ts stays None when the file is empty)
    ts = None
    with open(treeoutfile, "r") as tf:
        for line in tf:
            ts = line
    tree = read_tree_string(ts)
    print("root name:" + tree.label)
    return sha
def load_one_study_inf_mono(studyloc, study_treeid, javapre, treemloc, dload,
                            outfile, treeoutfile, infmonofile, append):
    """Load a study under a SHA-suffixed source id and run the inf-mono steps.

    The study id is study_treeid without its last "_"-separated field; the
    SHA comes from get_git_SHA_from_json(studyloc + "/" + studyid). Both
    source_explorer and source_explorer_inf_mono are called with
    study_treeid + "_" + sha. In between, the first line of treeoutfile
    that starts with "(" is parsed as the tree, its root label is printed,
    and mapcompat_one_study is run (outfile is passed twice, followed by
    True).

    Raises ValueError when treeoutfile holds no line starting with "("
    (this case used to surface as an IndexError from indexing an empty
    string at end of file).
    """
    load_nexson(studyloc, study_treeid, javapre, treemloc, dload, outfile,
                append)
    studyid = "_".join(study_treeid.split("_")[:-1])
    sha = get_git_SHA_from_json(studyloc + "/" + studyid)
    source_id = study_treeid + "_" + sha
    source_explorer(source_id, javapre, treemloc, dload, treeoutfile, append)
    ts = None
    with open(treeoutfile, "r") as tf:
        # ignore output that isn't a tree
        for line in tf:
            if line.startswith("("):
                ts = line
                break
    if ts is None:
        raise ValueError("no tree line found in " + treeoutfile)
    tree = read_tree_string(ts)
    print("root name:" + tree.label)
    mapcompat_one_study(studyloc, study_treeid, javapre, treemloc, dload,
                        outfile, outfile, True)
    source_explorer_inf_mono(source_id, javapre, treemloc, dload,
                             infmonofile, append)
dest='phyx_location', default="", help="where are pxlstr (and pxrmt if you have outgroups)") par.add_argument('--outf', dest='outf', help="the outfile") args = par.parse_args() #parse the arguments if args.fileending != None: flend = args.fileending di = args.di[0] print "directory:", di print "file ending for trees:", flend spt = args.speciestree[0] print "species tree:", spt st = open(spt, "r") tree = tree_reader.read_tree_string(st.readline()) st.close() clades = get_clades(tree) print "phyx location:", args.phyx_location if args.phyx_location != "": cmd = args.phyx_location + "/" + cmd if args.outf != None: print "outfile:", args.outf writetofile = True outf = open(args.outf, 'w') for i in os.listdir(di): if i[-len(flend):] == flend: fd = di + "/" + i p = subprocess.Popen(cmd + fd, shell=True, stdout=subprocess.PIPE) x = p.communicate()[0].split("\n")
pp = parent.parent ch = parent.children[0] pp.remove_child(parent) parent.remove_child(ch) pp.add_child(ch) ch.parent = pp break else: parent = parent.parent if found == False: going = False break return tree if __name__ == "__main__": if len(sys.argv) != 3: print "python " + sys.argv[0] + " infile.tre outfile.tre" sys.exit() infile = open(sys.argv[1], "r") outfile = open(sys.argv[2], "w") tree = tree_reader.read_tree_string(infile.readline()) infile.close() tree = del_knees(tree) if len(tree.children) == 1: print "one child at root" tree = tree.children[0] outfile.write(tree.get_newick_repr() + ";") outfile.close()
elif len(name) > 4 and name[-4:] == "ales": return "order" elif len(name) > 3 and name[-3:] == "eae": return "tribe" else: return "genus" return rank if __name__ == "__main__": if len(sys.argv) != 3: print "python " + sys.argv[0] + " infile.tre outfile.taxa" sys.exit(0) infile = open(sys.argv[1], "r") stri = infile.readline() tree = tree_reader.read_tree_string(stri) infile.close() names = {} #key is name, value is parent nums = {} #key is name, value is number cnum = 1 for i in tree.iternodes(): if len(i.label) > 0: nums[i.label] = cnum cnum += 1 if i.parent != None: names[i.label] = i.parent.label outfile = open(sys.argv[2], "w") for i in nums: #print i p = "" if i in names:
# Directory scanned for <study>.log / <study>.tre pairs.
directory = "files_for_submission_v2.0/Inf-mono_log/"
tend = ".tre"
lend = ".log"
# One tip count per line is written here.
outfilen = "all.numtips.results"

if __name__ == "__main__":
    # A study is any file name ending in lend, with that ending removed.
    # endswith (instead of a substring test) keeps names that merely
    # contain ".log" from being chopped into bogus study names.
    studies = []
    for i in os.listdir(directory):
        if i.endswith(lend):
            studies.append(i[0:len(i)-len(lend)])
    sl = set(studies)
    with open(outfilen,"w") as outfile:
        for i in sl:
            try:
                with open(directory+"/"+i+tend,"r") as fl:
                    ts = fl.readline()
                    # a first line containing WARN is skipped
                    if "WARN" in ts:
                        ts = fl.readline()
                if len(ts) > 1:
                    #read the tree first
                    tr = tree_reader.read_tree_string(ts)
                    outfile.write(str(len(list(tr.leaves())))+"\n")
            except Exception:
                # best effort: report the study and carry on; Ctrl-C and
                # sys.exit() are no longer swallowed
                print("some problem with "+i)
# Directory scanned for <study>.log / <study>.tre pairs.
directory = "files_for_submission_v2.0/Inf-mono_log/"
tend = ".tre"
lend = ".log"
# One tip count per line is written here.
outfilen = "all.numtips.results"

if __name__ == "__main__":
    # A study is any file name ending in lend, with that ending removed.
    # endswith (instead of a substring test) keeps names that merely
    # contain ".log" from being chopped into bogus study names.
    studies = []
    for i in os.listdir(directory):
        if i.endswith(lend):
            studies.append(i[0:len(i) - len(lend)])
    sl = set(studies)
    with open(outfilen, "w") as outfile:
        for i in sl:
            try:
                with open(directory + "/" + i + tend, "r") as fl:
                    ts = fl.readline()
                    # a first line containing WARN is skipped
                    if "WARN" in ts:
                        ts = fl.readline()
                if len(ts) > 1:
                    # read the tree first
                    tr = tree_reader.read_tree_string(ts)
                    outfile.write(str(len(list(tr.leaves()))) + "\n")
            except Exception:
                # best effort: report the study and carry on; Ctrl-C and
                # sys.exit() are no longer swallowed
                print("some problem with " + i)
sys.exit() from stephen_laptop_conf import * infile = sys.argv[1] outfile = sys.argv[2] temp1 = "tempfile1" temp2 = "tempfile2" cmd1 = "cp "+infile+" "+temp1 #copy to a temp os.system(cmd1) cmd2 = javapre +" "+treemloc+" checktaxhier "+ temp1 +" "+dott+" > "+temp2 #tax hier os.system(cmd2) #delete knees inf = open(temp2,"r") inf.readline() tree = tree_reader.read_tree_string(inf.readline().strip()+";") inf.close() tree = delete_knees.del_knees(tree) ouf = open(temp1,"w") ouf.write(tree.get_newick_repr()+";") ouf.close() #tax hier os.system(cmd2) #delete knees again inf = open(temp2,"r") inf.readline() tree = tree_reader.read_tree_string(inf.readline().strip()+";") inf.close() tree = delete_knees.del_knees(tree) ouf = open(outfile,"w") ouf.write(tree.get_newick_repr()+";")
import tree_reader import sys if __name__ == "__main__": if len(sys.argv) != 3: print "python " + sys.argv[0] + " treefile namesfile" sys.exit(0) treefile = open(sys.argv[1], "r") tfl = tree_reader.read_tree_string(treefile.readline()) lvsd = {} for i in tfl.leaves(): lvsd[i.label] = i treefile.close() namesfile = open(sys.argv[2], "r") names = [] namesd = {} for i in namesfile: nm = "ott" + i.strip().split(" ")[-1] names.append(nm) namesd[nm] = "_".join(i.strip().split(" ")[0:-1]) namesfile.close() for i in names: if i in lvsd: lvsd[i].data["paint"] = True cur = lvsd[i] while cur != tfl: cur = cur.parent if "paint" in cur.data: break
return "family" elif len(name) > 4 and name[-4:] == "ales": return "order" elif len(name) > 3 and name[-3:] == "eae": return "tribe" else: return "genus" return rank if __name__ == "__main__": if len(sys.argv) != 3: print "python "+sys.argv[0]+" infile.tre outfile.taxa" sys.exit(0) infile = open(sys.argv[1],"r") stri = infile.readline() tree = tree_reader.read_tree_string(stri) infile.close() names = {} #key is name, value is parent nums = {} #key is name, value is number cnum = 1 for i in tree.iternodes(): if len(i.label) > 0: nums[i.label] = cnum cnum += 1 if i.parent != None: names[i.label] = i.parent.label outfile = open(sys.argv[2],"w") for i in nums: #print i p = "" if i in names:
found = True pp = parent.parent ch = parent.children[0] pp.remove_child(parent) parent.remove_child(ch) pp.add_child(ch) ch.parent = pp break else: parent = parent.parent if found == False: going = False break return tree if __name__ == "__main__": if len(sys.argv) != 3: print "python "+sys.argv[0]+" infile.tre outfile.tre" sys.exit() infile = open(sys.argv[1],"r") outfile = open(sys.argv[2],"w") tree = tree_reader.read_tree_string(infile.readline()) infile.close() tree = del_knees(tree) if len(tree.children) == 1: print "one child at root" tree = tree.children[0] outfile.write(tree.get_newick_repr()+";") outfile.close()
treestring = "" start = False for i in fl: if i.strip() == "MRP": break if start: fl2.write(i) treestring = i.strip() break if i.strip() == "TREEUID": start = True fl2.close() fl.close() print treestring treestrings.append(treestring) tree = tree_reader.read_tree_string(treestring) for i in tree.leaves(): taxalist.add(i.label) general_tm_utils.extract_taxonomy_from_ids(javapre,treemloc,dload,",".join(taxalist),taxtreefile) outfile = open(mlsout,"w") outfile.write("#NEXUS\n") outfile.write("BEGIN TAXA;\n\tDIMENSIONS NTAX="); ttf = open(taxtreefile,"r") taxstring = ttf.readline() tree = tree_reader.read_tree_string(taxstring); taxs = [] for i in tree.iternodes(): taxs.append(i.label) ttf.close() outfile.write(str(len(taxs))+";\n\ttaxlabels\n") for i in taxs:
type=str, help='If provided, write species stats to this location (CSV)') args = parser.parse_args() # Read in data pam = Matrix.load(args.pam_matrix_filename) # We use a hack function to get the data out of the tree, in the LM code # this is done with dendropy. I just didn't want to add any dependencies tree_newick, squid_dict = munge_lm_tree(args.tree_filename) #print tree_newick #print len(squid_dict.keys()) # Read the newick tree tree = read_tree_string(tree_newick) # Generate stats matrices site_stats = get_site_statistics(tree, pam, squid_dict) species_stats = get_species_statistics(tree, pam, squid_dict) # Write matrices if desired if args.sites_filename is not None: with open(args.sites_filename, 'w') as outF: site_stats.writeCSV(outF) else: print site_stats.data if args.species_filename is not None: raise Exception, 'No species stats are currently implemented'
def run_bp_window(infn, tsegfiles, mltr, segc, outf):
    """Run the external `bp` program per segment tree and report conflicts.

    Steps, in order:
      1. write_r() is called, then the first tree of mltr is read and its
         bipartitions are collected with get_biparts.
      2. For each file in tsegfiles, `bp -c mltr -t <file> -tv` is run and
         two lines of its output (8th and 7th from the end) are parsed as
         trees. Internal nodes with a label > 0 in the first tree count as
         conflicts (-1 in plotsegs); a label > 0 in the second tree sets 1.
      3. The per-segment conflict counts are printed, details go to
         outf + ".details.gz", the plot matrix goes to outf, and
         `Rscript rplot.r` is run on it.

    infn -- name used in the printed summary; its first 15 characters are
            passed to the R script
    tsegfiles -- list of segment tree file paths
    mltr -- path of the tree file given to `bp -c`
    segc -- one iterable per segment; its items are joined with "-" to
            label the segment
    outf -- path of the plot-matrix file; also the prefix of the details
            file and the png name

    Reads the global remove_intermediate_files to decide whether each
    segment tree file is deleted after use.

    NOTE(review): statement nesting was reconstructed from a source whose
    line structure was lost -- confirm against the original file, in
    particular that the output re-scan belongs inside the conflict branch.
    """
    write_r()
    mlto = tree_reader.read_tree_file_iter(mltr).__next__()
    mlbps = get_biparts(mlto)
    segs = {}
    segslong = {}
    segstree = {}
    plotsegs = []  # each row is a seg, each column is a node
    conflictsegscount = []
    count = 0
    for i in range(len(tsegfiles)):
        conflictcount = 0
        segs[count] = []
        segslong[count] = set()
        plotsegs.append([0] * len(mlbps))
        cmd = "bp -c " + mltr + " -t " + tsegfiles[i] + " -tv"
        # first line of the segment file is kept for the details report
        segstree[count] = open(tsegfiles[i], "r").readline()
        o = subprocess.check_output(cmd.split(" "), stderr=subprocess.STDOUT)
        # str() of the raw output is split on the two-character sequence
        # backslash + n
        keepo = str(o).split("\\n")
        cf = keepo[-8]
        cc = keepo[-7]
        cft = tree_reader.read_tree_string(cf)
        cct = tree_reader.read_tree_string(cc)
        # the two trees are walked in lockstep, node by node
        for j, k in zip(cft.iternodes(), cct.iternodes()):
            if len(j.children) > 1:
                sbp = None
                inde = None
                if j.label != "" or k.label != "":
                    sbp = get_bipart(j, cft)
                    inde = get_bp_ind(mlbps, sbp)
                # conflict one
                if j.label != "":
                    if int(j.label) > 0:
                        conflictcount += 1
                        segs[count].append(sbp)
                        plotsegs[i][inde] = -1
                        # process the bp out from above to record the actual split that conflicts
                        start = False
                        for l in keepo:
                            if start:
                                if " (" == l[0:3]:
                                    tttt = tree_reader.read_tree_string(
                                        l.strip().split(" ")[-1])
                                    segslong[count] = add_bp(
                                        segslong[count], get_biparts(tttt)[0])
                            if "read " == l[0:5]:
                                start = True
                            if "TREES " == l[0:6]:
                                break
                # concordant one
                if k.label != "":
                    if int(k.label) > 0:
                        plotsegs[i][inde] = 1
        if remove_intermediate_files:
            otf = tsegfiles[i]
            if os.path.exists(otf):
                os.remove(otf)
        conflictsegscount.append(
            conflictcount)  #just a running tally of the conflicts per segment
        count += 1
    # print the number of conflicts per segment
    print(infn + " " + " ".join([str(k) for k in conflictsegscount]))
    # print the verbose stuff to the gzip
    detfile = gzip.open(outf + ".details.gz", "wt")
    # iterating the dicts yields their keys; sl is not used below
    for i, sc, sl, t in zip(segs, segc, segslong, segstree):
        detfile.write(
            str(i) + " " + "-".join([str(k) for k in list(sc)]) + "\n")
        for j in segs[i]:
            detfile.write(" conflicts with:" +
                          str(j) + "\n")
        for j in segslong[i]:
            detfile.write(" prefers:" + str(j) + "\n")
        detfile.write(" tree:" + segstree[t] + "\n")
    detfile.close()
    # write the plotting information
    ouf = open(outf, "w")
    # header row: one quoted label per segment
    first = True
    for sc in segc:
        if first == True:
            first = False
        else:
            ouf.write(" ")
        ouf.write("\"" + "-".join([str(k) for k in list(sc)]) + "\"")
    ouf.write("\n")
    # plotsegs is written transposed: one output row per bipartition
    for i in range(len(mlbps)):
        s = []
        for j in range(len(plotsegs)):
            s.append(str(plotsegs[j][i]))
        ouf.write(" ".join(s) + "\n")
    ouf.close()
    cmd = "Rscript rplot.r " + outf + " " + outf + ".png " + infn[
        0:min(15, len(infn))] + " > rlog 2>&1"
    os.system(cmd)
species = species.replace(" ", "_") # print(species) woody_species.append(species) print("Species with trait data: %d" %len(woody_species)) # Get tip names from tree # Big tree y = open("ALLOTB_binary_Names.tre", "r") # Genbank data tree # y = open("GBOTB.tre", "r") y.seek(0) # With Stephen's Tree_Reader tree = tree_reader.read_tree_string(y.readline().strip()) tips = tree.lvsnms() print("Tips in the tree: %d" %len(tips)) # With BioPython # tree2 = Phylo.read("GBOTB.tre", "newick") # tree2.count_terminals() # tips2 = tree2.get_terminals() # len(tips2) # Find Intersect species_intersect = list(set(woody_species) & set(tips)) percent_tips_with_traits = (len(species_intersect) * 100) / len(tips) print("Species in the tree with trait data: %d (%.2f%%)" %(len(species_intersect), percent_tips_with_traits))
j.length = brlen break if match is False: return False if match is True: return True if __name__ == "__main__": if len(sys.argv[1:]) == 0: sys.argv.append("-h") parser = argparse.ArgumentParser() parser.add_argument("tree1", help="nwk tree to transfer brlen from") parser.add_argument("tree2", help="nwk tree to transfer brlen to") args = parser.parse_args() with open(args.tree1, "r") as inf: s = inf.readlines()[0].strip() tree1 = tree_reader.read_tree_string(s) with open(args.tree2, "r") as inf: s = inf.readlines()[0].strip() tree2 = tree_reader.read_tree_string(s) if not transfer_brlen(tree1, tree2): print("topologies are not identical!") sys.exit() else: print(tree2.get_newick_repr(True))
import sys
import tree_reader

if __name__ == "__main__":
    # Usage: exactly one argument, the tree file. The first line of that
    # file is parsed, every node with children gets its note copied into
    # its label, and the result is printed as newick.
    if len(sys.argv) != 2:
        print("python " + sys.argv[0] + " infile.tre")
        sys.exit(0)
    inf = open(sys.argv[1], "r")
    oneline = inf.readline()
    oneline = oneline.strip()
    tree = tree_reader.read_tree_string(oneline)
    for node in tree.iternodes():
        if not len(node.children) > 0:
            continue  # tips keep their label
        node.label = node.note
    inf.close()
    newick = tree.get_newick_repr(True)
    print(newick + ";")
if "nodes" not in i.data: i.data["nodes"] = set() i.data["nodes"].add(j) print i.get_newick_repr(False),j,nodesetslf[j],nodesetsrt[j] if match == False: print "NO MATCH for tree node: "+i.get_newick_repr(False) if __name__ == "__main__": if len(sys.argv) != 2: print "python "+sys.argv[0]+" infile.tre" sys.exit(0) trees = [] infile = open(sys.argv[1],"r") for i in infile: trees.append(tree_reader.read_tree_string(i)) infile.close() bipartslf = [] bipartsrt = [] #this generates the bipartions from the tree for i in trees: root = set(i.lvsnms()) for j in i.iternodes(): if len(j.children) > 0:# and i != j: lvs = j.lvsnms() lft = set(lvs) rt = root - lft bipartslf.append(lft) bipartsrt.append(rt) #these are the bipartitoins that are added as a result of summing like ab|e and ac|e -> abc|e addlf = []#added as a result of equivalent
help="if robust. Tab-separated file \ of site, node, state, and probability, in the style \ of FastML's Ancestral_MaxMarginalProb_Char_Indel.txt") args = parser.parse_args() if args.robust and args.probs is None: sys.stderr.write("must specify probability file (-p) to calculate " + "robust substitutions\n") sys.exit() with open(args.tree, "r") as t: for s in t: s = s.strip() nwkString = s curroot = tree_reader.read_tree_string(nwkString) branches = get_anc_desc(curroot) seqs = dict([x for x in parse_fasta(args.sequences)]) # print(seqs) if args.robust: probs = parse_probs(args.probs) add_subs_robust(branches, seqs, probs, args.gaps) else: add_subs(branches, seqs, args.gaps) print("parent\tchild\tsubs") print(curroot.label) for k, v in branches.items(): print(k[0] + "\t" + k[1] + "\t" + ",".join(v))
else: maxlen = max([ lf.length_to_root for lf in leaves ]) scalef = (leaves[0].c + 1 - root_offset)/maxlen scale_cpos(tree, scalef, root_offset) for node in tree.iternodes(order="postorder"): if node.parent: for r in range(min([node.r, node.parent.r]), max([node.r, node.parent.r])): buf.putstr(r, node.parent.c, ":") sym = getattr(node, "hchar", "-") vbar = sym*(node.c-node.parent.c) buf.putstr(node.r, node.parent.c, vbar) if node.istip: buf.putstr(node.r, node.c+1, " "+node.label) else: if node.label and show_internal_labels: buf.putstr(node.r, node.c-len(node.label), node.label) buf.putstr(node.r, node.c, "+") return str(buf) if __name__ == "__main__": import random rand = random.Random() t = tree_reader.read_tree_string("(foo,((bar,(dog,cat)),(shoe,(fly,(cow, bowwow)))));") print tree2ascii(t, scaled=0, show_internal_labels=1)