def dnadist(seqs, output=None, verbose=True, force=False, args=None):
    """Run the PHYLIP ``dnadist`` program on an alignment.

    The alignment is written to ``infile`` in the directory prepared by
    ``create_temp_dir()``, ``dnadist`` is executed, and the directory is
    released again with ``cleanup_temp_dir()`` before returning.

    Args:
        seqs: alignment mapping sequence names to sequences.
        output: when given, ``outfile`` is moved to ``"../" + output``
            instead of being parsed.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.
        args: input string handed to ``dnadist``; defaults to ``"y"``.

    Returns:
        ``labels`` when ``output`` is given, otherwise ``(labels, mat)``
        where ``mat`` is the matrix read from ``outfile``.
    """
    if args is None:
        args = "y"

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("dnadist on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    # run phylip
    exec_phylip("dnadist", args, verbose)
    util.toc()

    # hand the raw output file to the caller
    if output is not None:
        os.rename("outfile", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # parse output
    _, mat = read_dist_matrix("outfile")
    cleanup_temp_dir(cwd)
    return labels, mat
def _test_ml_speed(self):
    """Time ten runs of HKY maximum-likelihood branch length fitting."""
    # model parameters
    kappa = 1.59
    bgfreq = [.258, .267, .266, .209]

    # input data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)
    dists = []
    likes = []

    # one timer around all ten fits
    util.tic("find ML")
    for _ in xrange(10):
        logl = spidir.find_ml_branch_lengths_hky(
            tree, util.mget(align, tree.leafNames()),
            bgfreq, kappa, maxiter=10)
    util.toc()

    # record the final branch lengths and likelihood
    dists.append([node.dist for node in nodes])
    likes.append(logl)
def sample_thread(arg, seqs, rho=1.5e-8, mu=2.5e-8, popsize=1e4,
                  times=None, ntimes=20, maxtime=200000, verbose=False):
    """Thread one additional sequence into `arg` and return the new ARG.

    The sequence that is threaded is the first key of `seqs` that is not
    among the names returned by ``arg2ctrees``.  When `times` is None the
    time points come from ``argweaver.get_time_points``; `popsize` is
    repeated once per time point.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    popsizes = [popsize] * len(times)

    if verbose:
        util.tic("sample thread")

    trees, names = arg2ctrees(arg, times)

    # sequences in the order of the ARG leaves, new sequence last
    ordered = [seqs[name] for name in names]
    missing = [name for name in seqs.keys() if name not in names]
    new_name = missing[0]
    names.append(new_name)
    ordered.append(seqs[new_name])
    nseqs = len(ordered)
    seqlen = len(ordered[0])

    c_seqs = (C.c_char_p * nseqs)(*ordered)
    trees = argweaver_sample_thread(
        trees, times, len(times), popsizes, rho, mu,
        c_seqs, nseqs, seqlen, None)
    result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()
    return result
def calc_joint_prob(arg, seqs, ntimes=20, mu=2.5e-8, rho=1.5e-8,
                    popsizes=1e4, times=None, verbose=False,
                    delete_arg=True):
    """
    Calculate arg_joint_prob

    Returns the value of ``argweaver_joint_prob`` for `arg` and `seqs`.
    A scalar `popsizes` is repeated once per time point, and the local
    trees are deleted afterwards unless `delete_arg` is false.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("calc likelihood")

    trees, names = arg2ctrees(arg, times)
    cseqs, nseqs, seqlen = seqs2cseqs(seqs, names)

    prob = argweaver_joint_prob(
        trees, times, len(times), popsizes, mu, rho,
        cseqs, nseqs, seqlen)

    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()
    return prob
def addEvents(self, eventsfile):
    """Fill the Events table from `eventsfile`.

    One row is inserted per (family, species-tree node) pair, for every
    famid in the Families table that also appears in the events file.
    The Events table is created first if it does not exist.
    """
    if not tableExists(self.cur, "Events"):
        self.makeEventsTable()

    util.tic("add events")

    events_lookup = tablelib.read_table(eventsfile).lookup("partid")

    self.cur.execute("SELECT famid FROM Families;")
    famids = [record[0] for record in self.cur]

    insert_sql = """INSERT INTO Events VALUES ("%s", "%s", %d, %d, %d, %d);"""
    suffixes = ("-genes", "-dup", "-loss", "-appear")

    for famid in famids:
        if famid in events_lookup:
            row = events_lookup[famid]
            for node_name in self.stree.nodes:
                species = str(node_name)
                # per-species event counts, in column order
                counts = tuple(row[species + suffix] for suffix in suffixes)
                self.cur.execute(insert_sql % ((famid, species) + counts))

    util.toc()
def addGoTerms(self, gofile):
    """Fill the GoTerms and GeneGoTerms tables from `gofile`.

    GoTerms gets one row per distinct "goid" (taken from the first record
    of each group); GeneGoTerms gets one row per (orf, goid) record.
    The GoTerms table is created first if it does not exist.
    """
    if not tableExists(self.cur, "GoTerms"):
        self.makeGoTermsTable()

    util.tic("add go terms")

    goterms = tablelib.read_table(gofile)
    by_gene = goterms.groupby("orf")
    by_goid = goterms.groupby("goid")

    # term definitions
    for goid in by_goid:
        first = by_goid[goid][0]
        if '"' in first["term"]:
            # report terms containing a double quote
            print(first)
        self.cur.execute("""INSERT INTO GoTerms VALUES ("%s", "%s")""" %
                         (first["goid"], first["term"]))

    # gene-to-term associations
    for gene, records in by_gene.iteritems():
        for record in records:
            self.cur.execute(
                """INSERT INTO GeneGoTerms VALUES ("%s", "%s");""" %
                (gene, record["goid"]))

    util.toc()
def addFamilies(self, eventsfile, discard=[]):
    """Fill the Families table, one row per record of `eventsfile`.

    Families whose "partid" is in `discard` are logged and skipped.  For
    each remaining family its tree and FASTA files are read to obtain the
    summed branch length and the median sequence length.  The Families
    table is created first if it does not exist.
    """
    if not tableExists(self.cur, "Families"):
        self.makeFamiliesTable()

    util.tic("add families")

    events_tab = tablelib.read_table(eventsfile)
    # result is not used below; the call is retained from the original
    events_lookup = events_tab.lookup("partid")
    familyGeneNames = self.makeFamilyGeneNames()
    skipped = set(discard)

    insert_sql = """INSERT INTO Families VALUES ("%s", "%s", %f, %f, %f, %d, %d, %d, "%s");"""

    for row in events_tab:
        famid = row["partid"]
        if famid in skipped:
            util.logger("discarding '%s'" % famid)
            continue

        tree = treelib.read_tree(self.getTreeFile(famid))
        treelen = sum(node.dist for node in tree)

        seqs = fasta.read_fasta(self.getFastaFile(famid))
        seqlen = stats.median(map(len, seqs.values()))

        values = (row["partid"],
                  familyGeneNames.get(row["partid"], ("", ""))[0],
                  row["famrate"],
                  treelen,
                  seqlen * 3,
                  row["dup"],
                  row["loss"],
                  row["genes"],
                  familyGeneNames.get(row["partid"], ("", ""))[1])
        self.cur.execute(insert_sql % values)

    util.toc()
def recon_helper(self, nsearch=1000):
    """Perform reconciliation

    Evaluates `nsearch` proposals and returns ``self.maxrecon``, falling
    back to a copy of the initial proposal when no proposal was kept.
    """
    self.maxp = -util.INF
    self.maxrecon = None

    proposal = self.proposer.init_proposal()
    fallback = proposal.copy()

    for step in xrange(nsearch):
        if step % 10 == 0:
            print("search %d" % step)

        # evaluate the probability of proposal
        util.tic("eval")
        prob = self.eval_proposal(proposal)
        util.toc()

        # evaluate the search, then keep or discard the proposal,
        # and set the next proposal
        util.tic("prop")
        self.eval_search(prob, proposal)
        proposal = self.proposer.next_proposal()
        util.toc()

    # all proposals bad, use initial proposal
    if not self.maxrecon:
        self.maxrecon = fallback

    # rename locus tree nodes
    dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal)

    return self.maxrecon
def trainTree(conf, stree, gene2species):
    """Learn Spidir model parameters from the tree files in conf["REST"].

    Every argument is expanded with ``util.shellparser``; each tree is
    read and the two branches below its root are set to half of their
    combined length.  The learned parameters are written to conf["param"].
    """
    treefiles = []
    for arg in conf["REST"]:
        treefiles.extend(util.shellparser(arg))

    util.tic("reading trees")
    trees = []
    prog = progress.ProgressBar(len(treefiles))
    for treefile in treefiles:
        prog.update()
        tree = treelib.read_tree(treefile)
        trees.append(tree)

        # even out top two branches
        left = tree.root.children[0]
        right = tree.root.children[1]
        totlen = left.dist + right.dist
        left.dist = totlen / 2.0
        right.dist = totlen / 2.0
    util.toc()

    params = Spidir.learnModel(trees, stree, gene2species,
                               conf["trainstats"], filenames=treefiles)
    Spidir.writeParams(conf["param"], params)
def _test_ml_speed(self):
    """Time ten runs of HKY maximum-likelihood branch length fitting."""
    # model parameters
    kappa = 1.59
    bgfreq = [.258, .267, .266, .209]

    # input data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)
    dists = []
    likes = []

    # one timer around all ten fits
    util.tic("find ML")
    for _ in xrange(10):
        logl = spidir.find_ml_branch_lengths_hky(
            tree, util.mget(align, tree.leafNames()),
            bgfreq, kappa, maxiter=10)
    util.toc()

    # record the final branch lengths and likelihood
    dists.append([node.dist for node in nodes])
    likes.append(logl)
def setTreeDistances(conf, tree, distmat, genes):
    """Fit branch lengths on `tree` and record the fit in ``tree.data``.

    Three methods are tried in order:
      * parsimony, when ``pyspidir`` is available and "parsimony" in conf
      * HKY maximum likelihood, when ``pyspidir`` is available and
        "mlhkydist" in conf (log-likelihood goes to tree.data["distlogl"])
      * least squares against `distmat` otherwise
    tree.data["error"] is set by every method.
    """
    timing = isDebug(DEBUG_MED)
    if timing:
        util.tic("fit branch lengths")

    use_parsimony = pyspidir and "parsimony" in conf
    use_ml = (not use_parsimony) and pyspidir and "mlhkydist" in conf

    if use_parsimony:
        # estimate branch lengths with parsimony
        parsimony_C(conf["aln"], tree)
        tree.data["error"] = sum(node.dist
                                 for node in tree.nodes.itervalues())

    elif use_ml:
        # estimate branch lengths with ML
        logl = mlhkydist_C(conf["aln"], tree,
                           conf["bgfreq"], conf["tsvratio"],
                           3*len(tree.nodes))
        tree.data["distlogl"] = logl
        tree.data["error"] = 0.0

    else:
        # perform LSE
        lse = phylo.least_square_error(tree, distmat, genes)

        # catch unusual case that may occur in greedy search:
        # all-zero branch lengths would make the normalisation divide by 0
        total = sum(node.dist for node in tree.nodes.values())
        if total == 0:
            for node in tree.nodes.values():
                node.dist = .01

        resid_norm = math.sqrt(scipy.dot(lse.resids, lse.resids))
        tree.data["error"] = resid_norm / \
            sum(node.dist for node in tree.nodes.values())

        setBranchError(conf, tree, lse.resids, lse.paths, lse.edges,
                       lse.topmat)

    if isDebug(DEBUG_MED):
        util.toc()
def draw_placed(self):
    """Build the drawing group for all placed fragments.

    Collects fragment widgets, translated gene widgets and match drawings
    (in that order), stores the resulting group in ``self.groupid`` and
    returns it.
    """
    util.tic("create draw code")

    # draw frags
    vis = [self.frag_widget(frag) for frag in self.frags]

    # draw genes
    for reg, layout in self.region_layout.iteritems():
        widget = self.gene_widget(self.db.get_region(reg))
        vis.append(translate(layout.x, layout.y, widget))

    # draw matches; `drawn` is shared across all fragments
    drawn = set()
    vis.extend(
        self.draw_matches(frag.genome, frag.chrom,
                          frag.start, frag.end, drawn)
        for frag in self.frags)

    util.toc()

    self.groupid = group(*vis)
    return self.groupid
def dnadist(seqs, output=None, verbose=True, force=False, args=None):
    """Run the PHYLIP ``dnadist`` program on an alignment.

    The alignment is written to ``infile`` in the directory prepared by
    ``create_temp_dir()``, ``dnadist`` is executed, and the directory is
    released again with ``cleanup_temp_dir()`` before returning.

    Args:
        seqs: alignment mapping sequence names to sequences.
        output: when given, ``outfile`` is moved to ``"../" + output``
            instead of being parsed.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.
        args: input string handed to ``dnadist``; defaults to ``"y"``.

    Returns:
        ``labels`` when ``output`` is given, otherwise ``(labels, mat)``
        where ``mat`` is the matrix read from ``outfile``.
    """
    if args is None:
        args = "y"

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("dnadist on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    # run phylip
    exec_phylip("dnadist", args, verbose)
    util.toc()

    # hand the raw output file to the caller
    if output is not None:
        os.rename("outfile", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # parse output
    _, mat = read_dist_matrix("outfile")
    cleanup_temp_dir(cwd)
    return labels, mat
def addEvents(self, eventsfile):
    """Fill the Events table from `eventsfile`.

    One row is inserted per (family, species-tree node) pair, for every
    famid in the Families table that also appears in the events file.
    The Events table is created first if it does not exist.
    """
    if not tableExists(self.cur, "Events"):
        self.makeEventsTable()

    util.tic("add events")

    events_lookup = tablelib.read_table(eventsfile).lookup("partid")

    self.cur.execute("SELECT famid FROM Families;")
    famids = [record[0] for record in self.cur]

    insert_sql = """INSERT INTO Events VALUES ("%s", "%s", %d, %d, %d, %d);"""
    suffixes = ("-genes", "-dup", "-loss", "-appear")

    for famid in famids:
        if famid in events_lookup:
            row = events_lookup[famid]
            for node_name in self.stree.nodes:
                species = str(node_name)
                # per-species event counts, in column order
                counts = tuple(row[species + suffix] for suffix in suffixes)
                self.cur.execute(insert_sql % ((famid, species) + counts))

    util.toc()
def processFunc():
    """Start blast on the next batch of query sequences.

    Uses the enclosing scope's `closure`, `seqs`, `split`, `prog`,
    `databaseFile` and `options`.  Returns the pipe of the started
    ``blastall`` process, or False when no query sequences remain.
    """
    # remove old query tempfile if one exists and report progress
    previous = closure["oldtmp"]
    if previous is not None:
        os.remove(previous)
        elapse = util.toc()
        closure["time"] += elapse

        done = closure["index"]
        total = len(seqs.keys())
        util.log("blasted %d of %d sequences (%.1f%%), elapse %.0f m, left %.0f m" % (
            done, total,
            100 * float(done) / total,
            closure["time"] / 60.0,
            elapse / split * (total - done) / 60.0))

    util.tic()

    # find new subset of query sequences
    start = closure["index"]
    names = seqs.keys()[start:start+split]

    # if no more sequences then quit
    if not names:
        return False

    # start blast
    tmpfile = util.tempfile(".", "blastp", ".fasta")
    seqs.write(tmpfile, names=names)
    command = "blastall -p %s -d %s -i %s -m 8 -e .1 %s" % \
        (prog, databaseFile, tmpfile, options)
    pipe = os.popen(command)

    # update variables
    closure["oldtmp"] = tmpfile
    closure["index"] = start + split

    return pipe
def boot_proml(seqs, iters=100, seed=1, jumble=5, output=None,
               verbose=True, force=False):
    """Run PHYLIP ``seqboot`` followed by ``proml`` on an alignment.

    Args:
        seqs: alignment mapping sequence names to sequences.
        iters: number of data sets passed to ``proml`` and number of trees
            read back from ``outtree``.
        seed: random seed passed to both programs.
        jumble: jumble count passed to ``proml``.
        output: when given, ``outtree`` is moved to ``"../" + output``
            instead of being parsed.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.

    Returns:
        ``labels`` when ``output`` is given, otherwise the list of trees
        read from ``outtree``, renamed with ``labels``.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("bootProml on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    exec_phylip("seqboot", "y\n%d" % seed, verbose)

    # the resampled data sets become the input of proml
    os.rename("outfile", "infile")
    exec_phylip("proml", "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble),
                verbose)

    util.toc()

    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # read tree samples; the file is closed even if parsing fails
    trees = []
    with open("outtree") as treefile:
        for _ in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(treefile)
            rename_tree_with_names(tree, labels)
            trees.append(tree)

    cleanup_temp_dir(cwd)
    return trees
def walk(node):
    """Merge partitions bottom-up, storing the result in ``node.parts``.

    Children are processed first.  For an internal node the blast files
    between its two child clades (and, when it has a parent, between its
    own leaves and the parent's other leaves) are collected and handed to
    the merge routine selected by conf["merge"].  Uses `blastFileLookup`,
    `conf` and `genes` from the enclosing scope.
    """
    for child in node.children:
        walk(child)

    if node.is_leaf():
        return

    first, second = node.children[0], node.children[1]
    leaves1 = first.leaf_names()
    leaves2 = second.leaf_names()

    # determine sibling blast files
    blastfiles = [blastFileLookup[a][b]
                  for a in leaves1
                  for b in leaves2
                  if a in blastFileLookup and b in blastFileLookup[a]]

    # determine outgroup blast files (all other files, potentially)
    # go up one level, blastfiles for leaves, and subtract
    # sibling files
    outblastfiles = []
    if node.parent:
        inleaves = leaves1 + leaves2
        outleaves = set(node.parent.leaf_names()) - set(inleaves)
        outblastfiles.extend(
            blastFileLookup[a][b]
            for a in inleaves
            for b in outleaves
            if a in blastFileLookup and b in blastFileLookup[a])

    util.tic("merging")
    util.logger("leaves1: ", leaves1)
    util.logger("leaves2: ", leaves2)

    use_avg = "merge" in conf and conf["merge"] == "avg"
    if use_avg:
        node.parts = mergeAvg(conf, genes, first.parts, second.parts,
                              blastfiles, outblastfiles)
    else:
        node.parts = mergeBuh(conf, genes, first.parts, second.parts,
                              blastfiles)

    if "output" in conf and len(node.parts) > 0:
        util.write_delim(conf["output"] + str(node.name) + ".part",
                         node.parts)

    util.logger("number of parts: ", len(node.parts))
    if len(node.parts) > 0:
        util.logger("largest part:", max(map(len, node.parts)))

    util.toc()
def _test_ml(self):
    """Test ML code

    Runs 40 single-iteration ML fits, recording branch lengths and
    likelihood after each, then plots both series to test/output/ml/.
    """
    # model parameters
    kappa = 1.59
    bgfreq = [.258, .267, .266, .209]

    # input data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)
    dists = []
    likes = []

    util.tic("find ML")
    for _ in range(40):
        logl = spidir.find_ml_branch_lengths_hky(
            tree, util.mget(align, tree.leafNames()),
            bgfreq, kappa,
            parsinit=False, maxiter=1)
        dists.append([node.dist for node in nodes])
        likes.append(logl)
    util.toc()

    print(likes)

    prep_dir("test/output/ml/")

    # distances plot
    util.rplot_start("test/output/ml/ml_branches.pdf")
    util.rplot("plot", util.cget(dists, 0),
               ylim=[0, max(dists[0])], t="l",
               main="branch length convergence",
               xlab="iterations",
               ylab="branch lengths (sub/site)")
    for series in zip(*dists):
        util.rplot("lines", series)
    util.rplot_end(True)

    print(util.cget(dists, 4))

    # likelihood plot
    util.rplot_start("test/output/ml/ml_likelihood.pdf")
    util.rplot("plot", likes, t="l",
               xlab="iterations",
               ylab="log likelihood",
               main="likelihood convergence")
    util.rplot_end(True)
def draw_raxml_tree(tr, adef):
    """Draw a RAxML tree on stdout.

    The tree is converted to a string with ``raxml.tree_to_string``,
    parsed as newick, unrooted and drawn; both steps are timed.
    """
    util.tic("Tree to string...")
    newick = raxml.tree_to_string(tr, adef)
    util.toc()

    util.tic("Drawing tree...")
    unrooted = treelib.unroot(treelib.parse_newick(newick))
    treelib.draw_tree(unrooted, out=sys.stdout, minlen=5, maxlen=5)
    util.toc()
def pamp(seqs, tree, seqtype="dna", saveOutput="", verbose=False, safe=True):
    """Run the PAML ``pamp`` program on an alignment and tree.

    Args:
        seqs: alignment; must provide ``keys``, ``values`` and ``alignlen``.
        tree: tree written to the ``tree`` input file.
        seqtype: "dna" (control-file seqtype 0) or "pep" (seqtype 2).
        saveOutput: when non-empty the working directory is saved under
            this name instead of being cleaned up.
        verbose: when false the program output is sent to /dev/null.
        safe: when true and `seqtype` is "dna", the alignment is first
            mapped through ``removeStopCodons``.

    Returns:
        ``(tree2, aln)``: the tree from ``res.getBranchNames()`` and the
        reconstruction alignment, after ``renameTreeAlign``.

    Raises:
        Exception: if `seqtype` is neither "dna" nor "pep".
    """
    if safe and seqtype == "dna":
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("pamp on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input
    nex = nexus.Nexus("align", "w")
    nex.write_matrix(seqs.keys(), seqs.values(), seqtype, seqs.alignlen())
    nex.close()

    with open("tree", "w") as treefile:
        treefile.write("%d 1\n" % len(tree.leaves()))
        tree.write(treefile, writeData=lambda x: "")

    # create control file; the same name is used for writing and running
    # (the program was previously started on "paml.ctl", a file that is
    # never written here)
    ctlfile = "pamp.ctl"
    with open(ctlfile, "w") as out:
        out.write("seqfile = align\n")
        out.write("treefile = tree\n")
        out.write("outfile = out\n")

        if seqtype == "dna":
            out.write("seqtype = 0\n")
        elif seqtype == "pep":
            out.write("seqtype = 2\n")
        else:
            raise Exception("unknown seqtype '%s'" % seqtype)

        out.write("ncatG = 8\n")
        out.write("nhomo = 0\n")

    # run pamp
    if verbose:
        os.system("pamp %s" % ctlfile)
    else:
        os.system("pamp %s > /dev/null" % ctlfile)

    res = PamlResults("out")
    aln = res.getPampReconstruction()
    aln.write("recon.mfa")
    tree2 = res.getBranchNames()
    renameTreeAlign(tree2, aln)

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return tree2, aln
def pamp(seqs, tree, seqtype="dna", saveOutput="", verbose=False, safe=True):
    """Run the PAML ``pamp`` program on an alignment and tree.

    Args:
        seqs: alignment; must provide ``keys``, ``values`` and ``alignlen``.
        tree: tree written to the ``tree`` input file.
        seqtype: "dna" (control-file seqtype 0) or "pep" (seqtype 2).
        saveOutput: when non-empty the working directory is saved under
            this name instead of being cleaned up.
        verbose: when false the program output is sent to /dev/null.
        safe: when true and `seqtype` is "dna", the alignment is first
            mapped through ``removeStopCodons``.

    Returns:
        ``(tree2, aln)``: the tree from ``res.getBranchNames()`` and the
        reconstruction alignment, after ``renameTreeAlign``.

    Raises:
        Exception: if `seqtype` is neither "dna" nor "pep".
    """
    if safe and seqtype == "dna":
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("pamp on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input
    nex = nexus.Nexus("align", "w")
    nex.write_matrix(seqs.keys(), seqs.values(), seqtype, seqs.alignlen())
    nex.close()

    with open("tree", "w") as treefile:
        treefile.write("%d 1\n" % len(tree.leaves()))
        tree.write(treefile, writeData=lambda x: "")

    # create control file; the same name is used for writing and running
    # (the program was previously started on "paml.ctl", a file that is
    # never written here)
    ctlfile = "pamp.ctl"
    with open(ctlfile, "w") as out:
        out.write("seqfile = align\n")
        out.write("treefile = tree\n")
        out.write("outfile = out\n")

        if seqtype == "dna":
            out.write("seqtype = 0\n")
        elif seqtype == "pep":
            out.write("seqtype = 2\n")
        else:
            raise Exception("unknown seqtype '%s'" % seqtype)

        out.write("ncatG = 8\n")
        out.write("nhomo = 0\n")

    # run pamp
    if verbose:
        os.system("pamp %s" % ctlfile)
    else:
        os.system("pamp %s > /dev/null" % ctlfile)

    res = PamlResults("out")
    aln = res.getPampReconstruction()
    aln.write("recon.mfa")
    tree2 = res.getBranchNames()
    renameTreeAlign(tree2, aln)

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return tree2, aln
def resample_arg_region(arg, seqs, region_start, region_end,
                        ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                        times=None, carg=False, refine=1, verbose=False):
    """
    Sample ARG for sequences

    Resamples `arg` between `region_start` and `region_end`.  Sequences of
    `seqs` that are not yet leaves of the ARG are appended to the name
    list first.  Returns ``(trees, names)`` when `carg` is true, otherwise
    the ARG converted back to python.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")

    # convert arg to c++
    if verbose:
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    leaves = set(names)
    names.extend([name for name, _ in seqs.items() if name not in leaves])
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # resample arg; the length is taken from the first named sequence
    seqlen = len(seqs[names[0]])
    trees = argweaver_resample_arg_region(
        trees, times, len(times),
        popsizes, rho, mu, seqs2, nseqs, seqlen,
        region_start, region_end, refine)

    # convert arg back to python
    if carg:
        result = (trees, names)
    else:
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def _test_ml(self):
    """Test ML code

    Runs 40 single-iteration ML fits, recording branch lengths and
    likelihood after each, then plots both series to test/output/ml/.
    """
    # model parameters
    kappa = 1.59
    bgfreq = [.258, .267, .266, .209]

    # input data
    tree = treelib.readTree("test/data/flies.nt/0/0.tree")
    align = fasta.readFasta("test/data/flies.nt/0/0.align")

    nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)
    dists = []
    likes = []

    util.tic("find ML")
    for _ in range(40):
        logl = spidir.find_ml_branch_lengths_hky(
            tree, util.mget(align, tree.leafNames()),
            bgfreq, kappa,
            parsinit=False, maxiter=1)
        dists.append([node.dist for node in nodes])
        likes.append(logl)
    util.toc()

    print(likes)

    prep_dir("test/output/ml/")

    # distances plot
    util.rplot_start("test/output/ml/ml_branches.pdf")
    util.rplot("plot", util.cget(dists, 0),
               ylim=[0, max(dists[0])], t="l",
               main="branch length convergence",
               xlab="iterations",
               ylab="branch lengths (sub/site)")
    for series in zip(*dists):
        util.rplot("lines", series)
    util.rplot_end(True)

    print(util.cget(dists, 4))

    # likelihood plot
    util.rplot_start("test/output/ml/ml_likelihood.pdf")
    util.rplot("plot", likes, t="l",
               xlab="iterations",
               ylab="log likelihood",
               main="likelihood convergence")
    util.rplot_end(True)
def sample_all_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
                   refine=1, times=None, verbose=False, carg=False,
                   prob_path_switch=.1):
    """
    Sample ARG for sequences

    Starts from a trunk ARG named after the first key of `seqs`, adds the
    remaining sequences and resamples everything.  Returns
    ``(trees, names)`` when `carg` is true, otherwise a python ARG.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")

    # convert arg to c++
    if verbose:
        util.tic("convert arg")
    trunk = argweaver.make_trunk_arg(
        0, len(seqs.values()[0]), name=seqs.keys()[0])
    trees, names = arg2ctrees(trunk, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    seqs2 = [seqs[name] for name in names]
    leaves = set(names)
    extra = [(name, seq) for name, seq in seqs.items()
             if name not in leaves]
    for name, seq in extra:
        names.append(name)
        seqs2.append(seq)

    # resample arg
    nseqs = len(seqs2)
    seqlen = len(seqs[names[0]])
    c_seqs = (C.c_char_p * nseqs)(*seqs2)
    trees = argweaver_resample_all_arg(
        trees, times, len(times),
        popsizes, rho, mu,
        c_seqs, nseqs, seqlen, refine, prob_path_switch)

    if carg:
        result = (trees, names)
    else:
        # convert arg back to python
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def ctrees2arg(trees, names, times, verbose=False, delete_arg=True):
    """
    Convert a C data structure for the ARG into a python ARG

    The C local trees are deleted afterwards unless `delete_arg` is false.
    """
    if verbose:
        util.tic("convert arg")

    # get local trees info
    nnodes = get_local_trees_nnodes(trees)
    ntrees = get_local_trees_ntrees(trees)

    # allocate data structures for treeset, one entry per local tree
    ptrees = [[0] * nnodes for _ in range(ntrees)]
    ages = [[0] * nnodes for _ in range(ntrees)]
    sprs = [[0, 0, 0, 0] for _ in range(ntrees)]
    blocklens = [0] * ntrees

    # populate data structures
    get_local_trees_ptrees(trees, ptrees, ages, sprs, blocklens)

    # fully convert to python (replace each row by a sliced copy)
    ptrees[:] = [row[:nnodes] for row in ptrees]
    ages[:] = [row[:nnodes] for row in ages]
    sprs[:] = [row[:4] for row in sprs]

    # convert treeset to arg data structure:
    # consecutive block lengths become (start, end) intervals
    blocks = []
    end = 0
    for blocklen in blocklens:
        start, end = end, end + blocklen
        blocks.append((start, end))

    assert len(names) == ((nnodes + 1) / 2)

    arg = treeset2arg(ptrees, ages, sprs, blocks, names, times)

    if delete_arg:
        delete_local_trees(trees)

    if verbose:
        util.toc()

    return arg
def addGenes(self, species, gff_files, region_filter=lambda x: x):
    """populate genes table

    Reads every GFF file and inserts one row per distinct gene ID; later
    occurrences of an already inserted ID are skipped.  Genes without a
    family, or whose family has fewer than two genes, get famid "NONE".
    The Genes table is created first if it does not exist.
    """
    if not tableExists(self.cur, "Genes"):
        self.makeGenesTable()

    seen = set()
    insert_sql = """INSERT INTO Genes VALUES ("%s", "%s", "%s", "%s", %d, %d, %d, "%s", "%s");"""

    util.tic("add genes")

    for _, gff_file in zip(species, gff_files):
        for region in gff.read_gff(gff_file, regionFilter=region_filter):
            gene = region.data["ID"]

            # family membership; singleton families count as no family
            famid = "NONE"
            if gene in self.fams.genelookup:
                famid = self.fams.getFamid(gene)
                if len(self.fams.getGenes(famid)) < 2:
                    famid = "NONE"

            if gene in seen:
                continue
            seen.add(gene)

            assert region.start <= region.end

            # common name and description, empty when unknown
            common = ""
            desc = ""
            if gene in self.gene2name:
                common = self.gene2name[gene]["name"]
                desc = self.gene2name[gene]["description"]

            values = (gene, common, self.gene2species(gene),
                      region.seqname, region.start, region.end,
                      region.strand, desc.replace('"', ''), famid)
            self.cur.execute(insert_sql % values)

    util.toc()
def addGenes(self, species, gff_files, region_filter=lambda x: x):
    """populate genes table

    Reads every GFF file and inserts one row per distinct gene ID; later
    occurrences of an already inserted ID are skipped.  Genes without a
    family, or whose family has fewer than two genes, get famid "NONE".
    The Genes table is created first if it does not exist.
    """
    if not tableExists(self.cur, "Genes"):
        self.makeGenesTable()

    seen = set()
    insert_sql = """INSERT INTO Genes VALUES ("%s", "%s", "%s", "%s", %d, %d, %d, "%s", "%s");"""

    util.tic("add genes")

    for _, gff_file in zip(species, gff_files):
        for region in gff.read_gff(gff_file, regionFilter=region_filter):
            gene = region.data["ID"]

            # family membership; singleton families count as no family
            famid = "NONE"
            if gene in self.fams.genelookup:
                famid = self.fams.getFamid(gene)
                if len(self.fams.getGenes(famid)) < 2:
                    famid = "NONE"

            if gene in seen:
                continue
            seen.add(gene)

            assert region.start <= region.end

            # common name and description, empty when unknown
            common = ""
            desc = ""
            if gene in self.gene2name:
                common = self.gene2name[gene]["name"]
                desc = self.gene2name[gene]["description"]

            values = (gene, common, self.gene2species(gene),
                      region.seqname, region.start, region.end,
                      region.strand, desc.replace('"', ''), famid)
            self.cur.execute(insert_sql % values)

    util.toc()
def resample_mcmc_arg(arg, seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8,
                      popsizes=1e4, refine=1, times=None, verbose=False,
                      carg=False, window=200000, niters2=5):
    """
    Sample ARG for sequences

    Resamples `arg` with ``argweaver_resample_mcmc_arg``.  Sequences of
    `seqs` that are not yet leaves of the ARG are appended to the name
    list first.  Returns ``(trees, names)`` when `carg` is true, otherwise
    a python ARG.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("resample arg")

    # convert arg to c++
    if verbose:
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()

    # get sequences in same order
    # and add all other sequences not in arg yet
    leaves = set(names)
    names = list(names)
    names.extend([name for name in seqs if name not in leaves])
    seqs2, nseqs, seqlen = seqs2cseqs(seqs, names)

    # resample arg
    trees = argweaver_resample_mcmc_arg(
        trees, times, len(times),
        popsizes, rho, mu, seqs2, nseqs, seqlen, refine, niters2, window)

    if carg:
        result = (trees, names)
    else:
        # convert arg back to python
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def dndsMatrix(seqs, saveOutput="", verbose=False, safe=True):
    """Run PAML ``yn00`` on an alignment and read the dN and dS matrices.

    Args:
        seqs: alignment mapping sequence names to sequences.
        saveOutput: when non-empty the working directory is saved under
            this name instead of being cleaned up.
        verbose: when false the program output is sent to /dev/null.
        safe: when true, the alignment is first mapped through
            ``removeStopCodons`` and unreadable result files are replaced
            by all-zero matrices.

    Returns:
        ``(dnmat, dsmat)`` as read from ``2YN.dN`` and ``2YN.dS``; in the
        `safe` fallback each one is ``(labels, zero_matrix)``.

    Raises:
        Exception: if the matrices cannot be read and `safe` is false.
    """
    if safe:
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("yn00 on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; handles are closed explicitly so that everything is
    # on disk before yn00 starts
    with open("seqfile.phylip", "w") as seqfile:
        labels = phylip.write_phylip_align(seqfile, seqs)
    with open("labels", "w") as labelfile:
        util.write_list(labelfile, labels)

    # create control file
    with open("yn00.ctl", "w") as out:
        out.write("seqfile = seqfile.phylip\n")
        out.write("outfile = outfile\n")

    # run yn00
    if verbose:
        os.system("yn00 yn00.ctl")
    else:
        os.system("yn00 yn00.ctl > /dev/null")

    try:
        dnmat = phylip.read_dist_matrix("2YN.dN")
        dsmat = phylip.read_dist_matrix("2YN.dS")
    except Exception:
        # could not make distance matrix
        # (KeyboardInterrupt/SystemExit are no longer swallowed)
        if not safe:
            raise Exception("could not read dn or ds matrices")

        # make dummy matrices; every row is its own list so that changing
        # one cell does not change the same column in all rows
        size = len(labels)
        dnmat = labels, [[0] * size for _ in range(size)]
        dsmat = labels, [[0] * size for _ in range(size)]

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return dnmat, dsmat
def dndsMatrix(seqs, saveOutput="", verbose=False, safe=True):
    """Run PAML ``yn00`` on an alignment and read the dN and dS matrices.

    Args:
        seqs: alignment mapping sequence names to sequences.
        saveOutput: when non-empty the working directory is saved under
            this name instead of being cleaned up.
        verbose: when false the program output is sent to /dev/null.
        safe: when true, the alignment is first mapped through
            ``removeStopCodons`` and unreadable result files are replaced
            by all-zero matrices.

    Returns:
        ``(dnmat, dsmat)`` as read from ``2YN.dN`` and ``2YN.dS``; in the
        `safe` fallback each one is ``(labels, zero_matrix)``.

    Raises:
        Exception: if the matrices cannot be read and `safe` is false.
    """
    if safe:
        seqs = alignlib.mapalign(seqs, valfunc=removeStopCodons)

    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("yn00 on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; handles are closed explicitly so that everything is
    # on disk before yn00 starts
    with open("seqfile.phylip", "w") as seqfile:
        labels = phylip.write_phylip_align(seqfile, seqs)
    with open("labels", "w") as labelfile:
        util.write_list(labelfile, labels)

    # create control file
    with open("yn00.ctl", "w") as out:
        out.write("seqfile = seqfile.phylip\n")
        out.write("outfile = outfile\n")

    # run yn00
    if verbose:
        os.system("yn00 yn00.ctl")
    else:
        os.system("yn00 yn00.ctl > /dev/null")

    try:
        dnmat = phylip.read_dist_matrix("2YN.dN")
        dsmat = phylip.read_dist_matrix("2YN.dS")
    except Exception:
        # could not make distance matrix
        # (KeyboardInterrupt/SystemExit are no longer swallowed)
        if not safe:
            raise Exception("could not read dn or ds matrices")

        # make dummy matrices; every row is its own list so that changing
        # one cell does not change the same column in all rows
        size = len(labels)
        dnmat = labels, [[0] * size for _ in range(size)]
        dsmat = labels, [[0] * size for _ in range(size)]

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)

    util.toc()

    return dnmat, dsmat
def addPfamGenes(self, pfamfile):
    """add pfam domains

    Inserts one GenePfamDomains row per record of `pfamfile`.  The
    accession stored is "pfam_acc" with everything from the first "."
    onwards removed.
    """
    # NOTE(review): the existence check is on "PfamDomains" while rows go
    # into "GenePfamDomains"; presumably makePfamTable() creates both --
    # confirm.
    if not tableExists(self.cur, "PfamDomains"):
        self.makePfamTable()

    util.tic("add pfam domains")

    pfams = tablelib.read_table(pfamfile)

    # raw string avoids the invalid "\." escape; compiled once for all rows
    suffix_pattern = re.compile(r"\..*$")
    insert_sql = """INSERT INTO GenePfamDomains VALUES ("%s", "%s", %d, %d, %f, %f);"""

    for row in pfams:
        name = suffix_pattern.sub("", row["pfam_acc"])
        self.cur.execute(insert_sql %
                         (row["locus"], name, row["start"], row["end"],
                          row["score"], row["evalue"]))

    util.toc()
def argweaver_forward_algorithm(arg, seqs, rho=1.5e-8, mu=2.5e-8,
                                popsizes=1e4, times=None, ntimes=20,
                                maxtime=180000, verbose=False, prior=[],
                                internal=False, slow=False):
    """Run the C forward algorithm and return the forward table.

    `arg` may be a python ARG or a C ARG (as recognised by ``is_carg``).
    Sequences named by the ARG come first, followed by every other
    sequence of `seqs`.  Each returned row is truncated to the number of
    states reported by ``argweaver_get_nstates`` for that position.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=maxtime, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("forward")

    if is_carg(arg):
        trees, names = arg
    else:
        trees, names = arg2ctrees(arg, times)

    # ARG sequences first, then the ones not in the ARG
    seqs2 = [seqs[node] for node in names]
    seqs2.extend([seqs[name] for name in seqs.keys() if name not in names])
    nseqs = len(seqs2)
    seqlen = len(seqs2[0])

    c_seqs = (C.c_char_p * nseqs)(*seqs2)
    has_prior = len(prior) > 0
    fw = argweaver_forward_alg(trees, times, len(times),
                               popsizes, rho, mu,
                               c_seqs, nseqs, seqlen,
                               has_prior, prior, internal, slow)

    nstates = [0] * seqlen
    argweaver_get_nstates(trees, len(times), internal, nstates)

    # copy out of the C matrix before it is freed
    probs = [row[:n] for row, n in zip(fw, nstates)]
    delete_forward_matrix(fw, seqlen)

    if verbose:
        util.toc()

    return probs
def boot_neighbor(seqs, iters=100, seed=None, output=None,
                  verbose=True, force=False):
    """Run PHYLIP ``seqboot``, ``protdist`` and ``neighbor`` in sequence.

    Args:
        seqs: alignment mapping sequence names to sequences.
        iters: number of bootstrap replicates.
        seed: random seed for PHYLIP; when None an odd number in
            [1, 2001] is drawn.
        output: when given, ``outtree`` is moved to ``"../" + output``
            instead of being parsed.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.

    Returns:
        ``labels`` when ``output`` is given, otherwise the list of trees
        read from ``outtree``, renamed with ``labels``.
    """
    if seed is None:
        # was random.randInt, which does not exist (AttributeError)
        seed = random.randint(0, 1000) * 2 + 1

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("boot_neighbor on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    # each program's output becomes the next program's input
    exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)

    os.rename("outfile", "infile")
    exec_phylip("protdist", "m\nd\n%d\ny" % iters, verbose)

    os.rename("outfile", "infile")
    exec_phylip("neighbor", "m\n%d\n%d\ny" % (iters, seed), verbose)

    util.toc()

    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # read tree samples; the file is closed even if parsing fails
    trees = []
    with open("outtree") as treefile:
        for _ in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(treefile)
            # same helper as boot_proml (was "rename_tree_with_name")
            rename_tree_with_names(tree, labels)
            trees.append(tree)

    cleanup_temp_dir(cwd)
    return trees
def est_popsizes_trees(arg, times, step, verbose=False):
    """Estimate population sizes from the local trees of `arg`.

    Returns a list with ``len(times) - 1`` entries filled in by
    ``argweaver_est_popsizes_trees``.  The converted trees are deleted
    afterwards unless `arg` is a C ARG (as recognised by ``is_carg``).
    """
    if verbose:
        util.tic("convert arg")
    trees, names = arg2ctrees(arg, times)
    if verbose:
        util.toc()
        util.tic("estimate popsizes")

    # output buffer, one entry per time interval
    nintervals = len(times) - 1
    popsizes = [0.0] * nintervals
    argweaver_est_popsizes_trees(trees, times, len(times), step, popsizes)

    if verbose:
        util.toc()

    if not is_carg(arg):
        delete_local_trees(trees)

    return popsizes
def boot_proml(seqs, iters=100, seed=1, jumble=5, output=None,
               verbose=True, force=False):
    """Run PHYLIP ``seqboot`` followed by ``proml`` on an alignment.

    Args:
        seqs: alignment mapping sequence names to sequences.
        iters: number of data sets passed to ``proml`` and number of trees
            read back from ``outtree``.
        seed: random seed passed to both programs.
        jumble: jumble count passed to ``proml``.
        output: when given, ``outtree`` is moved to ``"../" + output``
            instead of being parsed.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.

    Returns:
        ``labels`` when ``output`` is given, otherwise the list of trees
        read from ``outtree``, renamed with ``labels``.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("bootProml on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, seqs)

    exec_phylip("seqboot", "y\n%d" % seed, verbose)

    # the resampled data sets become the input of proml
    os.rename("outfile", "infile")
    exec_phylip("proml", "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble),
                verbose)

    util.toc()

    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    # read tree samples; the file is closed even if parsing fails
    trees = []
    with open("outtree") as treefile:
        for _ in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(treefile)
            rename_tree_with_names(tree, labels)
            trees.append(tree)

    cleanup_temp_dir(cwd)
    return trees
def sample_arg(seqs, ntimes=20, rho=1.5e-8, mu=2.5e-8, popsizes=1e4,
               refine=0, nremove=1, times=None, verbose=False, carg=False):
    """
    Sample ARG for sequences

    Returns ``(trees, names)`` when `carg` is true, otherwise the sampled
    ARG converted to python.
    """
    if times is None:
        times = argweaver.get_time_points(
            ntimes=ntimes, maxtime=80000, delta=.01)
    if isinstance(popsizes, (float, int)):
        popsizes = [popsizes] * len(times)

    if verbose:
        util.tic("sample arg")

    # names and sequences in matching order
    pairs = seqs.items()
    names = [name for name, _ in pairs]
    seqs2 = [seq for _, seq in pairs]

    # sample arg
    nseqs = len(seqs)
    c_seqs = (C.c_char_p * nseqs)(*seqs2)
    trees = argweaver_sample_arg_refine(
        times, len(times),
        popsizes, rho, mu,
        c_seqs, nseqs, len(seqs2[0]), refine, nremove)

    if carg:
        result = (trees, names)
    else:
        # convert to python
        result = ctrees2arg(trees, names, times, verbose=verbose)

    if verbose:
        util.toc()

    return result
def test_forward():
    """Check that the fast and slow C forward algorithms agree.

    A random ARG and alignment are simulated, one chromosome is removed
    from the ARG, and both forward tables are compared entry by entry
    with a relative tolerance of .0001.
    """
    # simulation parameters
    k = 4
    n = 1e4
    rho = 1.5e-8 * 20
    mu = 2.5e-8 * 20
    length = int(100e3 / 20)
    times = argweaver.get_time_points(ntimes=100)

    arg = arglib.sample_arg_smc(k, 2 * n, rho, start=0, end=length)
    muts = arglib.sample_arg_mutations(arg, mu)
    seqs = arglib.make_alignment(arg, muts)

    print("muts %d" % len(muts))
    print("recomb %d" % len(arglib.get_recomb_pos(arg)))

    argweaver.discretize_arg(arg, times)

    # remove chrom
    removed = "n%d" % (k - 1)
    arg = argweaver.remove_arg_thread(arg, removed)

    carg = argweaverc.arg2ctrees(arg, times)

    util.tic("C fast")
    fast = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times)
    util.toc()

    util.tic("C slow")
    slow = argweaverc.argweaver_forward_algorithm(carg, seqs, times=times,
                                                  slow=True)
    util.toc()

    for fast_col, slow_col in izip(fast, slow):
        for a, b in izip(fast_col, slow_col):
            fequal(a, b, rel=.0001)
def proml_treelk(aln, tree, verbose=True, force=False, args="u\ny"):
    """Run PHYLIP ``proml`` on a user-supplied tree.

    Args:
        aln: alignment mapping sequence names to sequences.
        tree: tree written to ``intree`` for ``proml``.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.
        args: input string handed to ``proml``.

    Returns:
        ``(logl, tree)``: the log-likelihood read from ``outfile`` and the
        tree read from ``outtree``.
    """
    validate_seqs(aln)
    cwd = create_temp_dir()

    util.tic("proml on %d of length %d" % (len(aln), len(aln.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, aln)
    write_in_tree("intree", tree, labels)

    # run phylip
    exec_phylip("proml", args, verbose)

    # parse logl
    logl = read_logl("outfile")

    # parse tree
    tree = read_out_tree("outtree", labels)

    cleanup_temp_dir(cwd)
    util.toc()

    return logl, tree
def proml_treelk(aln, tree, verbose=True, force=False, args="u\ny"):
    """Run PHYLIP ``proml`` on a user-supplied tree.

    Args:
        aln: alignment mapping sequence names to sequences.
        tree: tree written to ``intree`` for ``proml``.
        verbose: passed through to ``exec_phylip``.
        force: not used by this function; kept for interface compatibility.
        args: input string handed to ``proml``.

    Returns:
        ``(logl, tree)``: the log-likelihood read from ``outfile`` and the
        tree read from ``outtree``.
    """
    validate_seqs(aln)
    cwd = create_temp_dir()

    util.tic("proml on %d of length %d" % (len(aln), len(aln.values()[0])))

    # create input; close the handle explicitly so the alignment is flushed
    # to disk before PHYLIP reads it
    with open("infile", "w") as infile:
        labels = write_phylip_align(infile, aln)
    write_in_tree("intree", tree, labels)

    # run phylip
    exec_phylip("proml", args, verbose)

    # parse logl
    logl = read_logl("outfile")

    # parse tree
    tree = read_out_tree("outtree", labels)

    cleanup_temp_dir(cwd)
    util.toc()

    return logl, tree
def recon(self, nsearch=1000):
    """Perform reconciliation

    Evaluates `nsearch` proposals and returns ``self.maxrecon``, which
    starts out as a copy of the initial proposal.
    """
    self.init_search()
    proposal = self.proposer.init_proposal()
    self.maxrecon = proposal.copy()

    for step in xrange(nsearch):
        if step % 10 == 0:
            print("search %d" % step)

        # score the current proposal
        util.tic("eval")
        prob = self.eval_proposal(proposal)
        util.toc()

        # update the search state and draw the next proposal
        util.tic("prop")
        self.eval_search(prob, proposal)
        proposal = self.proposer.next_proposal()
        util.toc()

    # rename locus tree nodes
    dlcoal.rename_nodes(self.maxrecon.locus_tree, self.name_internal)

    return self.maxrecon
def resample_arg_regions(arg, seqs, niters, width=1000,
                         ntimes=20, rho=1.5e-8, mu=2.5e-8, popsize=1e4,
                         times=None, carg=False, verbose=False):
    """Repeatedly resample the window of an ARG holding the most recombs.

    arg     -- ARG to resample; may also be a (trees, names) pair, as
               detected by is_carg
    seqs    -- dict of sequences; the first value's length is used as the
               sequence length
    niters  -- number of resampling rounds
    width   -- window width passed to stats.iter_window_index
    rho, mu, times, carg -- passed through to
               argweaver.resample_arg_region
    ntimes, popsize -- unused here
    verbose -- when true, time each round, log the recombination count and
               let resample_arg_region report as well

    Returns the resampled ARG.  If no window is ever found (for example
    when there are no recombination positions) the input is returned
    unchanged.
    """
    seqlen = len(seqs.values()[0])

    if is_carg(arg):
        trees, names = arg
        arg2 = ctrees2arg(trees, names, times, verbose=verbose,
                          delete_arg=False)
        recomb_pos = [x.pos for x in arg2 if x.event == "recomb"]
    else:
        recomb_pos = [x.pos for x in arg if x.event == "recomb"]

    # Most recent window chosen.  It is kept across rounds, so a round
    # that yields no window re-uses the previous one.
    # NOTE(review): re-using a stale region looks accidental -- confirm.
    region = None

    for it in range(niters):
        # pick the window spanning the most recombination positions,
        # padded by 10 and clamped to stay 10 away from either end
        maxr = 0
        for i, j, _a, _b in stats.iter_window_index(recomb_pos, width):
            r = j - i + 1
            if r > maxr:
                maxr = r
                region = [max(recomb_pos[i] - 10, 10),
                          min(recomb_pos[j] + 10, seqlen - 10)]

        if region is None:
            # nothing to resample; avoids using an unbound region
            break

        if verbose:
            util.tic("sample ARG region %s" % region)

        arg = argweaver.resample_arg_region(
            arg, seqs, region[0], region[1],
            rho=rho, mu=mu, times=times, carg=carg, verbose=verbose)

        if not carg:
            recomb_pos = [x.pos for x in arg if x.event == "recomb"]
            if verbose:
                util.logger("%d: # recombs %d" % (it, len(recomb_pos)))

        if verbose:
            util.toc()

    return arg
def draw_placed(self):
    """Build the drawing group for all placed fragments.

    Collects one widget per fragment, one translated gene widget per
    entry of ``self.region_layout`` and the match drawings of every
    fragment, stores the resulting group in ``self.groupid`` and returns
    it.
    """
    util.tic("create draw code")

    # draw frags
    elements = [self.frag_widget(frag) for frag in self.frags]

    # draw genes
    for region_id, layout in self.region_layout.iteritems():
        region = self.db.get_region(region_id)
        widget = self.gene_widget(region)
        elements.append(translate(layout.x, layout.y, widget))

    # draw matches; the set is shared across fragments
    seen = set()
    for frag in self.frags:
        matches = self.draw_matches(frag.genome, frag.chrom,
                                    frag.start, frag.end, seen)
        elements.append(matches)

    util.toc()

    self.groupid = group(*elements)
    return self.groupid
# check arguments if options.niter < 1: parser.error("--niter must be >= 1: %d" % options.niter) if len(args) != 1: parser.error("must specify input file") # ============================= # main file treefile = args[0] seqfile = util.replace_ext(treefile, options.treeext, options.alignext) out = util.open_stream(options.output, "w") util.tic("Initializing RAXML and optimizing...") module = raxml.RAxML() module.optimize_model(treefile, seqfile, options.extra) util.toc() tree = treelib.read_tree(treefile) for node in tree: node.dist = 0 if "boot" in node.data: del node.data["boot"] treehash = phylo.hash_tree(treelib.unroot(tree, newCopy=True)) treehashes = set([treehash]) for i in xrange(options.niter): while treehash in treehashes: util.log("random spr")
def align2tree(prog, seqs, verbose=True, force=False, args=None,
               usertree=None, saveOutput="",
               bootiter=1, seed=1, jumble=1):
    """Run a PHYLIP tree-building program on an alignment.

    prog       -- name of the PHYLIP program to execute
    seqs       -- alignment; validated with validate_seqs
    verbose    -- passed through to exec_phylip
    force      -- unused here
    args       -- program arguments (defaults to "y")
    usertree   -- optional tree written to "intree"; adds the "u" option
    saveOutput -- if non-empty, the temp dir is saved there instead of
                  being cleaned up
    bootiter   -- number of bootstrap replicates; values above 1 run
                  "seqboot" first
    seed, jumble -- values placed in the seqboot / bootstrap arguments

    Returns a single tree when bootiter == 1 (with data["logl"] set for
    "dnaml" and "proml"), otherwise the result of read_out_tree for the
    bootstrap run.  When the program gives up, star trees over the
    sequence names are returned instead.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()

    util.tic("%s on %d of length %d" %
             (prog, len(seqs), len(seqs.values()[0])))

    # create input; handles are closed before the programs read the files
    with open("infile", "w") as out:
        labels = write_phylip_align(out, seqs)
    with open("labels", "w") as out:
        util.write_list(out, labels)

    # initialize default arguments
    if args is None:
        args = "y"

    # create user tree if given
    if usertree is not None:
        write_in_tree("intree", usertree, labels)
        args = "u\n" + args  # add user tree option

    # bootstrap alignment if needed
    if bootiter > 1:
        exec_phylip("seqboot", "r\n%d\ny\n%d" % (bootiter, seed), verbose)
        os.rename("outfile", "infile")

        # add bootstrap arguments
        args = "m\nD\n%d\n%d\n%d\n%s" % (bootiter, seed, jumble, args)

    # run phylip
    exec_phylip(prog, args, verbose)

    # check for PHYLIP GIVE UP
    if is_phylip_give_up("outfile"):
        def make_star_tree():
            star = treelib.Tree()
            star.make_root()
            for key in seqs:
                star.add_child(star.root, treelib.TreeNode(key))
            return star

        tree = make_star_tree()
        # Define the bootstrap result too, so the return below never
        # touches an unbound name.
        # NOTE(review): assumes read_out_tree yields a list of trees when
        # bootiter != 1 -- confirm.
        trees = [tree] + [make_star_tree() for _ in range(bootiter - 1)]
    else:
        # parse tree
        if bootiter == 1:
            tree = read_out_tree("outtree", labels, bootiter)

            # parse likelihood
            if prog in ["dnaml", "proml"]:
                tree.data["logl"] = read_logl("outfile")
        else:
            trees = read_out_tree("outtree", labels, bootiter)

    if saveOutput != "":
        save_temp_dir(cwd, saveOutput)
    else:
        cleanup_temp_dir(cwd)
    util.toc()

    if bootiter == 1:
        return tree
    else:
        return trees
def wrapper(*args, **kwargs):
    """Call ``func`` with the given arguments inside a util timer.

    The timer is labelled with ``func.__name__`` and is closed even when
    ``func`` raises, so the timer stack stays balanced.  Returns whatever
    ``func`` returns.
    """
    util.tic(func.__name__)
    try:
        return func(*args, **kwargs)
    finally:
        util.toc()
def mergeBuh(conf, genes, parts1, parts2, blastfiles):
    """Merge by Best Unidirectional Hits

    conf       -- settings; reads "bitspersite", "coverage", "signif" and
                  "relcutoff"
    genes      -- mapping gene -> record with a "length" entry
    parts1     -- first partition (list of gene lists); genes seen in hits
                  but missing from it are appended as singleton parts
    parts2     -- second partition, treated the same way
    blastfiles -- iterable of (filename, order) pairs; when ``order`` is
                  false the query/subject roles are swapped

    Returns the merged partition as a list of gene lists.

    The function is deliberately disabled by the assertion below.
    """
    # don't use this code without double checking it
    assert False

    lookup1 = item2part(parts1)
    lookup2 = item2part(parts2)

    # best[part] = (score, partner part); parts are keyed (side, index)
    best = util.Dict(dim=1, default=(0, None))

    util.tic("read hits")
    for blastfile, order in blastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            len1 = genes[gene1]["length"]
            len2 = genes[gene2]["length"]
            coverage = max(alnlen1 / float(len1),
                           alnlen2 / float(len2))

            # discard a hit that does not pass basic cutoffs
            if blast.bitscore(hit) / float(blast.alignLength(hit)) < \
                   conf["bitspersite"] or \
               coverage < conf["coverage"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            if gene1 in lookup1:
                part1 = (0, lookup1[gene1])
            else:
                parts1.append([gene1])
                lookup1[gene1] = len(parts1) - 1
                part1 = (0, len(parts1) - 1)

            if gene2 in lookup2:
                part2 = (1, lookup2[gene2])
            else:
                parts2.append([gene2])
                lookup2[gene2] = len(parts2) - 1
                part2 = (1, len(parts2) - 1)

            if score > best[part1][0]:
                best[part1] = (score, part2)
            if score > best[part2][0]:
                best[part2] = (score, part1)
        util.toc()
    util.toc()

    util.tic("determine clusters")
    # named "clusters" so the dict does not hide the ``sets`` module that
    # provides UnionFind
    clusters = {}
    for part in best:
        clusters[part] = sets.UnionFind([part])

    for blastfile, order in blastfiles:
        util.tic("read hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            len1 = genes[gene1]["length"]
            len2 = genes[gene2]["length"]
            coverage = max(alnlen1 / float(len1),
                           alnlen2 / float(len2))

            # discard a hit that does not pass basic cutoffs
            if blast.bitscore(hit) / float(blast.alignLength(hit)) < \
                   conf["bitspersite"] or \
               coverage < conf["coverage"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            part1 = (0, lookup1[gene1])
            part2 = (1, lookup2[gene2])

            # join parts whose hit is within relcutoff of either's best
            if score >= best[part1][0] * conf["relcutoff"]:
                clusters[part1].union(clusters[part2])
            if score >= best[part2][0] * conf["relcutoff"]:
                clusters[part2].union(clusters[part1])
        util.toc()

    roots = util.unique([x.root() for x in clusters.values()])

    # expand every cluster back into the genes of its member parts
    parts = []
    joining = (parts1, parts2)
    for cluster in roots:
        parts.append([])
        for i, row in cluster.members():
            parts[-1].extend(joining[i][row])
    util.toc()

    return parts
def mergeAvg(conf, genes, parts1, parts2, blastfiles, outblastfiles):
    """Merge two partitions by best average hit score.

    conf          -- settings; reads "bitspersite", "coveragesmall",
                     "coveragebig", "signif" and, optionally, "accept"
                     (a container of genes; hits touching any other gene
                     are ignored)
    genes         -- mapping gene -> record with a "length" entry
    parts1        -- first partition (list of gene lists); genes seen in
                     hits but missing from it are appended as singletons
    parts2        -- second partition, treated the same way
    blastfiles    -- iterable of (filename, order) pairs of hits between
                     the two partitions; when ``order`` is false the
                     query/subject roles are swapped
    outblastfiles -- iterable of (filename, order) pairs of outgroup hits

    Each part is joined with the partner part that has the highest average
    hit score, provided that average beats the average outgroup score of
    both parts.  Returns the merged partition as a list of gene lists.
    """
    lookup1 = item2part(parts1)
    lookup2 = item2part(parts2)

    # value is [sum, total]
    hits = util.Dict(dim=2, default=[0, 0])

    accept = conf["accept"] if "accept" in conf else False

    util.tic("read hits")
    for blastfile, order in blastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                gene1 = blast.query(hit)
                gene2 = blast.subject(hit)
                alnlen1 = blast.queryLength(hit)
                alnlen2 = blast.subjectLength(hit)
            else:
                gene2 = blast.query(hit)
                gene1 = blast.subject(hit)
                alnlen2 = blast.queryLength(hit)
                alnlen1 = blast.subjectLength(hit)
            score = blast.bitscore(hit)

            len1 = genes[gene1]["length"]
            len2 = genes[gene2]["length"]
            coveragesmall = min(alnlen1 / float(len1),
                                alnlen2 / float(len2))
            coveragebig = max(alnlen1 / float(len1),
                              alnlen2 / float(len2))

            # discard a hit that does not pass basic cutoffs
            if blast.bitscore(hit) / float(blast.alignLength(hit)) < \
                   conf["bitspersite"] or \
               coveragesmall < conf["coveragesmall"] or \
               coveragebig < conf["coveragebig"] or \
               blast.evalue(hit) > conf["signif"]:
                continue

            if accept and \
               (gene1 not in accept or gene2 not in accept):
                continue

            # create a key for a partition: (side, index)
            if gene1 in lookup1:
                part1 = (0, lookup1[gene1])
            else:
                parts1.append([gene1])
                lookup1[gene1] = len(parts1) - 1
                part1 = (0, len(parts1) - 1)

            if gene2 in lookup2:
                part2 = (1, lookup2[gene2])
            else:
                parts2.append([gene2])
                lookup2[gene2] = len(parts2) - 1
                part2 = (1, len(parts2) - 1)

            # both directions share the same [sum, total] accumulator
            val = hits[part1][part2]
            val[0] += score
            val[1] += 1
            hits[part2][part1] = val
        util.toc()
    util.toc()

    util.tic("read outgroup hits")
    outbest = util.Dict(default=[0, 0])
    for blastfile, order in outblastfiles:
        util.tic("determine best hits '%s'" % os.path.basename(blastfile))
        for hit in blast.BlastReader(blastfile):
            if order:
                genein = blast.query(hit)
                geneout = blast.subject(hit)
            else:
                geneout = blast.query(hit)
                genein = blast.subject(hit)
            score = blast.bitscore(hit)

            # create a key for a partition: (side, index)
            if genein in lookup1:
                partin = (0, lookup1[genein])
            elif genein in lookup2:
                partin = (1, lookup2[genein])
            else:
                continue

            val = outbest[partin]
            val[0] += score
            val[1] += 1
        util.toc()
    util.toc()

    assert len(parts1) == len(unionPart(parts1))
    assert len(parts2) == len(unionPart(parts2))

    util.tic("determine clusters")
    # named "clusters" so the dict does not hide the ``sets`` module that
    # provides UnionFind
    clusters = {}
    for i in xrange(len(parts1)):
        clusters[(0, i)] = sets.UnionFind([(0, i)])
    for i in xrange(len(parts2)):
        clusters[(1, i)] = sets.UnionFind([(1, i)])

    # merge top avg hits
    for part1 in hits:
        o1 = outbest[part1]
        outavg1 = float(o1[0]) / max(o1[1], 1)

        top = 0
        toppart = None

        for part2, (tot, num) in hits[part1].iteritems():
            avg = float(tot) / num
            o2 = outbest[part2]
            outavg2 = float(o2[0]) / max(o2[1], 1)

            if avg > outavg1 and avg > outavg2 and avg > top:
                top = avg
                toppart = part2

        if toppart:
            clusters[part1].union(clusters[toppart])

    roots = util.unique([x.root() for x in clusters.values()])

    # create partition of genes
    parts = []
    joining = (parts1, parts2)
    for cluster in roots:
        parts.append([])
        for i, row in cluster:
            parts[-1].extend(joining[i][row])
    util.toc()

    assert len(parts) == len(unionPart(parts))

    return parts
def mrbayes(aln, nexfilename="", seqtype="pep", options=None,
            usertree=None, bootiter=0, verbose=True, saveOutput=""):
    """Run MrBayes (``mb``) on an alignment and return the consensus tree.

    aln         -- alignment mapping names to sequences
    nexfilename -- nexus file to write; when empty, a temporary directory
                   is created and "infile.nex" is used
    seqtype     -- passed to writeNexus and writeMrbayesOptions
    options     -- optional dict of MrBayes options; it is copied, so the
                   caller's dict is left untouched
    usertree, bootiter -- unused here
    verbose     -- when false, the program's output is sent to /dev/null
    saveOutput  -- if non-empty (and a temp dir was created), the temp dir
                   is saved there instead of being cleaned up

    Returns the tree read from ``nexfilename + ".con"``, with any names
    that had "+" replaced by "_" restored to their original form.
    """
    util.tic("mrbayes on %d of length %d" % (len(aln), len(aln.values()[0])))

    if nexfilename == "":
        cwd = phylip.create_temp_dir()
    else:
        cwd = None

    # setup options
    if nexfilename == "":
        nexfilename = "infile.nex"

    # work on a private copy: "extra" is appended to below and must not
    # accumulate in the caller's dict across calls
    options = dict(options) if options else {}
    setDefaultOptions(options)

    options["burninfrac"] = .25
    options["relburnin"] = "yes"

    # force best binary tree (if possible)
    options["extra"] += "sumt contype=allcompat;"

    # get gene names; "+" is replaced by "_" and mapped back afterwards
    names = []
    namemap = {}
    for key in aln.keys():
        if "+" in key:
            key2 = key.replace("+", "_")
            names.append(key2)
            namemap[key2] = key
        else:
            names.append(key)

    # write input file
    with open(nexfilename, "w") as out:
        writeNexus(out, names, aln.values(), seqtype, options)

        # write options
        writeMrbayesOptions(out, options, seqtype=seqtype)

    # exec mrbayes
    if verbose:
        os.system("echo exe %s | mb" % nexfilename)
    else:
        os.system("echo exe %s | mb >/dev/null 2>&1" % nexfilename)

    # read tree
    with open(nexfilename + ".con") as infile:
        tree = readNexusConTree(infile)

    # clean up
    if cwd is not None:
        if saveOutput != "":
            phylip.save_temp_dir(cwd, saveOutput)
        else:
            phylip.cleanup_temp_dir(cwd)

    util.toc()

    for tmpname, origname in namemap.iteritems():
        tree.rename(tmpname, origname)

    return tree
################################################# # timing if 0: from rasmus import util text = [ "##types:" + "int\t" * 99 + "int", "\t".join(map(str, range(100))) ] for i in range(10000): text.append("1\t" * 99 + "1") text = "\n".join(text) stream = StringIO.StringIO(text) util.tic("read table") tab = readTable(stream) util.toc() ################################################# # specialized types if 1: text = """\ ##types:str int strand_type name num strand matt 123 + alex 456 - mike 789 + john 0 + """
T2 = treelib.unroot(T) treelib.draw_tree(T2, out=sys.stdout, minlen=5, maxlen=5) util.toc() treefile = args[0] seqfile = util.replace_ext(treefile, options.treeext, options.alignext) out = util.open_stream(options.output, "w") adef = raxml.new_analdef() raxml.init_adef(adef) tr = raxml.new_tree() cmd = "raxmlHPC -t %s -s %s %s" % (treefile, seqfile, options.extra) raxml.init_program(adef, tr, cmd.split(" ")) util.tic("Optimizing model...") raxml.optimize_model(adef, tr) util.toc() # draw_raxml_tree(tr, adef) util.tic("Getting parameters for LH...") bestVector, bestLH, weightSum = raxml.compute_best_LH(tr) util.log("bestLH: %.3f" % bestLH) util.toc() tree = treelib.read_tree(treefile) for node in tree: node.dist = 0 if "boot" in node.data: del node.data["boot"]
self.window_size = (0, 0) self.window_pos = (0, 0) self.vsash_pos = 0 self.hsash_pos = 0 self.apps = [] self.apps2 = [] def read(self, filename): parser.read(self, filename) def write(self, filename): parser.write(self, filename) from rasmus import util util.tic("run") infile = StringIO.StringIO("""<?xml version="1.0" encoding="UTF-8"?> <notebook> <window_size>1053,905</window_size> <window_pos>0,0</window_pos> <vsash_pos>0</vsash_pos> <hsash_pos>250</hsash_pos> <external_apps> <app>web_browser</app> <app>image_editor</app> </external_apps> <external_apps2> <app><name>web_browser</name><prog>firefox</prog></app> <app><name>image_editor</name><prog>gimp</prog></app> </external_apps2>
def phyml(seqs, verbose=True, args=None, usertree=None,
          seqtype="pep", saveOutput="", bootiter=0,
          opttree=True, optbranches=True, nrates=4):
    """Run ``phyml`` on an alignment and return the resulting tree.

    seqs        -- alignment; validated with phylip.validate_seqs
    verbose     -- passed through to phylip.exec_phylip
    args        -- full phyml command-line arguments; built from the other
                   parameters when None
    usertree    -- optional starting tree (unrooted before being written to
                   "intree"); "BIONJ" is used otherwise
    seqtype     -- "dna" or "pep"; only checked when ``args`` is None
    saveOutput  -- if non-empty, the temp dir is saved there instead of
                   being cleaned up
    bootiter    -- bootstrap iterations; a value of 1 is treated as 0
    opttree, optbranches -- select "y"/"n" for the two optimize flags
    nrates      -- number placed in the rate-category argument

    Returns the tree read from "infile_phyml_tree.txt" with
    data["logl"] set from "infile_phyml_lk.txt".

    Raises AssertionError for an unknown ``seqtype``.
    """
    phylip.validate_seqs(seqs)
    cwd = phylip.create_temp_dir()

    util.tic("phyml on %d of length %d" % (len(seqs), len(seqs.values()[0])))

    # create input; handles are closed before phyml reads the files
    with open("infile", "w") as out:
        labels = phylip.write_phylip_align(out, seqs)
    with open("labels", "w") as out:
        util.write_list(out, labels)

    options = "y"

    # only bootstrap when iterations are above 1
    if bootiter == 1:
        bootiter = 0

    if usertree is not None:
        usertree = treelib.unroot(usertree)
        phylip.write_in_tree("intree", usertree, labels)
        treefile = "intree"
    else:
        treefile = "BIONJ"

    optimize = ("y " if opttree else "n ") + ("y " if optbranches else "n ")

    if args is None:
        if seqtype == "dna":
            args = "infile 0 s 1 %d HKY e e %d e %s %s" % \
                (bootiter, nrates, treefile, optimize)
        elif seqtype == "pep":
            args = "infile 1 s 1 %d JTT e %d e %s %s" % \
                (bootiter, nrates, treefile, optimize)
        else:
            # raised explicitly so the check survives ``python -O``
            raise AssertionError("unknown sequence type '%s'" % seqtype)

    phylip.exec_phylip("phyml %s" % args, options, verbose)

    # parse tree
    tree = phylip.read_out_tree("infile_phyml_tree.txt", labels)

    # parse likelihood
    with open("infile_phyml_lk.txt") as infile:
        tree.data["logl"] = float(infile.read())

    if saveOutput != "":
        phylip.save_temp_dir(cwd, saveOutput)
    else:
        phylip.cleanup_temp_dir(cwd)
    util.toc()

    return tree
def update(self):
    """Advance the progress position by one step.

    Redraws the bar whenever the position passes the next threshold
    ``self.prog``; when the position reaches ``self.end`` the closing
    "|" is written and curses is shut down.
    """
    self.pos += 1
    if self.pos > self.prog:
        self.prog += int(self.step * self.end)
        self.printBar()
        self.pad.refresh()
    if self.pos == self.end:
        self.pad.addstr(1, 1 + self.bar, "|\n")
        import curses
        curses.endwin()


def printBar(self):
    """Extend the drawn bar so it reflects the current position."""
    # float() keeps the ratio fractional when pos and end are both ints;
    # plain "/" would truncate it to 0 under Python 2
    filled = float(self.pos) / self.end * self.width
    amount = int(filled - self.bar)
    self.pad.addstr(1, 1 + self.bar, "*" * amount)
    self.bar += amount


if __name__ == "__main__":
    import time

    util.tic("hi")

    prog = FancyProgressBar(100)
    for i in range(100):
        time.sleep(.01)
        prog.update()

    util.toc()