def buildAlignBigTree(seqs, verbose=True, removetmp=True, options=""):
    """Align 'seqs' with muscle and also return the tree it writes via -tree1.

    seqs      -- sequences to align (written out with fasta.write_fasta)
    verbose   -- accepted for signature compatibility; not used here
    removetmp -- if True, delete the temporary files when finished
    options   -- extra command-line options inserted into the muscle command

    Returns a tuple (aln, tree).  When fewer than two sequences are given,
    muscle is not run and 'seqs' itself is returned unchanged.
    """
    if len(seqs) < 2:
        return seqs

    # every temp file is recorded as soon as it is named so that the cleanup
    # below also runs when muscle fails or an output file cannot be parsed
    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        outfilename2 = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename2)
        cmd = "muscle -diags1 -sv -maxiters 1 " + options + \
              " -in " + infilename + \
              " -out " + outfilename + " -tree1 " + outfilename2
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
        tree = treelib.Tree()
        tree.read_newick(outfilename2)
    finally:
        # cleanup tempfiles; a file may be missing if the run failed early
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)

    return (aln, tree)
def muscle(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with muscle.

    seqs      -- sequences to align (written out with fasta.write_fasta)
    verbose   -- if False, " -quiet " is appended to the muscle options
    removetmp -- if True, delete the temporary input and output files
    options   -- extra command-line options passed to muscle

    Returns the alignment read back with fasta.read_fasta.  With fewer than
    two sequences nothing is run and 'seqs' is returned as is.
    """
    # nothing to align
    if len(seqs) < 2:
        return seqs

    # write the query sequences where muscle can read them
    query_path = util.tempfile(".", "muscle-in", ".fa")
    fasta.write_fasta(query_path, seqs)

    flags = options
    if not verbose:
        flags = flags + " -quiet "

    # run muscle
    result_path = util.tempfile(".", "muscle-out", ".aln")
    os.system("".join(["muscle ", flags,
                       " -in ", query_path,
                       " -out ", result_path]))

    # read the alignment back
    alignment = fasta.read_fasta(result_path)

    # cleanup tempfiles
    if removetmp:
        for path in (query_path, result_path):
            os.remove(path)

    return alignment
def clustalw(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with clustalw

    seqs      -- sequences to align (written out with fasta.writeFasta)
    verbose   -- if False, clustalw's standard output is sent to /dev/null
    removetmp -- if True, delete the temporary files when finished
    options   -- extra command-line options passed to clustalw

    Returns the alignment parsed by readClustalwAlign.  With fewer than two
    sequences nothing is run and 'seqs' is returned as is.
    """
    if len(seqs) < 2:
        return seqs

    # make input file for clustalw
    infilename = util.tempfile(".", "clustalw-in", ".fa")
    fasta.writeFasta(infilename, seqs)

    # run clustalw
    outfilename = util.tempfile(".", "clustalw-out", ".aln")
    cmd = "clustalw " + options + " -quicktree -infile=" + infilename + \
          " -outfile=" + outfilename
    if not verbose:
        cmd += " > /dev/null"
    os.system(cmd)

    # parse output
    aln = readClustalwAlign(outfilename)

    # cleanup tempfiles
    if removetmp:
        os.remove(infilename)
        os.remove(outfilename)
        # The .dnd file is a by-product next to the input file.  Its absence
        # must not turn a successful alignment into an error, which is how
        # clustalwProfiles already treats it.
        try:
            os.remove(infilename.replace(".fa", ".dnd"))
        except OSError:
            pass

    return aln
def bionj(aln=None, labels=None, distmat=None, seqtype="pep", verbose=True):
    """Build a tree by running the external 'bionj' program.

    aln     -- alignment; used to compute distances when 'distmat' is None,
               and as the source of labels (aln.keys()) when 'distmat' is
               given without 'labels'
    labels  -- names applied to the tree via phylip.rename_tree_with_names
    distmat -- optional precomputed distance matrix
    seqtype -- "pep" selects phylip.protdist, anything else phylip.dnadist
    verbose -- forwarded to the phylip distance routine

    Returns the tree read with treelib.read_tree.
    """
    # temp files: distances in, tree out
    dist_path = util.tempfile(".", "bionj-in", ".dist")
    tree_path = util.tempfile(".", "bionj-out", ".tree")

    if distmat is None:
        # no matrix supplied: compute distances from the alignment
        if seqtype == "pep":
            compute_dist = phylip.protdist
        else:
            compute_dist = phylip.dnadist
        labels = compute_dist(aln, dist_path, verbose=verbose)
    else:
        phylip.write_dist_matrix(distmat, out=dist_path)
        if labels is None:
            labels = aln.keys()

    # bionj reads the two file names from its standard input
    command = "echo -n '%s\n%s' | bionj > /dev/null" % (dist_path, tree_path)
    os.system(command)

    tree = treelib.read_tree(tree_path)
    phylip.rename_tree_with_names(tree, labels)

    # clean up
    for path in (dist_path, tree_path):
        os.remove(path)

    return tree
def clustalwProfiles(aln1, aln2, verbose=True, removetmp=True, options=""):
    """Align two alignments against each other using clustalw profiles.

    aln1, aln2 -- alignments written out as -profile1 and -profile2
    verbose    -- if False, clustalw's standard output is sent to /dev/null
    removetmp  -- if True, delete the temporary files when finished
    options    -- extra command-line options passed to clustalw

    Returns the combined alignment parsed by readClustalwAlign.
    """
    # make input files for clustalw
    infilename1 = util.tempfile(".", "clustalw-in", ".fa")
    infilename2 = util.tempfile(".", "clustalw-in", ".fa")
    fasta.writeFasta(infilename1, aln1)
    fasta.writeFasta(infilename2, aln2)

    # run clustalw
    outfilename = util.tempfile(".", "clustalw-out", ".aln")
    cmd = "clustalw " + options + " -quicktree -profile1=" + infilename1 + \
          " -profile2=" + infilename2 + " -outfile=" + outfilename
    if not verbose:
        cmd += " > /dev/null"
    os.system(cmd)

    # parse output
    aln = readClustalwAlign(outfilename)

    # cleanup tempfiles
    if removetmp:
        os.remove(infilename1)
        os.remove(infilename2)
        os.remove(outfilename)

        # The .dnd by-products may or may not exist.  Only a failed removal
        # is ignored, so unrelated errors such as KeyboardInterrupt are no
        # longer swallowed.
        for infilename in (infilename1, infilename2):
            try:
                os.remove(infilename.replace(".fa", ".dnd"))
            except OSError:
                pass

    return aln
def bl2seq(seq1, seq2, program="blastp", options="",
           name1="seq1", name2="seq2"):
    """
    Performs Blast between two sequences 'seq1' and 'seq2'.

    program -- value passed to bl2seq's -p option
    options -- extra command-line options appended to the bl2seq command
    name1, name2 -- names under which the two sequences are written

    Returns a single Blast hit line (split on tabs into a list)
    or None if no hits found.
    """
    # create temp files for sequences
    file1 = util.tempfile(".", "blastp", ".fasta")
    file2 = util.tempfile(".", "blastp", ".fasta")
    fasta.write_fasta_ordered(file1, [name1], [seq1])
    fasta.write_fasta_ordered(file2, [name2], [seq2])

    # execute blast
    pipe = os.popen("bl2seq -p %s -i %s -j %s -D 1 %s" %
                    (program, file1, file2, options))

    hit = None
    try:
        # parse hit: the first line that is not a '#' comment
        for line in pipe:
            if line.startswith("#"):
                continue
            hit = line.rstrip().split("\t")
            break
    finally:
        # Close the pipe so the child process is reaped and is no longer
        # reading the query files when they are deleted below.
        pipe.close()

        # remove temp files
        os.remove(file1)
        os.remove(file2)

    return hit
def create_temp_dir(prefix="tmpphylip_"):
    """Create a temporary directory for executing PHYLIP

    A fresh name starting with 'prefix' is obtained from util.tempfile, the
    directory is created under the current working directory, and the
    process then changes into it.  Returns the directory name (no path).
    """
    tmp_path = util.tempfile(".", prefix, "")
    # keep only the final path component
    directory = os.path.basename(tmp_path)

    os.mkdir(directory)
    # NOTE: changes the working directory of the whole process
    os.chdir(directory)
    return directory
def processFunc():
    """Start blastall on the next batch of query sequences.

    Reads 'closure', 'seqs', 'split', 'prog', 'databaseFile' and 'options'
    from the enclosing scope.  Each call removes the previous batch's query
    file, logs progress, writes the next 'split' sequences to a new temp
    file and starts blastall on it.

    Returns the pipe opened on blastall, or False when no sequences remain.
    """
    # remove old query tempfile if one exists
    if closure["oldtmp"] is not None:
        os.remove(closure["oldtmp"])
        # forget the path so a later call cannot try to remove it again
        closure["oldtmp"] = None

    elapse = util.toc()
    closure["time"] += elapse

    all_names = seqs.keys()
    total = len(all_names)
    i = closure["index"]

    # guard against an empty sequence set (would divide by zero)
    if total:
        percent = 100 * float(i) / total
    else:
        percent = 100.0

    util.log("blasted %d of %d sequences (%.1f%%), elapse %.0f m, left %.0f m" % (
        i, total, percent,
        closure["time"] / 60.0,
        elapse / split * (total - i) / 60.0))
    util.tic()

    # find new subset of query sequences
    names = all_names[i:i+split]

    # if no more sequences then quit
    if len(names) == 0:
        return False

    # start blast
    tmpfile = util.tempfile(".", "blastp", ".fasta")
    seqs.write(tmpfile, names = names)
    pipe = os.popen("blastall -p %s -d %s -i %s -m 8 -e .1 %s" % \
        (prog, databaseFile, tmpfile, options))

    # update variables
    closure["oldtmp"] = tmpfile
    closure["index"] = i + split

    return pipe
def make_color_legend(filename, colormap, start, end, step,
                      width=100, height=10, display=False):
    """Draw a one-row SVG legend for 'colormap'.

    filename -- output file; if None a temporary file is used and removed
                again before returning
    colormap -- object whose get(value) yields the colour for a value
    start, end, step -- values sampled with util.frange(start, end+step, step)
    width, height    -- size of the SVG canvas
    display  -- if True, run the external 'display' program on the file
    """
    from rasmus import util

    # fall back to a throw-away file when no name was given
    temp = filename is None
    if temp:
        filename = util.tempfile(".", "colormap", ".svg")

    canvas = svg.Svg(util.open_stream(filename, "w"))
    canvas.beginSvg(width, height)

    upper = end + step
    xscale = float(width) / (upper - start)
    box_width = step * xscale

    # one filled box per sampled value
    for value in util.frange(start, upper, step):
        color = colormap.get(value)
        canvas.rect((value - start) * xscale, 0,
                    box_width, height, color, color)

    canvas.endSvg()
    canvas.close()

    # display
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)
def showtab(tab, name='table'):
    """Show a table in a new xterm

    tab  -- table object; written with tab.write_pretty to a temp file
    name -- window title and icon name (single quotes are stripped so the
            name can sit inside the quoted shell arguments)

    The xterm runs in the background; the temp file is removed by the shell
    once 'less' exits.
    """
    name = name.replace("'", "")
    tmp = util.tempfile(".", "tmp", ".tab")

    # close the file explicitly so the table is fully flushed to disk before
    # 'less' opens it, instead of relying on garbage collection
    out = open(tmp, "w")
    try:
        tab.write_pretty(out)
    finally:
        out.close()

    os.system("(xterm -T '%s' -n '%s' -e less -S %s; rm %s) &" %
              (name, name, tmp, tmp))
def gfit(self, func, eqn, params, list1, list2=[], list3=[], ** options): """ all syntax should be valid GNUPLOT syntax func - a string of the function call i.e. "f(x)" eqn - a string of a GNUPLOT equation "a*x**b" params - a dictionary of parameters in eqn and their initial values ex: {"a": 1, "b": 3} """ from rasmus import util self.set(** options) print len(list1), len(list2), len(list3) if not self.enable: raise Exception("must be output must be enabled for fitting") list1, list2, list3 = self.prepareData(list1, list2, list3) # add data to graph self.data.append(self.Plot(list1, list2, list3, copy.copy(self.options))) # perform fitting self.stream = os.popen("gnuplot", "w") print >>self.stream, "%s = %s" % (func, eqn) for param, value in params.items(): print >>self.stream, "%s = %f" % (param, value) print >>self.stream, "fit %s '-' via %s" % \ (func, ",".join(params.keys())) self.outputData(list1, list2, list3) # save and read parameters outfile = util.tempfile(".", "plot", ".txt") print >>self.stream, "save var '%s'" % outfile print >>self.stream, "print 'done'" self.stream.flush() # wait for variable file while not os.path.isfile(outfile): pass params = self.readParams(outfile) os.remove(outfile) # build eqn for plotting paramlist = "" for param, value in params.items(): paramlist += "%s = %s, " % (param, value) self.options["eqn"] = paramlist + "%s = %s, %s" % \ (func, eqn, func) self.options["style"] = "lines" # add fitted eqn to graph self.data.append(self.Plot([], [], [], copy.copy(self.options))) self.replot()
def wait(self):
    """Wait until all commands are known to be executed

    A dummy postscript plot is appended to the command stream; once its
    output file shows up, the commands sent before it have been processed.
    """
    from rasmus import util
    import time

    # assumes util.tempfile only picks a name and does not create the file;
    # otherwise the loop below returns immediately -- TODO confirm
    tmpfile = util.tempfile(".", "gnuplot", ".ps")
    print >>self.stream, "set output '%s'" % tmpfile
    print >>self.stream, "set terminal postscript color"
    print >>self.stream, "plot '-'\n0 0\ne\n"
    self.stream.flush()

    # poll for the marker file, sleeping between checks instead of spinning
    while not os.path.isfile(tmpfile):
        time.sleep(0.01)
    os.remove(tmpfile)
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from 'seqs' with muscle's -cluster -tree1 mode.

    seqs      -- sequences (written out with fasta.write_fasta)
    verbose   -- if False, muscle's standard error is sent to /dev/null
    removetmp -- if True, delete the temporary input and tree files
    options   -- extra command-line options passed to muscle

    Returns a treelib.Tree read from the newick file muscle wrote.
    """
    # write the sequences for muscle
    fasta_path = util.tempfile(".", "muscle-in", ".fa")
    fasta.write_fasta(fasta_path, seqs)

    # assemble and run the muscle command
    newick_path = util.tempfile(".", "muscle-out", ".tree")
    pieces = ["muscle ", options,
              " -in ", fasta_path,
              " -cluster -tree1 ", newick_path]
    if not verbose:
        pieces.append(" 2>/dev/null")
    os.system("".join(pieces))

    # load the resulting tree
    tree = treelib.Tree()
    tree.read_newick(newick_path)

    if removetmp:
        for path in (fasta_path, newick_path):
            os.remove(path)

    return tree
def setTerminal(self, filename = "", format="x11"):
    """Point gnuplot's output at a file and choose the terminal type.

    filename -- output file; its extension (.ps/.pdf/.gif/.png/.jpg), if
                recognised, overrides 'format'
    format   -- terminal to use when the extension does not decide

    Does nothing when self.enable is false.  With an empty filename the
    output is directed to a new temporary .ps file, whose name is returned,
    and no terminal is set.  Otherwise returns None.
    """
    if not self.enable:
        return

    from rasmus import util

    # no file requested: send output to a scratch file and hand it back
    if filename == "":
        tmpfile = util.tempfile(".", "gnuplot", ".ps")
        print >>self.stream, "set output \"%s\"" % tmpfile
        return tmpfile

    print >>self.stream, "set output \"%s\"" % filename

    # auto detect format from filename
    known_extensions = ((".ps", "ps"),
                        (".pdf", "pdf"),
                        (".gif", "gif"),
                        (".png", "png"),
                        (".jpg", "jpg"))
    for extension, detected in known_extensions:
        if filename.endswith(extension):
            format = detected

    # set terminal format; only 'ps' and 'jpg' differ from gnuplot's name
    if format == "ps":
        terminal = "postscript color"
    elif format == "jpg":
        terminal = "jpeg"
    else:
        terminal = format
    print >>self.stream, "set terminal %s" % terminal
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from 'seqs' with clustalw -tree.

    seqs      -- sequences (written out with fasta.writeFasta)
    verbose   -- if False, clustalw's standard output is sent to /dev/null
    removetmp -- if True, delete the temporary input and tree files
    options   -- extra command-line options passed to clustalw

    Returns a treelib.Tree read from the .ph file.
    """
    # write the sequences for clustalw
    fasta_path = util.tempfile(".", "clustalw-in", ".fa")
    fasta.writeFasta(fasta_path, seqs)

    # the tree file name is derived from the input name
    tree_path = fasta_path.replace(".fa", ".ph")

    # assemble and run the clustalw command
    pieces = ["clustalw ", options,
              " -tree -infile=", fasta_path,
              " -outfile=", tree_path]
    if not verbose:
        pieces.append(" > /dev/null")
    os.system("".join(pieces))

    # parse output
    tree = treelib.Tree()
    tree.read_newick(tree_path)

    # cleanup tempfiles
    if removetmp:
        for path in (fasta_path, tree_path):
            os.remove(path)

    return tree
def heatmap(matrix, width=20, height=20, colormap=None, filename=None,
            rlabels=None, clabels=None, display=True,
            xdir=1, ydir=1,
            xmargin=0, ymargin=0,
            labelPadding=2, labelSpacing=4,
            mincutoff=None, maxcutoff=None,
            showVals=False, formatVals=str, valColor=black,
            clabelsAngle=270, clabelsPadding=None,
            rlabelsAngle=0, rlabelsPadding=None,
            colors=None, strokeColors=None, valAnchor="start",
            close=True):
    """Draw 'matrix' as an SVG heatmap.

    matrix         -- list of rows; every row needs len(matrix[0]) entries
    width, height  -- size of one cell
    colormap       -- object whose get(value) gives a cell colour; when both
                      'colormap' and 'colors' are None a rainbowColorMap is
                      built from the matrix values
    filename       -- output file; if None a temp file is used and removed
    rlabels, clabels -- optional row / column labels
    display        -- if True, run the external 'display' program
    xdir, ydir     -- drawing direction along each axis, 1 or -1
    xmargin, ymargin -- margins around the matrix
    labelPadding, labelSpacing -- label placement and text size adjustments
    mincutoff, maxcutoff -- cells below / above these values are not drawn;
                      None disables the cutoff (a cutoff of 0 is honoured)
    showVals, formatVals, valColor, valAnchor -- draw each cell's value as
                      text, formatted with 'formatVals'
    clabelsAngle, clabelsPadding, rlabelsAngle, rlabelsPadding -- label
                      rotation and padding (padding defaults to labelPadding)
    colors, strokeColors -- optional per-cell fill / stroke colour matrices
    close          -- if True, finish and close the SVG before returning

    Returns the svg.Svg object.  Raises Exception for an invalid xdir, ydir
    or valAnchor, or when display is requested without close.
    """
    from rasmus import util

    if display and (not close):
        raise Exception("must close file if display is used")

    def hidden(value):
        """Return True if 'value' lies outside the requested cutoffs."""
        if mincutoff is not None and value < mincutoff:
            return True
        if maxcutoff is not None and value > maxcutoff:
            return True
        return False

    # determine filename
    if filename is None:
        filename = util.tempfile(".", "heatmap", ".svg")
        temp = True
    else:
        temp = False

    # determine colormap
    if colors is None:
        if colormap is None:
            colormap = rainbowColorMap(util.flatten(matrix))

    # determine matrix size and orientation
    nrows = len(matrix)
    ncols = len(matrix[0])

    if xdir == 1:
        xstart = xmargin
        ranchor = "end"
        coffset = width
    elif xdir == -1:
        xstart = xmargin + ncols * width
        ranchor = "start"
        coffset = 0
    else:
        raise Exception("xdir must be 1 or -1")

    if ydir == 1:
        ystart = ymargin
        roffset = height
        canchor = "start"
    elif ydir == -1:
        # rows stack vertically, so the far edge is nrows * height
        ystart = ymargin + nrows * height
        roffset = 0
        canchor = "end"
    else:
        raise Exception("ydir must be 1 or -1")

    # begin svg
    infile = util.open_stream(filename, "w")
    s = svg.Svg(infile)
    s.beginSvg(ncols * width + 2 * xmargin, nrows * height + 2 * ymargin)

    # draw matrix
    for i in xrange(nrows):
        for j in xrange(ncols):
            if hidden(matrix[i][j]):
                continue

            if colors:
                color = colors[i][j]
            else:
                color = colormap.get(matrix[i][j])

            if strokeColors:
                strokeColor = strokeColors[i][j]
            else:
                strokeColor = color

            s.rect(xstart + xdir * j * width,
                   ystart + ydir * i * height,
                   xdir * width, ydir * height, strokeColor, color)

    # draw values
    if showVals:
        # find text size: the largest size at which every value fits its cell
        fontwidth = 7 / 11.0

        sizes = []
        for i in xrange(nrows):
            for j in xrange(ncols):
                if hidden(matrix[i][j]):
                    continue

                strval = formatVals(matrix[i][j])
                if len(strval) > 0:
                    sizes.append(
                        min(height,
                            width / (float(len(strval)) * fontwidth)))
        # no visible non-empty value: any size works, so use the cell height
        if sizes:
            textsize = min(sizes)
        else:
            textsize = height

        if valAnchor == "start":
            xoffset = 0
        elif valAnchor == "middle":
            xoffset = 0.5
        elif valAnchor == "end":
            xoffset = 1
        else:
            raise Exception("anchor not supported: %s" % valAnchor)

        yoffset = int(ydir == -1)
        for i in xrange(nrows):
            for j in xrange(ncols):
                if hidden(matrix[i][j]):
                    continue

                strval = formatVals(matrix[i][j])
                s.text(strval,
                       xstart + xdir * (j + xoffset) * width,
                       ystart + ydir * (i + yoffset) * height +
                       height / 2.0 + textsize / 2.0,
                       textsize,
                       fillColor=valColor,
                       anchor=valAnchor)

    # draw labels
    if rlabels is not None:
        assert len(rlabels) == nrows, \
            "number of row labels does not equal number of rows"

        if rlabelsPadding is None:
            rlabelsPadding = labelPadding

        for i in xrange(nrows):
            x = xstart - xdir * rlabelsPadding
            y = ystart + roffset + ydir * i * height - labelSpacing / 2.
            s.text(rlabels[i], x, y, height - labelSpacing,
                   anchor=ranchor, angle=rlabelsAngle)

    if clabels is not None:
        assert len(clabels) == ncols, \
            "number of col labels does not equal number of cols"

        if clabelsPadding is None:
            clabelsPadding = labelPadding

        for j in xrange(ncols):
            x = xstart + coffset + xdir * j * width - labelSpacing / 2.
            y = ystart - ydir * clabelsPadding
            s.text(clabels[j], x, y, width - labelSpacing,
                   anchor=canchor, angle=clabelsAngle)

    # end svg
    if close:
        s.endSvg()
        s.close()

    # display matrix (blocks until the viewer exits)
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)

    return s