def buildAlignBigTree(seqs, verbose=True, removetmp=True, options=""):
    """Align 'seqs' with a quick muscle run and read the tree it writes.

    Runs "muscle -diags1 -sv -maxiters 1" on a temporary FASTA copy of
    'seqs' and asks muscle to save a tree with -tree1.

    seqs      -- sequences, written out with fasta.write_fasta
    verbose   -- if False, "-quiet" is passed to muscle (same as muscle())
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the muscle call

    Returns 'seqs' unchanged when it holds fewer than two sequences (no
    tree is produced in that case); otherwise returns (aln, tree).
    """
    if len(seqs) < 2:
        return seqs

    if not verbose:
        options += " -quiet "

    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        outfilename2 = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename2)
        cmd = "muscle -diags1 -sv -maxiters 1 " + options + \
              " -in " + infilename + \
              " -out " + outfilename + " -tree1 " + outfilename2
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
        tree = treelib.Tree()
        tree.read_newick(outfilename2)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return (aln, tree)
def muscle(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with the external 'muscle' program.

    seqs      -- sequences, written out with fasta.write_fasta
    verbose   -- if False, "-quiet" is appended to the muscle options
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the muscle call

    Returns 'seqs' unchanged when it holds fewer than two sequences;
    otherwise returns the alignment read back with fasta.read_fasta.
    """
    if len(seqs) < 2:
        return seqs

    if not verbose:
        options += " -quiet "

    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "muscle " + options + " -in " + infilename + \
              " -out " + outfilename
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return aln
Example #3
0
def buildAlignBigTree(seqs, verbose=True, removetmp=True, options=""):
    """Align 'seqs' with a quick muscle run and read the tree it writes.

    Runs "muscle -diags1 -sv -maxiters 1" on a temporary FASTA copy of
    'seqs' and asks muscle to save a tree with -tree1.

    seqs      -- sequences, written out with fasta.write_fasta
    verbose   -- if False, "-quiet" is passed to muscle (same as muscle())
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the muscle call

    Returns 'seqs' unchanged when it holds fewer than two sequences (no
    tree is produced in that case); otherwise returns (aln, tree).
    """
    if len(seqs) < 2:
        return seqs

    if not verbose:
        options += " -quiet "

    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        outfilename2 = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename2)
        cmd = "muscle -diags1 -sv -maxiters 1 " + options + \
              " -in " + infilename + \
              " -out " + outfilename + " -tree1 " + outfilename2
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
        tree = treelib.Tree()
        tree.read_newick(outfilename2)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return (aln, tree)
Example #4
0
def clustalw(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with clustalw

    seqs      -- sequences, written out with fasta.writeFasta
    verbose   -- if False, clustalw's stdout is redirected to /dev/null
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the clustalw call

    Returns 'seqs' unchanged when it holds fewer than two sequences;
    otherwise returns the alignment parsed by readClustalwAlign.
    """

    if len(seqs) < 2:
        return seqs

    tmpfiles = []
    dndfile = None
    try:
        # make input file for clustalw
        infilename = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename)
        # a .dnd file named after the input is expected to be left behind
        dndfile = infilename.replace(".fa", ".dnd")
        fasta.writeFasta(infilename, seqs)

        # run clustalw
        outfilename = util.tempfile(".", "clustalw-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + " -quicktree -infile=" + infilename + \
              " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        aln = readClustalwAlign(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)
            if dndfile is not None:
                # best effort, as in clustalwProfiles: the .dnd file is a
                # by-product and its absence should not fail the alignment
                try:
                    os.remove(dndfile)
                except OSError:
                    pass

    return aln
Example #5
0
def bionj(aln=None, labels=None, distmat=None, seqtype="pep", verbose=True):
    """Build a tree by piping a distance matrix through the 'bionj' program.

    aln     -- alignment; used to compute distances when 'distmat' is not
               given, and as the source of labels (aln.keys()) when
               'distmat' is given without 'labels'
    labels  -- names applied to the tree with phylip.rename_tree_with_names
    distmat -- precomputed distance matrix; if None, distances come from
               phylip.protdist (seqtype "pep") or phylip.dnadist (otherwise)
    seqtype -- "pep" selects protdist, anything else selects dnadist
    verbose -- forwarded to protdist/dnadist

    Returns the tree read back with treelib.read_tree.  The temporary
    distance and tree files are always removed, even if a step fails.
    """
    tmpfiles = []
    try:
        # make temp files
        distfile = util.tempfile(".", "bionj-in", ".dist")
        tmpfiles.append(distfile)
        treefile = util.tempfile(".", "bionj-out", ".tree")
        tmpfiles.append(treefile)

        # find distances and then NJ tree
        if distmat is not None:
            phylip.write_dist_matrix(distmat, out=distfile)

            if labels is None:
                labels = aln.keys()
        elif seqtype == "pep":
            labels = phylip.protdist(aln, distfile, verbose=verbose)
        else:
            labels = phylip.dnadist(aln, distfile, verbose=verbose)

        # bionj is fed the input and output filenames on stdin
        os.system("echo -n '%s\n%s' | bionj > /dev/null" %
                  (distfile, treefile))
        tree = treelib.read_tree(treefile)
        phylip.rename_tree_with_names(tree, labels)
    finally:
        # clean up; a failed run may not have produced both files
        for tmpname in tmpfiles:
            if os.path.exists(tmpname):
                os.remove(tmpname)

    return tree
Example #6
0
def muscle(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with the external 'muscle' program.

    seqs      -- sequences, written out with fasta.write_fasta
    verbose   -- if False, "-quiet" is appended to the muscle options
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the muscle call

    Returns 'seqs' unchanged when it holds fewer than two sequences;
    otherwise returns the alignment read back with fasta.read_fasta.
    """
    if len(seqs) < 2:
        return seqs

    if not verbose:
        options += " -quiet "

    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "muscle " + options + " -in " + infilename + \
              " -out " + outfilename
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return aln
Example #7
0
def clustalwProfiles(aln1, aln2, verbose=True, removetmp=True, options=""):
    """Align two existing alignments against each other with clustalw.

    aln1, aln2 -- alignments, written out with fasta.writeFasta and passed
                  to clustalw as -profile1 / -profile2
    verbose    -- if False, clustalw's stdout is redirected to /dev/null
    removetmp  -- if True, temporary files are deleted, even on failure
    options    -- extra command-line options inserted into the clustalw call

    Returns the combined alignment parsed by readClustalwAlign.
    """
    tmpfiles = []
    dndfiles = []
    try:
        # make input file for clustalw
        infilename1 = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename1)
        dndfiles.append(infilename1.replace(".fa", ".dnd"))
        infilename2 = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename2)
        dndfiles.append(infilename2.replace(".fa", ".dnd"))
        fasta.writeFasta(infilename1, aln1)
        fasta.writeFasta(infilename2, aln2)

        # run clustalw
        outfilename = util.tempfile(".", "clustalw-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + \
              " -quicktree -profile1=" + infilename1 + \
              " -profile2=" + infilename2 + " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        aln = readClustalwAlign(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)
            # .dnd by-products may or may not exist; removal is best effort
            for dndname in dndfiles:
                try:
                    os.remove(dndname)
                except OSError:
                    pass

    return aln
def bl2seq(seq1, seq2, program="blastp", options="", name1="seq1", name2="seq2"):
    """
    Performs Blast between two sequences 'seq1' and 'seq2'.
    Returns a single Blast hit line or None if no hits found.

    The hit is the first non-comment output line of bl2seq, split on tabs.
    'program' is passed to bl2seq as -p, 'options' is appended to the
    command line, and 'name1'/'name2' are the FASTA names given to the
    sequences.  The pipe is closed and the temporary FASTA files are
    removed even if a step fails.
    """

    tmpfiles = []
    pipe = None
    try:
        # create temp files for sequences
        file1 = util.tempfile(".", "blastp", ".fasta")
        tmpfiles.append(file1)
        file2 = util.tempfile(".", "blastp", ".fasta")
        tmpfiles.append(file2)

        fasta.write_fasta_ordered(file1, [name1], [seq1])
        fasta.write_fasta_ordered(file2, [name2], [seq2])

        # execute blast
        pipe = os.popen("bl2seq -p %s -i %s -j %s -D 1 %s" %
                        (program, file1, file2, options))

        # parse hit: first line that is not a '#' comment
        hit = None
        for line in pipe:
            if line.startswith("#"):
                continue
            hit = line.rstrip().split("\t")
            break
    finally:
        # release the child process before deleting the files it reads
        if pipe is not None:
            pipe.close()

        # remove temp files
        for tmpname in tmpfiles:
            if os.path.exists(tmpname):
                os.remove(tmpname)

    return hit
def clustalwProfiles(aln1, aln2, verbose=True, removetmp=True, options=""):
    """Align two existing alignments against each other with clustalw.

    aln1, aln2 -- alignments, written out with fasta.writeFasta and passed
                  to clustalw as -profile1 / -profile2
    verbose    -- if False, clustalw's stdout is redirected to /dev/null
    removetmp  -- if True, temporary files are deleted, even on failure
    options    -- extra command-line options inserted into the clustalw call

    Returns the combined alignment parsed by readClustalwAlign.
    """
    tmpfiles = []
    dndfiles = []
    try:
        # make input file for clustalw
        infilename1 = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename1)
        dndfiles.append(infilename1.replace(".fa", ".dnd"))
        infilename2 = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename2)
        dndfiles.append(infilename2.replace(".fa", ".dnd"))
        fasta.writeFasta(infilename1, aln1)
        fasta.writeFasta(infilename2, aln2)

        # run clustalw
        outfilename = util.tempfile(".", "clustalw-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + \
              " -quicktree -profile1=" + infilename1 + \
              " -profile2=" + infilename2 + " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        aln = readClustalwAlign(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)
            # .dnd by-products may or may not exist; removal is best effort
            for dndname in dndfiles:
                try:
                    os.remove(dndname)
                except OSError:
                    pass

    return aln
def clustalw(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences 'seqs' with clustalw

    seqs      -- sequences, written out with fasta.writeFasta
    verbose   -- if False, clustalw's stdout is redirected to /dev/null
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the clustalw call

    Returns 'seqs' unchanged when it holds fewer than two sequences;
    otherwise returns the alignment parsed by readClustalwAlign.
    """

    if len(seqs) < 2:
        return seqs

    tmpfiles = []
    dndfile = None
    try:
        # make input file for clustalw
        infilename = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename)
        # a .dnd file named after the input is expected to be left behind
        dndfile = infilename.replace(".fa", ".dnd")
        fasta.writeFasta(infilename, seqs)

        # run clustalw
        outfilename = util.tempfile(".", "clustalw-out", ".aln")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + " -quicktree -infile=" + infilename + \
              " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        aln = readClustalwAlign(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)
            if dndfile is not None:
                # best effort, as in clustalwProfiles: the .dnd file is a
                # by-product and its absence should not fail the alignment
                try:
                    os.remove(dndfile)
                except OSError:
                    pass

    return aln
Example #11
0
def bl2seq(seq1, seq2, program="blastp", options="",
           name1="seq1", name2="seq2"):
    """
    Performs Blast between two sequences 'seq1' and 'seq2'.
    Returns a single Blast hit line or None if no hits found.

    The hit is the first non-comment output line of bl2seq, split on tabs.
    'program' is passed to bl2seq as -p, 'options' is appended to the
    command line, and 'name1'/'name2' are the FASTA names given to the
    sequences.  The pipe is closed and the temporary FASTA files are
    removed even if a step fails.
    """

    tmpfiles = []
    pipe = None
    try:
        # create temp files for sequences
        file1 = util.tempfile(".", "blastp", ".fasta")
        tmpfiles.append(file1)
        file2 = util.tempfile(".", "blastp", ".fasta")
        tmpfiles.append(file2)

        fasta.write_fasta_ordered(file1, [name1], [seq1])
        fasta.write_fasta_ordered(file2, [name2], [seq2])

        # execute blast
        pipe = os.popen("bl2seq -p %s -i %s -j %s -D 1 %s" %
                        (program, file1, file2, options))

        # parse hit: first line that is not a '#' comment
        hit = None
        for line in pipe:
            if line.startswith("#"):
                continue
            hit = line.rstrip().split("\t")
            break
    finally:
        # release the child process before deleting the files it reads
        if pipe is not None:
            pipe.close()

        # remove temp files
        for tmpname in tmpfiles:
            if os.path.exists(tmpname):
                os.remove(tmpname)

    return hit
Example #12
0
def create_temp_dir(prefix="tmpphylip_"):
    """Make a new working directory for PHYLIP and change into it.

    The directory name is the final path component of a name generated by
    util.tempfile(".", prefix, ""); it is created relative to the current
    working directory, which then becomes the new directory.

    Returns the directory name (not a full path).
    """
    generated = util.tempfile(".", prefix, "")
    dirname = os.path.basename(generated)

    os.mkdir(dirname)
    os.chdir(dirname)

    return dirname
Example #13
0
 def processFunc():
     """Start blastall on the next batch of 'split' query sequences.

     Uses the enclosing scope's 'closure' dict for state: "index" (next
     sequence position), "oldtmp" (query file of the previous batch, or
     None) and "time" (accumulated seconds).

     Returns False when no sequences remain, otherwise the pipe returned
     by os.popen for the running blastall command.
     """
     allnames = seqs.keys()
     total = len(allnames)

     # remove old query tempfile if one exists
     if closure["oldtmp"] is not None:
         os.remove(closure["oldtmp"])
         # forget the file so a later call does not try to remove it again
         closure["oldtmp"] = None
         elapse = util.toc()
         closure["time"] += elapse

         # the index advances by whole batches, so cap it at the total
         done = min(closure["index"], total)
         util.log("blasted %d of %d sequences (%.1f%%), elapse %.0f m, left %.0f m" % (
             done, total,
             100 * float(done) / total,
             closure["time"] / 60.0,
             elapse / split * (total - done) / 60.0))

     util.tic()

     # find new subset of query sequences
     i = closure["index"]
     names = allnames[i:i+split]

     # if no more sequences then quit
     if not names:
         return False

     # start blast
     tmpfile = util.tempfile(".", "blastp", ".fasta")
     seqs.write(tmpfile, names=names)
     pipe = os.popen("blastall -p %s -d %s -i %s -m 8 -e .1 %s" %
                     (prog, databaseFile, tmpfile, options))

     # update variables
     closure["oldtmp"] = tmpfile
     closure["index"] = i + split

     return pipe
Example #14
0
def make_color_legend(filename,
                      colormap,
                      start,
                      end,
                      step,
                      width=100,
                      height=10,
                      display=False):
    """Write an SVG strip of colored rectangles as a colormap legend.

    One rectangle is drawn per value of util.frange(start, end + step, step),
    filled with colormap.get(value), scaled so the strip is 'width' wide and
    'height' tall.

    filename -- output file; if None a temporary file is used and removed
                again before returning
    display  -- if True, the file is shown with the external 'display'
                command
    """
    from rasmus import util

    # without a filename the legend is written to a throwaway file
    temp = filename is None
    if temp:
        filename = util.tempfile(".", "colormap", ".svg")

    canvas = svg.Svg(util.open_stream(filename, "w"))
    canvas.beginSvg(width, height)

    xscale = float(width) / (end + step - start)
    cellwidth = step * xscale

    for value in util.frange(start, end + step, step):
        fill = colormap.get(value)
        canvas.rect((value - start) * xscale, 0, cellwidth, height,
                    fill, fill)

    canvas.endSvg()
    canvas.close()

    # display
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)
Example #15
0
def showtab(tab, name='table'):
    """Show a table in a new xterm

    The table is written with tab.write_pretty to a temporary file, which a
    background shell job pages with 'less -S' inside an xterm titled 'name'
    and removes once the pager exits.
    """

    # single quotes would end the quoted title inside the shell command
    name = name.replace("'", "")
    tmp = util.tempfile(".", "tmp", ".tab")

    # close explicitly so the data is on disk before the pager opens it
    out = open(tmp, "w")
    try:
        tab.write_pretty(out)
    finally:
        out.close()

    os.system("(xterm -T '%s' -n '%s' -e less -S %s; rm %s) &" %
              (name, name, tmp, tmp))
Example #16
0
    def gfit(self, func, eqn, params, list1, list2=[], list3=[], ** options):
        """
        all syntax should be valid GNUPLOT syntax
            func - a string of the function call i.e. "f(x)"
            eqn  - a string of a GNUPLOT equation  "a*x**b"
            params - a dictionary of parameters in eqn and their initial values
                   ex: {"a": 1, "b": 3}        
        """

        from rasmus import util
        
        self.set(** options)
    
        print len(list1), len(list2), len(list3)
    
        if not self.enable:
            raise Exception("must be output must be enabled for fitting")
        
        list1, list2, list3 = self.prepareData(list1, list2, list3)
        
        # add data to graph
        self.data.append(self.Plot(list1, list2, list3, copy.copy(self.options)))
        
        
        # perform fitting
        self.stream = os.popen("gnuplot", "w")
        print >>self.stream, "%s = %s" % (func, eqn)
        for param, value in params.items():
            print >>self.stream, "%s = %f" % (param, value)
        print >>self.stream, "fit %s '-' via %s" % \
            (func, ",".join(params.keys()))
        self.outputData(list1, list2, list3)
       
                
        # save and read parameters
        outfile = util.tempfile(".", "plot", ".txt")        
        print >>self.stream, "save var '%s'" % outfile
        print >>self.stream, "print 'done'"
        self.stream.flush()     
        
        # wait for variable file
        while not os.path.isfile(outfile): pass
        params = self.readParams(outfile)
        os.remove(outfile)
        
        # build eqn for plotting
        paramlist = ""
        for param, value in params.items():
            paramlist += "%s = %s, " % (param, value)
        self.options["eqn"] = paramlist + "%s = %s, %s" % \
            (func, eqn, func)
        self.options["style"] = "lines"
        
        # add fitted eqn to graph
        self.data.append(self.Plot([], [], [], copy.copy(self.options)))
        
        self.replot()
Example #17
0
    def wait(self):
        """Wait until all commands are known to be executed

        Sends gnuplot a tiny plot that writes to a temporary postscript
        file, then blocks until that file appears and removes it.
        """

        import time
        from rasmus import util

        tmpfile = util.tempfile(".", "gnuplot", ".ps")
        print >>self.stream, "set output '%s'" % tmpfile
        print >>self.stream, "set terminal postscript color"
        print >>self.stream, "plot '-'\n0 0\ne\n"
        self.stream.flush()

        # sleep between checks instead of spinning at full CPU
        while not os.path.isfile(tmpfile):
            time.sleep(0.01)
        os.remove(tmpfile)
Example #18
0
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from 'seqs' with "muscle -cluster -tree1".

    seqs      -- sequences, written out with fasta.write_fasta
    verbose   -- if False, muscle's stderr is redirected to /dev/null
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the muscle call

    Returns a treelib.Tree read from the newick file muscle writes.
    """
    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename)
        cmd = "muscle " + options + " -in " + infilename + \
              " -cluster -tree1 " + outfilename

        if not verbose:
            cmd += " 2>/dev/null"

        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return tree
Example #19
0
    def setTerminal(self, filename="", format="x11"):
        """Point gnuplot's output at a file and choose the terminal type.

        Does nothing (returns None) when self.enable is false.

        With an empty 'filename', output is directed to a temporary ".ps"
        file whose name is returned; no terminal is set in that case.
        Otherwise output goes to 'filename', and a recognized extension
        (.ps, .pdf, .gif, .png, .jpg) overrides 'format' before the
        matching "set terminal" command is sent.
        """
        if not self.enable:
            return

        from rasmus import util

        # no filename: send output to a temporary file and hand it back
        if filename == "":
            tmpfile = util.tempfile(".", "gnuplot", ".ps")
            print >>self.stream, "set output \"%s\"" % tmpfile
            return tmpfile

        print >>self.stream, "set output \"%s\"" % filename

        # auto detect format from filename
        for ext in ("ps", "pdf", "gif", "png", "jpg"):
            if filename.endswith("." + ext):
                format = ext

        # formats whose gnuplot terminal name differs from the format name
        terminals = {"ps": "postscript color", "jpg": "jpeg"}
        print >>self.stream, "set terminal %s" % terminals.get(format, format)
Example #20
0
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from 'seqs' with "clustalw -tree".

    seqs      -- sequences, written out with fasta.writeFasta
    verbose   -- if False, clustalw's stdout is redirected to /dev/null
    removetmp -- if True, temporary files are deleted, even on failure
    options   -- extra command-line options inserted into the clustalw call

    Returns a treelib.Tree read from the ".ph" file named after the input.
    """
    tmpfiles = []
    try:
        # make input file for clustalw
        infilename = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename)
        fasta.writeFasta(infilename, seqs)

        # run clustalw; the tree file shares the input's base name
        outfilename = infilename.replace(".fa", ".ph")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + " -tree -infile=" + infilename + \
              " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
    finally:
        # cleanup tempfiles; a failed run may not have produced all of them
        if removetmp:
            for tmpname in tmpfiles:
                if os.path.exists(tmpname):
                    os.remove(tmpname)

    return tree
Example #21
0
def heatmap(matrix,
            width=20,
            height=20,
            colormap=None,
            filename=None,
            rlabels=None,
            clabels=None,
            display=True,
            xdir=1,
            ydir=1,
            xmargin=0,
            ymargin=0,
            labelPadding=2,
            labelSpacing=4,
            mincutoff=None,
            maxcutoff=None,
            showVals=False,
            formatVals=str,
            valColor=black,
            clabelsAngle=270,
            clabelsPadding=None,
            rlabelsAngle=0,
            rlabelsPadding=None,
            colors=None,
            strokeColors=None,
            valAnchor="start",
            close=True):
    """Draw 'matrix' as an SVG heat map and return the svg.Svg writer.

    matrix        -- rows of values; one width x height cell per value
    colormap      -- object whose get(value) gives a cell color; defaults
                     to rainbowColorMap over all values when 'colors' is
                     not given
    colors        -- optional per-cell colors, used instead of 'colormap'
    strokeColors  -- optional per-cell outline colors (default: fill color)
    filename      -- output file; if None a temporary file is used and
                     removed before returning
    rlabels, clabels -- optional row / column labels (must match the
                     matrix dimensions)
    display       -- if True, show the file with the external 'display'
                     command (requires close=True)
    xdir, ydir    -- 1 or -1: direction in which columns / rows advance
    xmargin, ymargin -- margins around the matrix
    mincutoff, maxcutoff -- cells below / above these values are skipped;
                     None disables the cutoff
    showVals      -- if True, write formatVals(value) into each cell using
                     'valColor' and 'valAnchor' ("start", "middle", "end")
    close         -- if True, finish and close the SVG before returning

    Raises Exception for an invalid xdir, ydir or valAnchor, and when
    display is requested without close.
    """

    from rasmus import util
    if display and (not close):
        raise Exception("must close file if display is used")

    # determine filename
    temp = filename is None
    if temp:
        filename = util.tempfile(".", "heatmap", ".svg")

    # determine colormap
    if colors is None and colormap is None:
        colormap = rainbowColorMap(util.flatten(matrix))

    # determine matrix size and orientation
    nrows = len(matrix)
    ncols = len(matrix[0])

    if xdir == 1:
        xstart = xmargin
        ranchor = "end"
        coffset = width
    elif xdir == -1:
        xstart = xmargin + ncols * width
        ranchor = "start"
        coffset = 0
    else:
        raise Exception("xdir must be 1 or -1")

    if ydir == 1:
        ystart = ymargin
        roffset = height
        canchor = "start"
    elif ydir == -1:
        # rows are 'height' tall, so the far edge is nrows * height
        ystart = ymargin + nrows * height
        roffset = 0
        canchor = "end"
    else:
        raise Exception("ydir must be 1 or -1")

    def outside_cutoffs(val):
        # compare against None so that a cutoff of 0 is still honored
        if mincutoff is not None and val < mincutoff:
            return True
        if maxcutoff is not None and val > maxcutoff:
            return True
        return False

    # begin svg
    infile = util.open_stream(filename, "w")
    s = svg.Svg(infile)
    s.beginSvg(ncols * width + 2 * xmargin, nrows * height + 2 * ymargin)

    # draw matrix
    for i in xrange(nrows):
        for j in xrange(ncols):
            if outside_cutoffs(matrix[i][j]):
                continue

            if colors:
                color = colors[i][j]
            else:
                color = colormap.get(matrix[i][j])

            if strokeColors:
                strokeColor = strokeColors[i][j]
            else:
                strokeColor = color

            s.rect(xstart + xdir * j * width, ystart + ydir * i * height,
                   xdir * width, ydir * height, strokeColor, color)

    # draw values
    if showVals:
        # find text size: the largest size at which every value fits a cell
        fontwidth = 7 / 11.0

        sizes = []
        for i in xrange(nrows):
            for j in xrange(ncols):
                if outside_cutoffs(matrix[i][j]):
                    continue

                strval = formatVals(matrix[i][j])
                if len(strval) > 0:
                    sizes.append(
                        min(height, width / (float(len(strval)) * fontwidth)))

        if valAnchor == "start":
            xoffset = 0
        elif valAnchor == "middle":
            xoffset = 0.5
        elif valAnchor == "end":
            xoffset = 1
        else:
            raise Exception("anchor not supported: %s" % valAnchor)

        # no sizes means every cell was skipped or formatted to an empty
        # string, so there is no text to draw
        if sizes:
            textsize = min(sizes)
            yoffset = int(ydir == -1)
            for i in xrange(nrows):
                for j in xrange(ncols):
                    if outside_cutoffs(matrix[i][j]):
                        continue

                    strval = formatVals(matrix[i][j])
                    s.text(strval,
                           xstart + xdir * (j + xoffset) * width,
                           ystart + ydir * (i + yoffset) * height +
                           height / 2.0 + textsize / 2.0,
                           textsize,
                           fillColor=valColor,
                           anchor=valAnchor)

    # draw labels
    if rlabels is not None:
        assert len(rlabels) == nrows, \
            "number of row labels does not equal number of rows"

        if rlabelsPadding is None:
            rlabelsPadding = labelPadding

        for i in xrange(nrows):
            x = xstart - xdir * rlabelsPadding
            y = ystart + roffset + ydir * i * height - labelSpacing / 2.
            s.text(rlabels[i],
                   x,
                   y,
                   height - labelSpacing,
                   anchor=ranchor,
                   angle=rlabelsAngle)

    if clabels is not None:
        assert len(clabels) == ncols, \
            "number of col labels does not equal number of cols"

        if clabelsPadding is None:
            clabelsPadding = labelPadding

        for j in xrange(ncols):
            x = xstart + coffset + xdir * j * width - labelSpacing / 2.
            y = ystart - ydir * clabelsPadding
            s.text(clabels[j],
                   x,
                   y,
                   width - labelSpacing,
                   anchor=canchor,
                   angle=clabelsAngle)

    # end svg
    if close:
        s.endSvg()
        s.close()

    # display matrix
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)

    return s