Esempio n. 1
0
def clustalw(inputfile="", seqs={}, remove_inputfile=True, params={}):
    """
    Run the external ClustalW executable and return its parsed alignment.

    Exactly one of ``inputfile`` and ``seqs`` must be supplied. When ``seqs``
    is given, it is first written to a freshly named ``.mfa`` file with
    ``writeMultiFasta`` and that file is used as the ClustalW input.

    Args:
        inputfile: name of the file handed to ClustalW.
        seqs: mapping of header -> sequence; every sequence must be truthy
            (non-empty). Only read, never modified.
        remove_inputfile: when true, the input file is deleted afterwards.
            NOTE: this also applies to a file passed in via ``inputfile``.
        params: mapping of ClustalW option name -> value; each pair is put on
            the command line as ``-name=value``. Only read, never modified.

    Returns:
        The ``(seqs, alignment)`` pair produced by ``_parse_clustalw`` for
        the ``.aln`` file belonging to the input file.

    Raises:
        ValueError: when both or neither of ``inputfile`` and ``seqs`` are
            given, or when ``seqs`` contains an empty sequence.
    """
    # function-scope import keeps this block independent of the file header
    from os.path import splitext

    if inputfile and seqs:
        # string exceptions (raise "...") are rejected by Python >= 2.6
        raise ValueError("wrong usage!")
    if not inputfile and not seqs:
        raise ValueError("no input specified")

    if seqs:
        # quick sanity check: refuse empty / missing sequences
        if not all(seqs.values()):
            raise ValueError(
                "no sequence string(s) specified: %s" % (seqs,))
        # make a kind of semi-unique filename from a random tag plus the
        # (stringified) first five headers
        uniqueid = get_random_string_tag()
        first_headers = list(seqs.keys())[0:5]
        inputfile = uniqueid + "_" + "_".join(
            [_nonstringheader2stringheader(hdr) for hdr in first_headers])
        inputfile += ".mfa"
        writeMultiFasta(seqs, inputfile)

    # okay, do the clustalw
    fname_in = inputfile
    # NOTE(review): the command is assembled as one plain string, so a file
    # name or parameter value containing whitespace or shell metacharacters
    # will presumably not survive; confirm how osPopen2 executes it.
    paramstring = " ".join(
        ["-%s=%s" % (k, v) for k, v in params.items()])
    ci, co = osPopen2("%s %s %s" %
                      (EXECUTABLE_CLUSTALW, fname_in, paramstring))
    ci.close()
    # the textual output is not used, but it is still read to the end;
    # presumably this is what waits for ClustalW to finish writing its files
    co.read()
    co.close()

    # derive the output file names by swapping only the extension of the
    # file name itself; a dot in a directory part (e.g. "./seqs") must not
    # be mistaken for the start of the extension
    _base = splitext(fname_in)[0]
    fname_out = _base + ".aln"
    fname_tree = _base + ".dnd"

    # parse alignment output file
    _seqs, _alignment = _parse_clustalw(fname_out)
    # and delete tmp. created files
    osRemove(fname_out)
    osRemove(fname_tree)
    if remove_inputfile:
        osRemove(fname_in)
    # NOTE: the headers in _seqs are not checked against those in seqs;
    # they can differ when non-string headers were used.

    # and return
    return (_seqs, _alignment)
Esempio n. 2
0
def clustalw(inputfile="",seqs={},remove_inputfile=True,params={}):
    """
    Run the external ClustalW executable and return its parsed alignment.

    Exactly one of ``inputfile`` and ``seqs`` must be supplied. When ``seqs``
    is given, it is first written to a freshly named ``.mfa`` file with
    ``writeMultiFasta`` and that file is used as the ClustalW input.

    Args:
        inputfile: name of the file handed to ClustalW.
        seqs: mapping of header -> sequence; every sequence must be truthy
            (non-empty). Only read, never modified.
        remove_inputfile: when true, the input file is deleted afterwards.
            NOTE: this also applies to a file passed in via ``inputfile``.
        params: mapping of ClustalW option name -> value; each pair is put on
            the command line as ``-name=value``. Only read, never modified.

    Returns:
        The ``(seqs, alignment)`` pair produced by ``_parse_clustalw`` for
        the ``.aln`` file belonging to the input file.

    Raises:
        ValueError: when both or neither of ``inputfile`` and ``seqs`` are
            given, or when ``seqs`` contains an empty sequence.
    """
    # function-scope import keeps this block independent of the file header
    from os.path import splitext

    if inputfile and seqs:
        # string exceptions (raise "...") are rejected by Python >= 2.6
        raise ValueError("wrong usage!")
    if not inputfile and not seqs:
        raise ValueError("no input specified")

    if seqs:
        # quick sanity check: refuse empty / missing sequences
        if not all(seqs.values()):
            raise ValueError(
                "no sequence string(s) specified: %s" % (seqs,))
        # make a kind of semi-unique filename from a random tag plus the
        # (stringified) first five headers
        uniqueid = get_random_string_tag()
        first_headers = list(seqs.keys())[0:5]
        inputfile = uniqueid + "_" + "_".join(
            [_nonstringheader2stringheader(hdr) for hdr in first_headers])
        inputfile += ".mfa"
        writeMultiFasta(seqs, inputfile)

    # okay, do the clustalw
    fname_in = inputfile
    # NOTE(review): the command is assembled as one plain string, so a file
    # name or parameter value containing whitespace or shell metacharacters
    # will presumably not survive; confirm how osPopen2 executes it.
    paramstring = " ".join(
        ["-%s=%s" % (k, v) for k, v in params.items()])
    ci, co = osPopen2("%s %s %s" %
                      (EXECUTABLE_CLUSTALW, fname_in, paramstring))
    ci.close()
    # the textual output is not used, but it is still read to the end;
    # presumably this is what waits for ClustalW to finish writing its files
    co.read()
    co.close()

    # derive the output file names by swapping only the extension of the
    # file name itself; a dot in a directory part (e.g. "./seqs") must not
    # be mistaken for the start of the extension
    _base = splitext(fname_in)[0]
    fname_out = _base + ".aln"
    fname_tree = _base + ".dnd"

    # parse alignment output file
    _seqs, _alignment = _parse_clustalw(fname_out)
    # and delete tmp. created files
    osRemove(fname_out)
    osRemove(fname_tree)
    if remove_inputfile:
        osRemove(fname_in)
    # NOTE: the headers in _seqs are not checked against those in seqs;
    # they can differ when non-string headers were used.

    # and return
    return (_seqs, _alignment)
Esempio n. 3
0
 def scan_branchpoint(self):
     """
     Run the external branch point scanner on this object's DNA sequence.

     The sequence from ``self.dnasequence()`` is fed to the scanner's stdin
     as a single FASTA record named ``intron``; the scanner's output is
     parsed with ``parsegfftxt`` using ``self.start`` as offset.

     Returns the parsed lines ordered by ``int(line[3])`` from high to low
     (presumably the GFF start coordinate -- verify against parsegfftxt).
     """
     sequence = self.dnasequence()
     cmdline = "%s -e %s" % (EXECUTABE_SCAN_BRANCHPOINT, EXECUTABLE_SFM)
     to_child, from_child = osPopen2(cmdline)
     # hand over the query and close stdin so the scanner sees end-of-input
     to_child.write(">intron\n%s\n" % intronseq_placeholder(sequence)) if False else to_child.write(">intron\n%s\n" % sequence)
     to_child.close()
     raw_output = from_child.read()
     parsed = parsegfftxt(raw_output, offset=self.start)
     from_child.close()
     # ascending sort on (position, line), then walk it backwards
     ranked = sorted([(int(entry[3]), entry) for entry in parsed])
     return [pair[1] for pair in reversed(ranked)]