def clustalw(inputfile="", seqs=None, remove_inputfile=True, params=None):
    """
    Align sequences with the external ClustalW program and parse the result.

    Exactly one of ``inputfile`` and ``seqs`` must be supplied.

    Arguments:
        inputfile: name of a sequence file that is handed to the ClustalW
            executable as-is.
        seqs: mapping of header -> sequence string. The sequences are
            written with writeMultiFasta() to a file named
            "<random tag>_<up to five headers>.mfa" (a relative file name),
            which then serves as the input file. ``None`` (the default)
            behaves like an empty mapping.
        remove_inputfile: when true, the input file is deleted after the
            run. This applies to a caller-supplied ``inputfile`` as well,
            not only to the file generated from ``seqs``.
        params: mapping of ClustalW option name -> value; every item is
            passed on the command line as "-name=value". ``None`` (the
            default) means no extra options.

    Returns:
        The tuple (seqs, alignment) produced by _parse_clustalw() for the
        ".aln" file that belongs to the input file.

    Raises:
        Exception: when both or neither of ``inputfile`` / ``seqs`` are
            given, or when ``seqs`` contains an empty sequence.
    """
    # NOTE(review): a second definition of clustalw() with the same logic
    # directly follows this one in the source; if both live in the same
    # module the later one is the one that takes effect -- consider
    # removing one of them.

    # Raise real Exception instances: a bare string is not a valid
    # exception object, so `raise "..."` ends in a TypeError instead of
    # reporting the intended message.
    if inputfile and seqs:
        raise Exception("wrong usage!")
    if not inputfile and not seqs:
        raise Exception("no input specified")

    if seqs:
        # input is (hopefully) sequences; quick check that none is empty
        if not all(seqs.values()):
            raise Exception("no sequence string(s) specified: %s" % (seqs,))
        # make a kind of semi-unique filename
        uniqueid = get_random_string_tag()
        headers = [_nonstringheader2stringheader(hdr) for hdr in list(seqs)[:5]]
        inputfile = uniqueid + "_" + "_".join(headers) + ".mfa"
        writeMultiFasta(seqs, inputfile)
    # else: input is (hopefully) a filename

    # okay, do the clustalw
    paramstring = " ".join(
        ["-%s=%s" % (key, value) for key, value in (params or {}).items()]
    )
    ci, co = osPopen2(
        "%s %s %s" % (EXECUTABLE_CLUSTALW, inputfile, paramstring)
    )
    ci.close()
    # The program's stdout is not used, but it is still read to EOF before
    # the output files are touched (presumably this is what waits for
    # ClustalW to finish -- confirm against osPopen2).
    co.read()
    co.close()

    # derive the output filenames from the input filename: everything
    # from the last "." onwards is replaced by the new extension
    if "." in inputfile:
        base = inputfile[:inputfile.rfind(".")]
    else:
        base = inputfile
    fname_out = base + ".aln"
    fname_tree = base + ".dnd"

    # parse alignment output file
    _seqs, _alignment = _parse_clustalw(fname_out)

    # and delete tmp. created files
    osRemove(fname_out)
    osRemove(fname_tree)
    if remove_inputfile:
        osRemove(inputfile)

    # NOTE: the headers in _seqs are not checked against the keys of
    # ``seqs``; differences can occur when non-string headers are used.
    return (_seqs, _alignment)
def clustalw(inputfile="", seqs=None, remove_inputfile=True, params=None):
    """
    Align sequences with the external ClustalW program and parse the result.

    Exactly one of ``inputfile`` and ``seqs`` must be supplied.

    Arguments:
        inputfile: name of a sequence file that is handed to the ClustalW
            executable as-is.
        seqs: mapping of header -> sequence string. The sequences are
            written with writeMultiFasta() to a file named
            "<random tag>_<up to five headers>.mfa" (a relative file name),
            which then serves as the input file. ``None`` (the default)
            behaves like an empty mapping.
        remove_inputfile: when true, the input file is deleted after the
            run. This applies to a caller-supplied ``inputfile`` as well,
            not only to the file generated from ``seqs``.
        params: mapping of ClustalW option name -> value; every item is
            passed on the command line as "-name=value". ``None`` (the
            default) means no extra options.

    Returns:
        The tuple (seqs, alignment) produced by _parse_clustalw() for the
        ".aln" file that belongs to the input file.

    Raises:
        Exception: when both or neither of ``inputfile`` / ``seqs`` are
            given, or when ``seqs`` contains an empty sequence.
    """
    # NOTE(review): an earlier definition of clustalw() with the same
    # logic directly precedes this one in the source; if both live in the
    # same module this later one is the one that takes effect -- consider
    # removing one of them.

    # Raise real Exception instances: a bare string is not a valid
    # exception object, so `raise "..."` ends in a TypeError instead of
    # reporting the intended message.
    if inputfile and seqs:
        raise Exception("wrong usage!")
    if not inputfile and not seqs:
        raise Exception("no input specified")

    if seqs:
        # input is (hopefully) sequences; quick check that none is empty
        if not all(seqs.values()):
            raise Exception("no sequence string(s) specified: %s" % (seqs,))
        # make a kind of semi-unique filename
        uniqueid = get_random_string_tag()
        headers = [_nonstringheader2stringheader(hdr) for hdr in list(seqs)[:5]]
        inputfile = uniqueid + "_" + "_".join(headers) + ".mfa"
        writeMultiFasta(seqs, inputfile)
    # else: input is (hopefully) a filename

    # okay, do the clustalw
    paramstring = " ".join(
        ["-%s=%s" % (key, value) for key, value in (params or {}).items()]
    )
    ci, co = osPopen2(
        "%s %s %s" % (EXECUTABLE_CLUSTALW, inputfile, paramstring)
    )
    ci.close()
    # The program's stdout is not used, but it is still read to EOF before
    # the output files are touched (presumably this is what waits for
    # ClustalW to finish -- confirm against osPopen2).
    co.read()
    co.close()

    # derive the output filenames from the input filename: everything
    # from the last "." onwards is replaced by the new extension
    if "." in inputfile:
        base = inputfile[:inputfile.rfind(".")]
    else:
        base = inputfile
    fname_out = base + ".aln"
    fname_tree = base + ".dnd"

    # parse alignment output file
    _seqs, _alignment = _parse_clustalw(fname_out)

    # and delete tmp. created files
    osRemove(fname_out)
    osRemove(fname_tree)
    if remove_inputfile:
        osRemove(inputfile)

    # NOTE: the headers in _seqs are not checked against the keys of
    # ``seqs``; differences can occur when non-string headers are used.
    return (_seqs, _alignment)
def scan_branchpoint(self):
    """
    Run the external branch point scanner on this object's DNA sequence.

    The sequence returned by self.dnasequence() is piped to the scanner
    command as a single fasta record with the header "intron". Whatever
    the scanner prints is parsed by parsegfftxt(), called with
    offset=self.start.

    Returns a list of the parsed lines, ordered from the highest to the
    lowest value of int(line[3]) (presumably the GFF start coordinate --
    confirm against parsegfftxt).
    """
    sequence = self.dnasequence()
    # NOTE(review): 'EXECUTABE_SCAN_BRANCHPOINT' is spelled without the
    # 'L' that EXECUTABLE_SFM has; confirm it matches the constant's
    # definition.
    scanner_cmd = "%s -e %s" % (EXECUTABE_SCAN_BRANCHPOINT, EXECUTABLE_SFM)
    to_scanner, from_scanner = osPopen2(scanner_cmd)

    # feed the sequence as fasta and close the pipe so the scanner sees EOF
    to_scanner.write(">intron\n%s\n" % sequence)
    to_scanner.close()

    parsed_lines = parsegfftxt(from_scanner.read(), offset=self.start)
    from_scanner.close()

    # decorate with the integer position, sort ascending, then flip
    decorated = sorted((int(entry[3]), entry) for entry in parsed_lines)
    decorated.reverse()
    return [entry for _position, entry in decorated]