def blastall_file2db(fname, dbname="", blastprogram="blastp", output="ncbiparsed",
                     extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Run blastall with an existing query file against a blast database.

    @type  fname: string
    @param fname: path to the query file (passed as `-i`); NOT removed here

    @type  dbname: string
    @param dbname: blast database name (passed as `-d`)

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'tblastx'

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals BLASTALL_PATH, osPopen3 and NCBIStandalone
    @attention: osPopen3 is assumed to return (stdin, stdout, stderr) handles
                like os.popen3 - confirm against the file's imports

    @rtype:  parser result or string
    @return: result of BlastParser.parse() or the raw output text

    @raise ValueError: `blastprogram` is not one of the accepted programs
    @raise RuntimeError: running blastall or parsing its output failed
    """
    supported = ('blastp', 'tblastn', 'blastn', 'tblastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    command = "%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname, dbname)
    # pre-bind the handles so the error branch can tell whether they exist
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            b_parser = NCBIStandalone.BlastParser()
            blastallout = b_parser.parse(co)
        else:
            blastallout = co.read()
        co.close()
        ce.close()
        # do NOT remove the input fname
    except Exception:
        error = ""
        if co is not None:
            co.close()
        if ce is not None:
            error = ce.read().strip()
            ce.close()
        print(command)
        print("ERROR: '%s'" % error)
        raise RuntimeError("BLAST CRASHED....")
    # and return!
    return blastallout
def run_cexpander_dr(settingsfile, outfname, verbose=False):
    """
    Run the cexpander executable through the shell, feeding it
    `settingsfile` on STDIN and redirecting its STDOUT to `outfname`.

    @type  settingsfile: string
    @param settingsfile: path of the file redirected into the executable

    @type  outfname: string
    @param outfname: path of the file the executable's output is written to

    @type  verbose: Boolean
    @param verbose: print the command line to STDOUT (True) or not (False, default)

    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the returned handles are closed without being read

    @rtype:  Boolean
    @return: True
    """
    shell_call = "%s < %s > %s " % (
        EXECUTABLE_CEXPANDER_CEXPANDER, settingsfile, outfname)
    if verbose:
        print(shell_call)
    handle_in, handle_out, handle_err = osPopen3(shell_call)
    # close in the same order as before: stdin, stdout, stderr
    for handle in (handle_in, handle_out, handle_err):
        handle.close()
    return True
def run_cexpander_dr(settingsfile, outfname, verbose=False):
    """
    Run the cexpander executable through the shell, feeding it
    `settingsfile` on STDIN and redirecting its STDOUT to `outfname`.

    @type  settingsfile: string
    @param settingsfile: path of the file redirected into the executable

    @type  outfname: string
    @param outfname: path of the file the executable's output is written to

    @type  verbose: Boolean
    @param verbose: print the command line to STDOUT (True) or not (False, default)

    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the returned handles are closed without being read

    @rtype:  Boolean
    @return: True
    """
    shell_call = "%s < %s > %s " % (
        EXECUTABLE_CEXPANDER_CEXPANDER, settingsfile, outfname)
    if verbose:
        print(shell_call)
    handle_in, handle_out, handle_err = osPopen3(shell_call)
    # close in the same order as before: stdin, stdout, stderr
    for handle in (handle_in, handle_out, handle_err):
        handle.close()
    return True
def blastall_seq2seq(fastadata=(), filenames=(), output="ncbiparsed", blastprogram="blastp",
                     remove_files=True, extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast one query sequence against one subject sequence.

    Choose exactly one kind of input:
        fastadata   ( ( headerQUERY, seqQUERY ) , ( headerSBJCT, seqSBJCT ) )
        or
        filenames   ( filenameQUERY, filenameSBJCT )

    @type  fastadata: tuple
    @param fastadata: 2 (header, sequence) pairs; written to new fasta files

    @type  filenames: tuple
    @param filenames: paths of the query and subject fasta files

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'tblastx', 'blastx'

    @type  remove_files: Boolean
    @param remove_files: when True, delete the formatdb files (`<subject>.*`),
                         the subject file and the query file afterwards

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals FORMATDB_PATH, BLASTALL_PATH, OSsystem,
                osPopen3, osRemove, NCBIStandalone, get_random_string_tag
    @attention: with remove_files=True the files given in `filenames`
                are deleted as well

    @rtype:  parser result or string
    @return: result of BlastParser.parse() or the raw output text

    @raise ValueError: unsupported `blastprogram`, no input, or improper input
    """
    supported = ('blastp', 'tblastn', 'tblastx', 'blastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    # value handed to formatdb's -p switch for the subject file
    if blastprogram in ('tblastn', 'tblastx'):
        dna_or_prot = "F"
    else:
        dna_or_prot = "T"

    if fastadata and isinstance(fastadata, tuple) and len(fastadata) == 2 and not filenames:
        # input is fasta headers and sequences: write them to (~unique) files
        uniquetag = get_random_string_tag()
        fname_q = "_".join([uniquetag, str(fastadata[0][0]), 'Q.fa'])
        fname_s = "_".join([uniquetag, str(fastadata[1][0]), 'S.fa'])
        for path, entry in ((fname_q, fastadata[0]), (fname_s, fastadata[1])):
            fh = open(path, 'w')
            try:
                fh.write(">%s\n%s" % (entry[0], entry[1]))
            finally:
                fh.close()
    elif filenames and isinstance(filenames, tuple) and len(filenames) == 2 and not fastadata:
        # input is (supposed to be) filenames
        fname_q = filenames[0]
        fname_s = filenames[1]
    elif not filenames and not fastadata:
        raise ValueError("no input!")
    else:
        raise ValueError("inproper input!")

    # formatdb
    OSsystem("%s -i %s -p %s" % (FORMATDB_PATH, fname_s, dna_or_prot))

    # and blastall!
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    ci, co, ce = osPopen3("%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname_q, fname_s))
    ci.close()
    if output == "ncbiparsed":
        b_parser = NCBIStandalone.BlastParser()
        blastallout = b_parser.parse(co)
    else:
        blastallout = co.read()
    co.close()
    ce.close()

    if remove_files:
        OSsystem("rm %s.*" % fname_s)
        osRemove("%s" % fname_s)
        osRemove("%s" % fname_q)

    # and return!
    return blastallout
def blastall_seq2db(header, sequence, dbname="", blastprogram="blastp", output="ncbiparsed",
                    extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast a single sequence against a blast database.

    The sequence is written to a temporary fasta file in the current
    working directory; that file is removed again before returning.

    @type  header: string
    @param header: fasta header of the query sequence

    @type  sequence: string
    @param sequence: the query sequence

    @type  dbname: string
    @param dbname: blast database name (passed as `-d`)

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'blastx'

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals BLASTALL_PATH, osPopen3, osRemove,
                osPathJoin, OSgetcwd, NCBIStandalone, get_random_string_tag

    @rtype:  parser result, string or False
    @return: result of BlastParser.parse() or the raw output text;
             False when running blastall or parsing its output failed

    @raise ValueError: `blastprogram` is not one of the accepted programs
    """
    supported = ('blastp', 'tblastn', 'blastn', 'blastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])

    # generate (semi ;-) unique filename
    uniquetag = get_random_string_tag()
    fname = "_".join(
        [uniquetag, str(header).replace(" ", "_"), sequence[0:10] + ".fa"])
    fname = osPathJoin(OSgetcwd(), fname)
    fh = open(fname, 'w')
    try:
        fh.write(">%s\n%s\n" % (header, sequence))
    finally:
        fh.close()

    command = "%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname, dbname)
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            b_parser = NCBIStandalone.BlastParser()
            blastallout = b_parser.parse(co)
        else:
            blastallout = co.read()
        co.close()
        ce.close()
    except Exception:
        # for some kind of - obvious or freak accident case -
        # Blast or parsing of the blast record failed
        # No debugging here; just cleanup and return False
        print("BLAST CRASHED::")
        print(command)
        blastallout = False
    finally:
        # remove the created Query file, also when an interrupt propagates
        osRemove(fname)

    # and return!
    return blastallout
def getorf(sequence=None, fname=None, outputfile=None,
           executable_getorf=EXECUTABLE_GETORF,
           minsize=EXECUTABLE_GETORF_MINSIZE):
    """
    Run EMBOSS getorf on a sequence or on a file, via a shell pipe.

    @type  sequence: string
    @param sequence: input echoed into getorf; mutually exclusive with `fname`

    @type  fname: string
    @param fname: path of the input file that is cat'ed into getorf;
                  mutually exclusive with `sequence`

    @type  outputfile: string
    @param outputfile: when given, passed as `-outseq`; otherwise getorf
                       is run with `-filter` and its output is returned

    @type  executable_getorf: string
    @param executable_getorf: getorf executable to call

    @type  minsize: integer
    @param minsize: value passed as `-minsize` (TODO confirm the type)

    @rtype:  string
    @return: `outputfile` when it was given, else the text read from getorf

    @raise InproperlyAppliedArgument: neither or both of `sequence`/`fname`
                                      given, or no executable specified
    """
    # do some integrity checks
    if not (sequence or fname):
        raise InproperlyAppliedArgument(
            "specify `sequence` or `fname` variable, not neither")
    if sequence and fname:
        raise InproperlyAppliedArgument(
            "specify `sequence` or `fname` variable, not both")
    if not executable_getorf:
        raise InproperlyAppliedArgument(
            "specify `EXECUTABLE_GETORF` variable")

    # assemble the pipeline: <producer> | <getorf call> <output switch>
    getorf_call = "%s -minsize %s -noreverse" % (executable_getorf, minsize)
    if fname:
        producer = "cat %s" % fname
    else:
        producer = "echo %s" % sequence
    if outputfile:
        out_switch = "-outseq %s" % outputfile
    else:
        out_switch = "-filter"
    pipeline = "%s | %s %s" % (producer, getorf_call, out_switch)

    # execute and collect both streams
    pipe_in, pipe_out, pipe_err = osPopen3(pipeline)
    pipe_in.close()
    stdout_text = pipe_out.read()
    pipe_out.close()
    stderr_text = pipe_err.read()
    pipe_err.close()

    # getorf's banner line on STDERR is not an error
    banner = "Finds and extracts open reading frames (ORFs)"
    if stderr_text and stderr_text.strip() != banner:
        print("getorf ERROR: %s" % stderr_text)

    if outputfile:
        return outputfile
    return stdout_text
def run_getorf(inputfile="", outputfile="",
               executable_getorf=EXECUTABLE_GETORF,
               minsize=EXECUTABLE_GETORF_MINSIZE):
    """
    Run EMBOSS getorf on an input file and let it write an output file.

    @type  inputfile: string
    @param inputfile: path passed to getorf as `-sequence`

    @type  outputfile: string
    @param outputfile: path passed to getorf as `-outseq`

    @type  executable_getorf: string
    @param executable_getorf: getorf executable to call

    @type  minsize: integer
    @param minsize: value passed as `-minsize` (TODO confirm the type)

    @rtype:  None
    @return: nothing; text getorf writes to its streams is printed

    @raise InproperlyAppliedArgument: a required argument is empty
    """
    # do some integrity checks, in the original order
    required = (
        (inputfile, "specify `inputfile` variable"),
        (outputfile, "specify `outputfile` variable"),
        (executable_getorf, "specify `executable_getorf` variable"),
    )
    for value, complaint in required:
        if not value:
            raise InproperlyAppliedArgument(complaint)

    # create command line and execute it
    arguments = (executable_getorf, inputfile, outputfile, minsize)
    call = "%s -sequence %s -outseq %s -minsize %s -noreverse" % arguments
    pipe_in, pipe_out, pipe_err = osPopen3(call)
    pipe_in.close()
    stdout_text = pipe_out.read()
    pipe_out.close()
    stderr_text = pipe_err.read()
    pipe_err.close()

    if stdout_text:
        print("getorf OUTPUT: %s" % stdout_text)
    # getorf's banner line on STDERR is not an error
    banner = "Finds and extracts open reading frames (ORFs)"
    if stderr_text and stderr_text.strip() != banner:
        print("getorf ERROR: %s" % stderr_text)
def blastall_file2db(fname, dbname="", blastprogram="blastp", output="ncbiparsed",
                     extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Run blastall with an existing query file against a blast database.

    @type  fname: string
    @param fname: path to the query file (passed as `-i`); NOT removed here

    @type  dbname: string
    @param dbname: blast database name (passed as `-d`)

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'tblastx'

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals BLASTALL_PATH, osPopen3 and NCBIStandalone
    @attention: osPopen3 is assumed to return (stdin, stdout, stderr) handles
                like os.popen3 - confirm against the file's imports

    @rtype:  parser result or string
    @return: result of BlastParser.parse() or the raw output text

    @raise ValueError: `blastprogram` is not one of the accepted programs
    @raise RuntimeError: running blastall or parsing its output failed
    """
    supported = ('blastp', 'tblastn', 'blastn', 'tblastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    command = "%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname, dbname)
    # pre-bind the handles so the error branch can tell whether they exist
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            b_parser = NCBIStandalone.BlastParser()
            blastallout = b_parser.parse(co)
        else:
            blastallout = co.read()
        co.close()
        ce.close()
        # do NOT remove the input fname
    except Exception:
        error = ""
        if co is not None:
            co.close()
        if ce is not None:
            error = ce.read().strip()
            ce.close()
        print(command)
        print("ERROR: '%s'" % error)
        raise RuntimeError("BLAST CRASHED....")
    # and return!
    return blastallout
def blastall_seq2db(header, sequence, dbname="", blastprogram="blastp", output="ncbiparsed",
                    extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast a single sequence against a blast database.

    The sequence is written to a temporary fasta file in the current
    working directory; that file is removed again before returning.

    @type  header: string
    @param header: fasta header of the query sequence

    @type  sequence: string
    @param sequence: the query sequence

    @type  dbname: string
    @param dbname: blast database name (passed as `-d`)

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'blastx'

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals BLASTALL_PATH, osPopen3, osRemove,
                osPathJoin, OSgetcwd, NCBIStandalone, get_random_string_tag

    @rtype:  parser result, string or False
    @return: result of BlastParser.parse() or the raw output text;
             False when running blastall or parsing its output failed

    @raise ValueError: `blastprogram` is not one of the accepted programs
    """
    supported = ('blastp', 'tblastn', 'blastn', 'blastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])

    # generate (semi ;-) unique filename
    uniquetag = get_random_string_tag()
    fname = "_".join(
        [uniquetag, str(header).replace(" ", "_"), sequence[0:10] + ".fa"])
    fname = osPathJoin(OSgetcwd(), fname)
    fh = open(fname, 'w')
    try:
        fh.write(">%s\n%s\n" % (header, sequence))
    finally:
        fh.close()

    command = "%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname, dbname)
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            b_parser = NCBIStandalone.BlastParser()
            blastallout = b_parser.parse(co)
        else:
            blastallout = co.read()
        co.close()
        ce.close()
    except Exception:
        # for some kind of - obvious or freak accident case -
        # Blast or parsing of the blast record failed
        # No debugging here; just cleanup and return False
        print("BLAST CRASHED::")
        print(command)
        blastallout = False
    finally:
        # remove the created Query file, also when an interrupt propagates
        osRemove(fname)

    # and return!
    return blastallout
def runcexpander(fname_fasta, cbalignp_commandline=" -y", output='binary'):
    """
    Run the complete cascade of cexpander programs on an input multi fasta
    file and return whatever parse_cexpander() makes of the output.

    @type  fname_fasta: string
    @param fname_fasta: path to input multi fasta file

    @type  cbalignp_commandline: string
    @param cbalignp_commandline: (extra) command line for cbalignp

    @type  output: string
    @param output: 'float' writes `$dumpcvc` into the settings file; any
                   other value (default 'binary') writes `$dumpcv`

    @attention: requires global variable EXECUTABLE_CEXPANDER_ALLVSALL
    @attention: requires global variable EXECUTABLE_CEXPANDER_CBALIGNP
    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the cleanup step removes `fname_fasta` itself as well as
                the intermediate files created here

    @rtype:  CexpanderOutput object
    @return: result of parse_cexpander(); documented as a CexpanderOutput
             object

    @raise ValueError: `fname_fasta` is empty
    @raise IOError: `fname_fasta` is not an existing file
    """
    # string exceptions raise TypeError on Python >= 2.6; use real ones
    if not fname_fasta:
        raise ValueError("NoProperFunctionArguments")
    if not osPathIsfile(fname_fasta):
        raise IOError("FileDoesNotExist: %s" % fname_fasta)

    # (0) create (~unique) filenames
    uniquetag = get_random_string_tag()
    fname_allvsall = ".".join([fname_fasta, uniquetag, "allvsall"])
    fname_report = ".".join([fname_fasta, uniquetag, "report"])
    fname_aligned = ".".join([fname_fasta, uniquetag, "aligned"])
    fname_settings = ".".join([fname_fasta, uniquetag, "settings"])

    # (1) create complete .fa -> cexpanderstring command
    command = " python %s %s %s %s; %s -i %s %s > %s; %s < %s; " % (
        EXECUTABLE_CEXPANDER_ALLVSALL,
        fname_fasta,
        fname_allvsall,
        fname_report,
        EXECUTABLE_CEXPANDER_CBALIGNP,
        fname_allvsall,
        cbalignp_commandline,
        fname_aligned,
        EXECUTABLE_CEXPANDER_CEXPANDER,
        fname_settings,
    )

    # (2) create fname_settings file
    # BINARY == $dumpcv, FLOAT = $dumpcvc
    binorfloat = "$dumpcv"
    if output == "float":
        binorfloat = "$dumpcvc"
    content = "\n\n".join([
        "$load\n%s\n%s" % (fname_report, fname_aligned),
        "$addquery\n-1",
        "$run",
        "$dumpentries",
        "$cv_linear",
        "%s" % (binorfloat),
        "$exit\n\n",
    ])
    fh = open(fname_settings, 'w')
    try:
        fh.write(content)
    finally:
        fh.close()

    # (3) run the command
    ci, co, ce = osPopen3(command)
    ci.close()
    # output of EXECUTABLE_CEXPANDER_ALLVSALL is cast to STDOUT as well!
    cexpanderdata = co.read()
    co.close()
    # the error stream is still read, but its content is not used
    ce.read()
    ce.close()

    # (4) parse the captured output
    cxpdr = parse_cexpander(cexpanderdata, fname_fasta)

    # (5) cleanup files (this includes the input fasta file!)
    # NOTE(review): blastall_seq2seq in this file spells this helper
    # `OSsystem`; confirm that `osSystem` is imported too
    osSystem("rm -f %s %s.%s.*" % (fname_fasta, fname_fasta, uniquetag))

    # (6) return the output object
    return cxpdr
def blastall_seq2seq(fastadata=(), filenames=(), output="ncbiparsed", blastprogram="blastp",
                     remove_files=True, extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast one query sequence against one subject sequence.

    Choose exactly one kind of input:
        fastadata   ( ( headerQUERY, seqQUERY ) , ( headerSBJCT, seqSBJCT ) )
        or
        filenames   ( filenameQUERY, filenameSBJCT )

    @type  fastadata: tuple
    @param fastadata: 2 (header, sequence) pairs; written to new fasta files

    @type  filenames: tuple
    @param filenames: paths of the query and subject fasta files

    @type  output: string
    @param output: "ncbiparsed" feeds the output stream of the command to
                   NCBIStandalone.BlastParser; any other value returns the
                   raw text read from that stream

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'tblastx', 'blastx'

    @type  remove_files: Boolean
    @param remove_files: when True, delete the formatdb files (`<subject>.*`),
                         the subject file and the query file afterwards

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra blastall options as { flag: value };
                                the dict is only read, never modified

    @attention: requires globals FORMATDB_PATH, BLASTALL_PATH, OSsystem,
                osPopen3, osRemove, NCBIStandalone, get_random_string_tag
    @attention: with remove_files=True the files given in `filenames`
                are deleted as well

    @rtype:  parser result or string
    @return: result of BlastParser.parse() or the raw output text

    @raise ValueError: unsupported `blastprogram`, no input, or improper input
    """
    supported = ('blastp', 'tblastn', 'tblastx', 'blastx')
    if blastprogram not in supported:
        # string exceptions raise TypeError on Python >= 2.6; use a real one
        raise ValueError("unsupported blastprogram '%s'; choose from: %s" % (
            blastprogram, ", ".join(supported)))
    # value handed to formatdb's -p switch for the subject file
    if blastprogram in ('tblastn', 'tblastx'):
        dna_or_prot = "F"
    else:
        dna_or_prot = "T"

    if fastadata and isinstance(fastadata, tuple) and len(fastadata) == 2 and not filenames:
        # input is fasta headers and sequences: write them to (~unique) files
        uniquetag = get_random_string_tag()
        fname_q = "_".join([uniquetag, str(fastadata[0][0]), 'Q.fa'])
        fname_s = "_".join([uniquetag, str(fastadata[1][0]), 'S.fa'])
        for path, entry in ((fname_q, fastadata[0]), (fname_s, fastadata[1])):
            fh = open(path, 'w')
            try:
                fh.write(">%s\n%s" % (entry[0], entry[1]))
            finally:
                fh.close()
    elif filenames and isinstance(filenames, tuple) and len(filenames) == 2 and not fastadata:
        # input is (supposed to be) filenames
        fname_q = filenames[0]
        fname_s = filenames[1]
    elif not filenames and not fastadata:
        raise ValueError("no input!")
    else:
        raise ValueError("inproper input!")

    # formatdb
    OSsystem("%s -i %s -p %s" % (FORMATDB_PATH, fname_s, dna_or_prot))

    # and blastall!
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    ci, co, ce = osPopen3("%s -p %s %s -i %s -d %s " % (
        BLASTALL_PATH, blastprogram, extra_params, fname_q, fname_s))
    ci.close()
    if output == "ncbiparsed":
        b_parser = NCBIStandalone.BlastParser()
        blastallout = b_parser.parse(co)
    else:
        blastallout = co.read()
    co.close()
    ce.close()

    if remove_files:
        OSsystem("rm %s.*" % fname_s)
        osRemove("%s" % fname_s)
        osRemove("%s" % fname_q)

    # and return!
    return blastallout
def runcexpander(fname_fasta, cbalignp_commandline=" -y", output='binary'):
    """
    Run the complete cascade of cexpander programs on an input multi fasta
    file and return whatever parse_cexpander() makes of the output.

    @type  fname_fasta: string
    @param fname_fasta: path to input multi fasta file

    @type  cbalignp_commandline: string
    @param cbalignp_commandline: (extra) command line for cbalignp

    @type  output: string
    @param output: 'float' writes `$dumpcvc` into the settings file; any
                   other value (default 'binary') writes `$dumpcv`

    @attention: requires global variable EXECUTABLE_CEXPANDER_ALLVSALL
    @attention: requires global variable EXECUTABLE_CEXPANDER_CBALIGNP
    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the cleanup step removes `fname_fasta` itself as well as
                the intermediate files created here

    @rtype:  CexpanderOutput object
    @return: result of parse_cexpander(); documented as a CexpanderOutput
             object

    @raise ValueError: `fname_fasta` is empty
    @raise IOError: `fname_fasta` is not an existing file
    """
    # string exceptions raise TypeError on Python >= 2.6; use real ones
    if not fname_fasta:
        raise ValueError("NoProperFunctionArguments")
    if not osPathIsfile(fname_fasta):
        raise IOError("FileDoesNotExist: %s" % fname_fasta)

    # (0) create (~unique) filenames
    uniquetag = get_random_string_tag()
    fname_allvsall = ".".join([fname_fasta, uniquetag, "allvsall"])
    fname_report = ".".join([fname_fasta, uniquetag, "report"])
    fname_aligned = ".".join([fname_fasta, uniquetag, "aligned"])
    fname_settings = ".".join([fname_fasta, uniquetag, "settings"])

    # (1) create complete .fa -> cexpanderstring command
    command = " python %s %s %s %s; %s -i %s %s > %s; %s < %s; " % (
        EXECUTABLE_CEXPANDER_ALLVSALL,
        fname_fasta,
        fname_allvsall,
        fname_report,
        EXECUTABLE_CEXPANDER_CBALIGNP,
        fname_allvsall,
        cbalignp_commandline,
        fname_aligned,
        EXECUTABLE_CEXPANDER_CEXPANDER,
        fname_settings,
    )

    # (2) create fname_settings file
    # BINARY == $dumpcv, FLOAT = $dumpcvc
    binorfloat = "$dumpcv"
    if output == "float":
        binorfloat = "$dumpcvc"
    content = "\n\n".join([
        "$load\n%s\n%s" % (fname_report, fname_aligned),
        "$addquery\n-1",
        "$run",
        "$dumpentries",
        "$cv_linear",
        "%s" % (binorfloat),
        "$exit\n\n",
    ])
    fh = open(fname_settings, 'w')
    try:
        fh.write(content)
    finally:
        fh.close()

    # (3) run the command
    ci, co, ce = osPopen3(command)
    ci.close()
    # output of EXECUTABLE_CEXPANDER_ALLVSALL is cast to STDOUT as well!
    cexpanderdata = co.read()
    co.close()
    # the error stream is still read, but its content is not used
    ce.read()
    ce.close()

    # (4) parse the captured output
    cxpdr = parse_cexpander(cexpanderdata, fname_fasta)

    # (5) cleanup files (this includes the input fasta file!)
    # NOTE(review): blastall_seq2seq in this file spells this helper
    # `OSsystem`; confirm that `osSystem` is imported too
    osSystem("rm -f %s %s.%s.*" % (fname_fasta, fname_fasta, uniquetag))

    # (6) return the output object
    return cxpdr