def blast(self, fasta, output):
    """
    Run blastpgp on a FASTA file and hand back the output file name.

    The search is started with self.blast_exe against self.db. The stored
    alignment (self.alignment) is passed as align_infile and output is
    passed as align_outfile. self.expect is used for both the expectation
    cutoff and the model threshold; the number of passes (3) and
    processors (1) are fixed. Anything in self.kwargs is forwarded to
    blastpgp as additional keyword arguments.

    @param fasta: query passed to blastpgp as the input file
    @param output: file name passed to blastpgp as align_outfile
    @return: output, unchanged
    """
    debug = runtime().debug
    debug("Blasting %s with alignment %s using %s"
          % (fasta, self.alignment, self.blast_exe))

    handles = NCBIStandalone.blastpgp(
        self.blast_exe,
        self.db,
        fasta,
        align_infile=self.alignment,
        align_outfile=output,
        expectation=self.expect,
        model_threshold=self.expect,
        npasses=3,
        nprocessors=1,
        **self.kwargs)

    # blastpgp hands back two objects; only the first one is passed on to
    # consume(), the second is not touched here.
    result_handle, _error_handle = handles
    consume(result_handle)

    return output
def blast(self, fasta, output):
    """
    Blast a FASTA file with blastpgp, consume the result buffer and return
    the name of the output file.

    Settings taken from the instance: blast_exe (executable), db
    (database), alignment (given to blastpgp as align_infile), expect
    (used as both expectation and model_threshold) and kwargs (extra
    keyword arguments forwarded verbatim). npasses is fixed at 3 and
    nprocessors at 1.

    @param fasta: input file for blastpgp
    @param output: value given to blastpgp as align_outfile
    @return: the same output value that was passed in
    """
    log = runtime()
    log.debug("Blasting %s with alignment %s using %s" % (
        fasta, self.alignment, self.blast_exe))

    # Keywords are spelled out (not merged into one dict) so that a clash
    # with a key in self.kwargs still fails the same way it always did.
    out_buffer, err_buffer = NCBIStandalone.blastpgp(
        self.blast_exe, self.db, fasta,
        align_infile=self.alignment,
        align_outfile=output,
        expectation=self.expect,
        model_threshold=self.expect,
        npasses=3,
        nprocessors=1,
        **self.kwargs
    )

    # Only the first returned object is consumed; err_buffer is left as is.
    consume(out_buffer)
    return output
def localPSIBlast(self, seqFile, db, method='blastp',
                  resultOut=None, e='0.001', **kw):
    """
    Perform a local psi-blast search (requires that the blast binaries
    and databases are installed locally).
    Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search.

    The raw blast output is copied to resultOut, the error stream to
    self.outFolder + self.F_BLAST_ERROR, and the first parsed XML record
    is handed to self.__blast2dict together with db.

    @param seqFile: file name with search sequence in FASTA format
    @type  seqFile: str
    @param db: database(s) to search e.g. ['swissprot', 'pdb']
    @type  db: [str]
    @param method: not used by this method
    @type  method: str
    @param e: expectation value cutoff (default: 0.001); converted with
              str() before it is passed to blastpgp
    @type  e: float or str
    @param resultOut: save blast output to this new file
                      (default: self.outFolder + self.F_BLAST_RAW_OUT)
    @type  resultOut: str

    @param kw: optional keywords::

        --- New Blast+ routine ---
        (see NcbipsiblastCommandline)

        num_iterations  Number of passes (default 1).
        matrix          Matrix to use (default BLOSUM62).

        --- old blastall routine ---
        --- Scoring ---
        matrix          Matrix to use (default BLOSUM62).
        gap_open        Gap open penalty (default 11).
        gap_extend      Gap extension penalty (default 1).
        window_size     Multiple hits window size (default 40).
        npasses         Number of passes (default 1).
        passes          Hits/passes (Integer 0-2, default 1).

        --- Algorithm ---
        gapped          Whether to do a gapped alignment (T/F, default T).
        wordsize        Word size (default 3).
        keep_hits       Number of best hits from a region to keep (def 0)
        xdrop           Dropoff value (bits) for gapped alignments
                        (def 15)
        hit_extend      Threshold for extending hits (default 11).
        nbits_gapping   Number of bits to trigger gapping (default 22).
        pseudocounts    Pseudocounts constants for multiple passes
                        (def 9).
        xdrop_final     X dropoff for final gapped alignment (default 25).
        xdrop_extension Dropoff for blast extensions (default 7).
        model_threshold E-value threshold to include in multipass model
                        (default 0.005).
        required_start  Start of required region in query (default 1).
        required_end    End of required region in query (default -1).

        --- Processing ---
        filter          Filter query sequence with SEG? (T/F, default F)
        believe_query   Believe the query defline? (T/F, default F)
        nprocessors     Number of processors to use (default 1).

        --- Formatting ---
        alignments      Number of alignments (default 250).
    @type  kw: any

    @raise BlastError: if the program call or the handling of its output
                       fails
    """
    ## the following should work for new Blast+ tools:

    #from Bio.Blast.Applications import NcbipsiblastCommandline

    #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT
    #blastx_cline = NcbipsiblastCommandline(query=seqFile,
    #                                       db=db,
    #                                       evalue=e,
    #                                       outfmt=5,
    #                                       out=resultOut,
    #                                       **kw)
    #stdout, stderr = blastx_cline()
    #parsed = NCBIXML.parse( results ).next()
    #self.__blast2dict( parsed, db )

    # Pre-bind both handles so they exist in locals() even if blastpgp
    # itself raises (the except branch exports locals()).
    results = err = None
    resultOut = resultOut or self.outFolder + self.F_BLAST_RAW_OUT
    kw = self.__dictvalues2str(kw)
    e = str(e)

    try:
        results, err = NCBIStandalone.blastpgp(settings.psi_blast_bin,
                                               db, seqFile,
                                               program='blastpgp',
                                               align_view='7',  ## XML output
                                               expectation=e, **kw)

        results = self.__copyFileHandle(results, resultOut)
        err = self.__copyFileHandle(err,
                                    self.outFolder + self.F_BLAST_ERROR)

        if self.verbose:
            self.log.writeln('Raw blast output copied to: ' + resultOut)

        # Only the first record of the XML output is used. next() works
        # on Python 2.6+ and Python 3, unlike the generator's .next().
        parsed = next(NCBIXML.parse(results))

        self.__blast2dict(parsed, db)

    except Exception as why:
        self.log.add(T.lastErrorTrace())
        # NOTE(review): debugging aid -- copies every local variable into
        # the module namespace before re-raising. Kept for backward
        # compatibility; consider removing.
        globals().update(locals())
        self.log.writeln('local namespace is pushed into global ')
        raise BlastError(str(why))
starttime = time.time()

# Progress message without a trailing newline. sys.stdout.write is used
# instead of the Python 2 only `print ...,` form; the trailing space stands
# in for the separator that form placed before the next printed item.
sys.stdout.write(
    'retrieving homologous sequences from UniProt using PSI-BLAST... ')
sys.stdout.flush()

## execute blastpgp ##

blastexe = '/usr/bin/blastpgp'
blastdb = '/clusterfs/ohana/external/UniProt/current/protein'
iters = 4
# NOTE(review): `eval` shadows the builtin of the same name. The name is
# kept because code outside this section may read it.
eval = 0.0001
maxseqs = 1000

results, errors = NCBIStandalone.blastpgp(blastexe, blastdb, seedfname,
                                          expectation=eval,
                                          alignments=maxseqs,
                                          npasses=iters)

## parse psiblast hits to get ids ##

# Collect the distinct hit ids over all records of the blast output.
blasthits = set()
for blast_record in NCBIXML.parse(results):
    for alignment in blast_record.alignments:
        blasthits.add(alignment.hit_id)

# Fewer than three distinct hits: report and stop the script (exit code 0).
if len(blasthits) < 3:
    print('Sorry, only %d homologs were retrieved from UniProt, too few sequences to determine patterns of evolutionary conservation.' % len(blasthits))
    sys.exit(0)
def localPSIBlast(self, seqFile, db, method='blastp',
                  resultOut=None, e='0.001', **kw):
    """
    Perform a local psi-blast search (requires that the blast binaries
    and databases are installed locally).
    Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search.

    The raw blast output is copied to resultOut, the error stream to
    self.outFolder + self.F_BLAST_ERROR, and the first parsed XML record
    is handed to self.__blast2dict together with db.

    @param seqFile: file name with search sequence in FASTA format
    @type  seqFile: str
    @param db: database(s) to search e.g. ['swissprot', 'pdb']
    @type  db: [str]
    @param method: not used by this method
    @type  method: str
    @param e: expectation value cutoff (default: 0.001); converted with
              str() before it is passed to blastpgp
    @type  e: float or str
    @param resultOut: save blast output to this new file
                      (default: self.outFolder + self.F_BLAST_RAW_OUT)
    @type  resultOut: str

    @param kw: optional keywords::

        --- New Blast+ routine ---
        (see NcbipsiblastCommandline)

        num_iterations  Number of passes (default 1).
        matrix          Matrix to use (default BLOSUM62).

        --- old blastall routine ---
        --- Scoring ---
        matrix          Matrix to use (default BLOSUM62).
        gap_open        Gap open penalty (default 11).
        gap_extend      Gap extension penalty (default 1).
        window_size     Multiple hits window size (default 40).
        npasses         Number of passes (default 1).
        passes          Hits/passes (Integer 0-2, default 1).

        --- Algorithm ---
        gapped          Whether to do a gapped alignment (T/F, default T).
        wordsize        Word size (default 3).
        keep_hits       Number of best hits from a region to keep (def 0)
        xdrop           Dropoff value (bits) for gapped alignments
                        (def 15)
        hit_extend      Threshold for extending hits (default 11).
        nbits_gapping   Number of bits to trigger gapping (default 22).
        pseudocounts    Pseudocounts constants for multiple passes
                        (def 9).
        xdrop_final     X dropoff for final gapped alignment (default 25).
        xdrop_extension Dropoff for blast extensions (default 7).
        model_threshold E-value threshold to include in multipass model
                        (default 0.005).
        required_start  Start of required region in query (default 1).
        required_end    End of required region in query (default -1).

        --- Processing ---
        filter          Filter query sequence with SEG? (T/F, default F)
        believe_query   Believe the query defline? (T/F, default F)
        nprocessors     Number of processors to use (default 1).

        --- Formatting ---
        alignments      Number of alignments (default 250).
    @type  kw: any

    @raise BlastError: if the program call or the handling of its output
                       fails
    """
    ## the following should work for new Blast+ tools:

    #from Bio.Blast.Applications import NcbipsiblastCommandline

    #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT
    #blastx_cline = NcbipsiblastCommandline(query=seqFile,
    #                                       db=db,
    #                                       evalue=e,
    #                                       outfmt=5,
    #                                       out=resultOut,
    #                                       **kw)
    #stdout, stderr = blastx_cline()
    #parsed = NCBIXML.parse( results ).next()
    #self.__blast2dict( parsed, db )

    # Pre-bind both handles so they exist in locals() even if blastpgp
    # itself raises (the except branch exports locals()).
    results = err = None
    resultOut = resultOut or self.outFolder + self.F_BLAST_RAW_OUT
    kw = self.__dictvalues2str(kw)
    e = str(e)

    try:
        results, err = NCBIStandalone.blastpgp(settings.psi_blast_bin,
                                               db, seqFile,
                                               program='blastpgp',
                                               align_view='7',  ## XML output
                                               expectation=e, **kw)

        results = self.__copyFileHandle(results, resultOut)
        err = self.__copyFileHandle(err,
                                    self.outFolder + self.F_BLAST_ERROR)

        if self.verbose:
            self.log.writeln('Raw blast output copied to: ' + resultOut)

        # Only the first record of the XML output is used. next() works
        # on Python 2.6+ and Python 3, unlike the generator's .next().
        parsed = next(NCBIXML.parse(results))

        self.__blast2dict(parsed, db)

    except Exception as why:
        self.log.add(T.lastErrorTrace())
        # NOTE(review): debugging aid -- copies every local variable into
        # the module namespace before re-raising. Kept for backward
        # compatibility; consider removing.
        globals().update(locals())
        self.log.writeln('local namespace is pushed into global ')
        raise BlastError(str(why))
# starttime = time.time()

# Progress message without a trailing newline. sys.stdout.write is used
# instead of the Python 2 only `print ...,` form; the trailing space stands
# in for the separator that form placed before the next printed item.
sys.stdout.write(
    'retrieving homologous sequences from UniProt using PSI-BLAST... ')
sys.stdout.flush()

## execute blastpgp ##

blastexe = '/usr/bin/blastpgp'
blastdb = '/clusterfs/ohana/external/UniProt/current/protein'
iters = 4
# NOTE(review): `eval` shadows the builtin of the same name. The name is
# kept because code outside this section may read it.
eval = 0.0001
maxseqs = 1000

results, errors = NCBIStandalone.blastpgp(blastexe, blastdb, seedfname,
                                          expectation=eval,
                                          alignments=maxseqs,
                                          npasses=iters)

## parse psiblast hits to get ids ##

# Collect the distinct hit ids over all records of the blast output.
blasthits = set()
for blast_record in NCBIXML.parse(results):
    for alignment in blast_record.alignments:
        blasthits.add(alignment.hit_id)

# Fewer than three distinct hits: report and stop the script (exit code 0).
if len(blasthits) < 3:
    print('Sorry, only %d homologs were retrieved from UniProt, too few sequences to determine patterns of evolutionary conservation.' % len(blasthits))
    sys.exit(0)

# The handle is used (and presumably closed) by code outside this section.
handle = open('intrepid-psiblast-ids.txt', 'w')