Example #1
0
 def blast(self, fasta, output):
     """
     Run blastpgp on a FASTA file, seeded with self.alignment.

     The search goes through NCBIStandalone.blastpgp with three passes on
     a single processor.  self.expect is used both as the expectation
     cutoff and as the model inclusion threshold; any extra options held
     in self.kwargs are forwarded unchanged.

     fasta  -- query handed to blastpgp as its input file
     output -- handed to blastpgp as align_outfile

     Returns output after the first handle returned by blastpgp has been
     passed to consume().
     """
     logger = runtime()
     logger.debug("Blasting %s with alignment %s using %s"
                  % (fasta, self.alignment, self.blast_exe))

     # The second handle is only unpacked, never read.
     out_handle, err_handle = NCBIStandalone.blastpgp(
         self.blast_exe,
         self.db,
         fasta,
         align_infile=self.alignment,
         align_outfile=output,
         expectation=self.expect,
         model_threshold=self.expect,
         npasses=3,
         nprocessors=1,
         **self.kwargs)

     consume(out_handle)
     return output
Example #2
0
 def blast(self, fasta, output):
     """
     Blast ``fasta`` with blastpgp and return the output filename.

     self.alignment is supplied as the input alignment and ``output`` as
     the alignment output file.  self.expect serves as both the
     expectation value and the model threshold; the run uses 3 passes and
     1 processor, plus whatever keywords are stored in self.kwargs.

     Only the first of the two handles returned by blastpgp is passed to
     consume(); the second one is left untouched.
     """
     message = "Blasting %s with alignment %s using %s"
     runtime().debug(message % (fasta, self.alignment, self.blast_exe))

     stdout_handle, stderr_handle = NCBIStandalone.blastpgp(
         self.blast_exe, self.db, fasta,
         align_infile=self.alignment,
         align_outfile=output,
         expectation=self.expect,
         model_threshold=self.expect,
         npasses=3,
         nprocessors=1,
         **self.kwargs)

     consume(stdout_handle)
     return output
Example #3
0
    def localPSIBlast(self,
                      seqFile,
                      db,
                      method='blastp',
                      resultOut=None,
                      e='0.001',
                      **kw):
        """
        Perform a local psi-blast search (requires that the blast binaries
        and databases are installed locally).
        Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search.

        The raw blast output is copied to resultOut, the second handle
        returned by blastpgp is copied to outFolder + F_BLAST_ERROR, and the
        first parsed record is passed on to __blast2dict.

        @param seqFile: file name with search sequence in FASTA format
        @type  seqFile: str
        @param db: database(s) to search e.g. ['swissprot', 'pdb']
        @type  db: [str]
        @param method: not used by this method (the program is always
                       'blastpgp'); kept for interface compatibility
        @type  method: str
        @param e: expectation value cutoff (default: 0.001); converted
                  with str() before it is passed on
        @type  e: float or str
        @param resultOut: save blast output to this new file
                          (default: outFolder + F_BLAST_RAW_OUT)
        @type  resultOut: str

        @param kw: optional keywords (values are converted by
                   __dictvalues2str before the call)::
            --- New Blast+ routine ---
            (see NcbipsiblastCommandline)

            num_iterations   Number of passes (default 1).
            matrix           Matrix to use (default BLOSUM62).

            --- old blastall routine ---
            --- Scoring ---
            matrix           Matrix to use (default BLOSUM62).
            gap_open         Gap open penalty (default 11).
            gap_extend       Gap extension penalty (default 1).
            window_size      Multiple hits window size (default 40).
            npasses          Number of passes (default 1).
            passes           Hits/passes (Integer 0-2, default 1).

            --- Algorithm ---
            gapped           Whether to do a gapped alignment (T/F, default T).
            wordsize         Word size (default 3).
            keep_hits        Number of best hits from a region to keep (def 0)
            xdrop            Dropoff value (bits) for gapped alignments
                             (def 15)
            hit_extend       Threshold for extending hits (default 11).
            nbits_gapping    Number of bits to trigger gapping (default 22).
            pseudocounts     Pseudocounts constants for multiple passes
                             (def 9).
            xdrop_final      X dropoff for final gapped alignment (default 25).
            xdrop_extension  Dropoff for blast extensions (default 7).
            model_threshold  E-value threshold to include in multipass model
                             (default 0.005).
            required_start   Start of required region in query (default 1).
            required_end     End of required region in query (default -1).

            --- Processing ---
            filter           Filter query sequence with SEG? (T/F, default F)
            believe_query    Believe the query defline? (T/F, default F)
            nprocessors      Number of processors to use (default 1).

            --- Formatting ---
            alignments       Number of alignments (default 250).
        @type  kw: any

        @raise BlastError: if the program call, the copying of its output
                           or the parsing of the result fails
        """
        ## the following should work for new Blast+ tools:

        #from Bio.Blast.Applications import NcbipsiblastCommandline

        #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT
        #blastx_cline = NcbipsiblastCommandline(query=seqFile,
        #db=db,
        #evalue=e,
        #outfmt=5,
        #out=resultOut,
        #**kw)
        #stdout, stderr = blastx_cline()
        #parsed = NCBIXML.parse( results ).next()
        #self.__blast2dict( parsed, db )

        results = err = None
        resultOut = resultOut or self.outFolder + self.F_BLAST_RAW_OUT
        kw = self.__dictvalues2str(kw)
        e = str(e)

        try:
            results, err = NCBIStandalone.blastpgp(
                settings.psi_blast_bin,
                db,
                seqFile,
                program='blastpgp',
                align_view='7',  ## XML output
                expectation=e,
                **kw)

            ## keep copies of both streams on disk; the returned handles
            ## replace the original ones
            results = self.__copyFileHandle(results, resultOut)
            err = self.__copyFileHandle(err,
                                        self.outFolder + self.F_BLAST_ERROR)

            if self.verbose:
                self.log.writeln('Raw blast output copied to: ' + resultOut)

            ## only the first record of the report is used
            parsed = next(NCBIXML.parse(results))

            self.__blast2dict(parsed, db)

        except Exception as why:
            ## Log the traceback and re-raise as BlastError. The local
            ## namespace is deliberately NOT pushed into globals() any more:
            ## that could overwrite module-level names and kept the file
            ## handles alive.
            self.log.add(T.lastErrorTrace())
            raise BlastError(str(why))
Example #4
0
starttime = time.time()

print 'retrieving homologous sequences from UniProt using PSI-BLAST...',
sys.stdout.flush()

## execute blastpgp ##

blastexe = '/usr/bin/blastpgp'
blastdb = '/clusterfs/ohana/external/UniProt/current/protein'
iters = 4
eval = 0.0001
maxseqs = 1000

results, errors = NCBIStandalone.blastpgp(blastexe,
                                          blastdb,
                                          seedfname,
                                          expectation=eval,
                                          alignments=maxseqs,
                                          npasses=iters)

## parse psiblast hits to get ids ##

blasthits = set([])

for blast_record in NCBIXML.parse(results):
    for alignment in blast_record.alignments:
        blasthits.add(alignment.hit_id)

if len(blasthits) < 3:
    print 'Sorry, only %d homologs were retrieved from UniProt, too few sequences to determine patterns of evolutionary conservation.' % len(
        blasthits)
    sys.exit(0)
Example #5
0
    def localPSIBlast( self, seqFile, db, method='blastp',
                       resultOut=None, e='0.001', **kw ):
        """
        Perform a local psi-blast search (requires that the blast binaries
        and databases are installed locally).
        Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search.

        The raw blast output is copied to resultOut, the second handle
        returned by blastpgp is copied to outFolder + F_BLAST_ERROR, and the
        first parsed record is passed on to __blast2dict.

        @param seqFile: file name with search sequence in FASTA format
        @type  seqFile: str
        @param db: database(s) to search e.g. ['swissprot', 'pdb']
        @type  db: [str]
        @param method: not used by this method (the program is always
                       'blastpgp'); kept for interface compatibility
        @type  method: str
        @param e: expectation value cutoff (default: 0.001); converted
                  with str() before it is passed on
        @type  e: float or str
        @param resultOut: save blast output to this new file
                          (default: outFolder + F_BLAST_RAW_OUT)
        @type  resultOut: str

        @param kw: optional keywords (values are converted by
                   __dictvalues2str before the call)::
            --- New Blast+ routine ---
            (see NcbipsiblastCommandline)

            num_iterations   Number of passes (default 1).
            matrix           Matrix to use (default BLOSUM62).

            --- old blastall routine ---
            --- Scoring ---
            matrix           Matrix to use (default BLOSUM62).
            gap_open         Gap open penalty (default 11).
            gap_extend       Gap extension penalty (default 1).
            window_size      Multiple hits window size (default 40).
            npasses          Number of passes (default 1).
            passes           Hits/passes (Integer 0-2, default 1).

            --- Algorithm ---
            gapped           Whether to do a gapped alignment (T/F, default T).
            wordsize         Word size (default 3).
            keep_hits        Number of best hits from a region to keep (def 0)
            xdrop            Dropoff value (bits) for gapped alignments
                             (def 15)
            hit_extend       Threshold for extending hits (default 11).
            nbits_gapping    Number of bits to trigger gapping (default 22).
            pseudocounts     Pseudocounts constants for multiple passes
                             (def 9).
            xdrop_final      X dropoff for final gapped alignment (default 25).
            xdrop_extension  Dropoff for blast extensions (default 7).
            model_threshold  E-value threshold to include in multipass model
                             (default 0.005).
            required_start   Start of required region in query (default 1).
            required_end     End of required region in query (default -1).

            --- Processing ---
            filter           Filter query sequence with SEG? (T/F, default F)
            believe_query    Believe the query defline? (T/F, default F)
            nprocessors      Number of processors to use (default 1).

            --- Formatting ---
            alignments       Number of alignments (default 250).
        @type  kw: any

        @raise BlastError: if the program call, the copying of its output
                           or the parsing of the result fails
        """
        ## the following should work for new Blast+ tools:

        #from Bio.Blast.Applications import NcbipsiblastCommandline

        #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT
        #blastx_cline = NcbipsiblastCommandline(query=seqFile,
                                               #db=db,
                                               #evalue=e,
                                               #outfmt=5,
                                               #out=resultOut,
                                               #**kw)
        #stdout, stderr = blastx_cline()
        #parsed = NCBIXML.parse( results ).next()
        #self.__blast2dict( parsed, db )

        results = err = None
        resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT
        kw = self.__dictvalues2str( kw )
        e = str(e)

        try:
            results, err = NCBIStandalone.blastpgp( settings.psi_blast_bin,
                                                    db, seqFile,
                                                    program='blastpgp',
                                                    align_view='7', ## XML output
                                                    expectation=e, **kw)

            ## keep copies of both streams on disk; the returned handles
            ## replace the original ones
            results = self.__copyFileHandle(results,resultOut )
            err = self.__copyFileHandle(err, self.outFolder+self.F_BLAST_ERROR)

            if self.verbose:
                self.log.writeln('Raw blast output copied to: ' + resultOut )

            ## only the first record of the report is used
            parsed = next( NCBIXML.parse( results ) )

            self.__blast2dict( parsed, db )

        except Exception as why:
            ## Log the traceback and re-raise as BlastError. The local
            ## namespace is deliberately NOT pushed into globals() any more:
            ## that could overwrite module-level names and kept the file
            ## handles alive.
            self.log.add( T.lastErrorTrace() )
            raise BlastError( str(why) )
#

starttime = time.time()

print 'retrieving homologous sequences from UniProt using PSI-BLAST...',
sys.stdout.flush()

## execute blastpgp ##

blastexe = '/usr/bin/blastpgp'
blastdb  = '/clusterfs/ohana/external/UniProt/current/protein'
iters = 4
eval = 0.0001
maxseqs = 1000

results,errors = NCBIStandalone.blastpgp(blastexe, blastdb, seedfname, expectation=eval, alignments=maxseqs, npasses=iters)

## parse psiblast hits to get ids ##

blasthits = set([])

for blast_record in NCBIXML.parse(results):
    for alignment in blast_record.alignments:
        blasthits.add(alignment.hit_id)

if len(blasthits) < 3:
    print 'Sorry, only %d homologs were retrieved from UniProt, too few sequences to determine patterns of evolutionary conservation.' % len(blasthits)
    sys.exit(0)

handle = open('intrepid-psiblast-ids.txt', 'w')