def blast_remos(r, db='nr'): """Uses blast to find remos in a genome""" from Bio.Blast import NCBIWWW, NCBIXML import cStringIO b_parser = NCBIXML.BlastParser() E_VALUE_THRESH = 0.04 for s in r.get_aligned_sequences(): for remo in r.get_remos_for(s): seq = remo.get_sequence_for(s.centre_sequence, False) print 'Blasting: %s...' % (seq[:60]) result_handle = NCBIWWW.qblast('blastn', db, seq) blast_results = result_handle.read() blast_out = cStringIO.StringIO(blast_results) b_record = b_parser.parse(blast_out) for alignment in b_record.alignments: for hsp in alignment.hsps: if hsp.expect < E_VALUE_THRESH: print '****Alignment****' print 'sequence:', alignment.title print 'length:', alignment.length print 'e value:', hsp.expect print 'sbjct_start:', hsp.sbjct_start print hsp.query[0:75] + '...' print hsp.match[0:75] + '...' print hsp.sbjct[0:75] + '...' break break
def blast(blastRootDirectory): if sys.platform == 'win32': blast_db = os.path.join(blastRootDirectory, 'blastDB.fasta') else: if not os.path.isdir('/tmp/BLAST'): print "making directory '/tmp/BLAST'" os.mkdir('/tmp/BLAST/') if not os.path.exists('/tmp/BLAST/formatdb'): shutil.copy(os.path.join(blastRootDirectory, 'formatdb'), '/tmp/BLAST') print "copying 'formatdb' to '/tmp/BLAST/'" blast_db = os.path.join('/tmp/BLAST', 'blastDB.fasta') #print 'path to blastDB.fasta:', blast_db blast_file = os.path.join(blastRootDirectory, 'filetoblast.txt') #print 'path to filetoblast.txt:', blast_file if sys.platform == 'win32': blastall_name = 'Blastall.exe' blast_exe = os.path.join(blastRootDirectory, blastall_name) else: blastall_name = 'blastall' blast_exe = os.path.join(os.getcwd(), '../../BLAST/bin/', blastall_name) #print 'path to blastall:', blast_exe if sys.platform == 'win32': import win32api blast_db = win32api.GetShortPathName(blast_db) blast_file = win32api.GetShortPathName(blast_file) blast_exe = win32api.GetShortPathName(blast_exe) #cont = raw_input('blah') #try: blast_out, error_info = NCBIStandalone.blastall(blast_exe, 'blastp', blast_db, blast_file, align_view=7) #except: # f = open(blast_file, 'r') # s = file.read() # print s #print 'done BLASTing' print 'errors:', error_info.read() print 'blast output:', blast_out.read() b_parser = NCBIXML.BlastParser() #print 'got parser' b_record = b_parser.parse(blast_out) b_iterator = NCBIStandalone.Iterator(blast_out, b_parser) #print 'got iterator' results = [] recordnumber = 0 nonmatchingQueries = [] while 1: recordnumber += 1 b_record = b_iterator.next() if not b_record: break print 'query:', b_record.query if b_record is None: break e_value_thresh = 0.001 print 'number of alignments:', len(b_record.alignments) significant = False for alignment in b_record.alignments: for hsp in alignment.hsps: if hsp.expect < e_value_thresh: alignment.title = alignment.title.replace(">", "") if b_record.query != alignment.title: significant = True print 'adding', b_record.query, 'and', alignment.title, 'to the list of matches' results.append( (b_record.query, alignment.title, hsp.expect)) print b_record.query, significant if not significant: print 'adding', b_record.query, 'to the list of queries without matches' nonmatchingQueries.append(b_record.query) return nonmatchingQueries, results