Python NCBIWWW Examples, Bio.Blast.NCBIWWW Python Examples

Example #1

1

Show file

File: compBACTEUK.py Project: Katzlab/Scripts-for-analysis-of-transcriptomes

def blastBACTEUK(arg):
	"""BLAST every FASTA record against prokaryotes and eukaryotes at NCBI.

	For each record in the FASTA file `arg`, two remote blastx searches
	against "nr" are run, one restricted to Bacteria/Archaea and one to
	Eukaryota.  A line "<id>,<e-value of the first HSP>" (or "<id>, no hit")
	is appended to 'bacterial.txt' and 'eukaryotic.txt' respectively.
	Records whose search or parsing fails are noted in 'errorlog.txt' and
	processing continues with the next record.
	"""
	def hit_line(name, blast_record):
		# First HSP of the first alignment, or a "no hit" marker.
		if blast_record.descriptions:
			return name + ',' + str(blast_record.alignments[0].hsps[0].expect) + '\n'
		return name + ', no hit' + '\n'

	# Options shared by both searches.
	common = dict(ncbi_gi=False, descriptions="1", alignments="1", format_type="XML", hitlist_size="1")

	with open('bacterial.txt', 'a') as out, open('eukaryotic.txt', 'a') as out2, open(arg) as fasta:
		for record in SeqIO.parse(fasta, format="fasta"):
			name = record.id
			try:
				query = record.format("fasta")
				result_handleB = NCBIWWW.qblast("blastx", "nr", query, entrez_query='(Bacteria[ORGN] OR Archaea[ORGN])', **common)
				result_handleE = NCBIWWW.qblast("blastx", "nr", query, entrez_query='(Eukaryota[ORGN])', **common)

				blast_recordsB = NCBIXML.read(result_handleB)
				blast_recordsE = NCBIXML.read(result_handleE)

				if blast_recordsB.descriptions:
					print(record.id)
				out.write(hit_line(name, blast_recordsB))
				out2.write(hit_line(name, blast_recordsE))
			except Exception:
				# Best effort per record: log the failure and keep going.
				# Exception (not a bare except) lets Ctrl+C stop the run.
				with open('errorlog.txt', 'a') as errorout:
					errorout.write('problem blasting ' + record.id + '\n')

Example #2

0

Show file

File: design.py Project: expartools/Proxar-Design

def check_blast_in(input_filename,taxid_line,num, GACTC_YES,blast_filename,perct):
    """BLAST the input FASTA against each taxon listed in `taxid_line`.

    `taxid_line` is split on ' OR '; from each piece the text between
    '(taxid:' and ')' is taken as the taxonomy id.  For every id the records
    of `input_filename` are sent to NCBI blastn in batches and the raw
    output is written to the file `blast_filename + <taxid>`.

    `num`, `GACTC_YES` and `perct` are accepted but not used here.
    """
    def run_batch(batch, txid, sink):
        # Submit one batch of FASTA text and append the raw result to sink.
        result_handle = NCBIWWW.qblast("blastn", "nr", batch,word_size=13,hitlist_size=100,entrez_query=txid,expect=10)
        try:
            sink.write(result_handle.read())
        finally:
            result_handle.close()

    for valist in str(taxid_line).split(' OR '):
        txid_num=valist[valist.find('(taxid:')+7:valist.find(')')]
        txid='txid'+txid_num+' [ORGN]'
        # Context managers close both files even when a BLAST request fails.
        with open(blast_filename+txid_num,"w") as blast_result_file, open(input_filename,"r") as input_file:
            typ='' # FASTA text accumulated for the next batch
            for seq_record in SeqIO.parse(input_file, "fasta"):
                # Flush before appending once the pending batch exceeds 200 characters.
                if len(typ)>200:
                    run_batch(typ, txid, blast_result_file)
                    typ=''
                typ=typ+seq_record.format('fasta')
            if typ:
                # Leftover records that never reached the flush threshold.
                run_batch(typ, txid, blast_result_file)
    print("blast job done!")

Example #3

0

Show file

File: CPA.py Project: jqiankgi/CPA

def check_blast_ex(input_filename,piece_len,bool_customer,exclude_line):
	"""Find windows of the input sequence that match the exclusion set exactly.

	The sequence from `input_filename` is cut into overlapping windows of
	`piece_len` bases, each written to '~seq.txt' under the FASTA id of its
	start offset.  The windows are sent to NCBI blastn in batches restricted
	by the Entrez query (`exclude_line` when `bool_customer` is true and the
	line is non-empty, otherwise 'txid9606 [ORGN]'), and the raw XML is
	collected in '~blastresult.xml'.

	Returns:
		set of int: start offsets of windows having an HSP whose identity
		count equals `piece_len`.

	The process exits (status 0, as before) when BLAST cannot be reached or
	the result cannot be parsed.
	"""
	bad_match=set()
	if bool_customer and exclude_line !='':
		query_line=exclude_line
	else:
		query_line='txid9606 [ORGN]'

	def submit(batch):
		# One remote blastn request for a batch of FASTA text.
		return NCBIWWW.qblast("blastn", "nr", batch,word_size=13,hitlist_size=100,entrez_query=query_line,expect=10)

	(input_id,input_seq,input_len)=read_fasta(input_filename)
	with open('~seq.txt','w') as f:
		# NOTE(review): this range omits the final full window starting at
		# input_len-piece_len; left unchanged, confirm whether that is intended.
		for i in range(0,input_len-piece_len):
			f.write('>'+str(i)+'\n'+input_seq[i:i+piece_len]+'\n')
	count_piece=int(os.path.getsize('~seq.txt')/2000)+1
	typ=''
	counter_i=0
	try:
		with open('~seq.txt','r') as input_file, open('~blastresult.xml',"w") as blast_result_file:
			for seq_record in SeqIO.parse(input_file, "fasta"):
				typ=typ+seq_record.format('fasta')
				if len(typ)>2000:
					counter_i=counter_i+1
					try:
						print("blasting")
						result_handle = submit(typ)
						print(str(counter_i)+" out of "+str(count_piece)+" is blasted!")
					except Exception:
						try:
							print("mistake happens when tryint to connect to NCBI blast engine, try again!")
							result_handle = submit(typ)
							print("sucessfully connect to NCBI blast engine at the second try!")
						except Exception:
							print("can't use NCBI blast at this moment!")
							sys.exit(0)
					blast_result_file.write(result_handle.read())
					typ=''
			# Only submit the remainder when there is one; an empty query
			# would make the request fail and abort the whole run.
			if typ:
				blast_result_file.write(submit(typ).read())
	except Exception:
		print("Error happpens while getting the blast result! Unable to use blast this time!")
		sys.exit(0)
	try:
		with open('~blastresult.xml') as xml_file:
			for blast_record in NCBIXML.parse(xml_file):
				for alignment in blast_record.alignments:
					for hsp in alignment.hsps:
						if hsp.identities == piece_len:
							# Each window was submitted under its start offset
							# as FASTA id, so the query title identifies it.
							# (The old code added the stale index left over
							# from the window-writing loop.)
							bad_match.add(int(blast_record.query.split()[0]))
	except Exception:
		print("Error happens while parsing blast result")
		sys.exit(0)
	return bad_match

Example #4

0

Show file

File: ncbiRemoteblast.py Project: Ginsea/Some-python-scripts

def run_blast(fasta,type):
    """BLAST every record of a FASTA file at NCBI, one XML file per record.

    Args:
        fasta: path or handle accepted by SeqIO.parse.
        type: "prot" runs blastp, "nucl" runs blastn; any other value does
            nothing.

    Each result is written to split_xml/<record id>.xml and the record id
    plus the current clock() reading is printed.
    """
    # The nucleotide branch used to be a copy of the protein one (blastp,
    # word_size=3); nucleotide queries need blastn, whose default word size
    # is 11.
    if type == "prot":
        program, word_size = "blastp", 3
    elif type == "nucl":
        program, word_size = "blastn", 11
    else:
        return

    for seqs in SeqIO.parse(fasta,"fasta"):
        clock()
        # NOTE(review): ncbi_gi is documented as a TRUE/FALSE flag; passing
        # the sequence id looks unintended but is kept as in the original.
        ncbi = NCBIWWW.qblast(program=program,database="nr",sequence=str(seqs.seq),format_type="XML",ncbi_gi=str(seqs.id), alignments=20,word_size=word_size)
        with open("split_xml/%s.xml"%str(seqs.id),"w") as out:
            out.write(ncbi.read())
        print("%s\t%f"%(str(seqs.id),float(clock())))

Example #5

0

Show file

File: blast_multi.py Project: vhsvhs/phylowidgets

def fetchGenbankData(seq_list):
    """Run a remote blastp search for every sequence and save the raw output.

    Args:
        seq_list: mapping of taxon identifier -> sequence.  Each result is
            written to '<taxon>.xml' in the current directory.

    A search that raises ValueError is retried, up to three attempts in
    total.  When all attempts fail the process exits with status 1; the
    original loop fell through silently and its give-up message was
    unreachable.
    """
    Entrez.email = "*****@*****.**"
    max_attempts = 3
    for taxa in seq_list.keys():
        seq = seq_list[taxa]
        label = str(taxa)
        print("BLAST-ing NCBI for sequence ID: " + label)
        for attempt in range(max_attempts):
            try:
                blast_handle = NCBIWWW.qblast('blastp', 'nr', seq)
                blast_handle.seek(0)
                with open(label + '.xml', 'w') as blast_file:
                    blast_file.write(blast_handle.read())
                blast_handle.close()
                print(". . . results written to " + label + '.xml')
                break # success, no further attempts needed
            except ValueError:
                time.sleep(3)
                print("Something went wrong, my GenBank query for taxa " + label + " returned no records.")
                if attempt < max_attempts - 1:
                    print("I'm trying again. . .")
        else:
            # Every attempt failed.
            print("I'm not going retry anymore.  Sorry.")
            exit(1)
        # Pause between taxa before sending the next request.
        time.sleep(2)

Example #6

0

Show file

File: fasta_statter.py Project: crashfrog/Dispatch

def find_closest_ref(fasta_file, callback=None, update_callback=lambda d: None, organism=entrez_CFSAN_genera):
	"""Find the closest NCBI match for a contig and report its fetch URL.

	Contigs are sorted by ascending length and the first one of at least
	1500 bases is used as the query; when none is that long, the longest
	contig is used.  The query is BLASTed (blastn, "chromosome" database,
	restricted by `organism`) and the second '|'-separated field of the top
	hit id is taken as the reference identifier.

	Args:
		fasta_file: path of the FASTA file with the contigs.
		callback: called with progress messages; defaults to a timestamped
			print.
		update_callback: called with {'ref_file': id, 'ref_url': url}.
		organism: Entrez query restricting the search.

	Returns:
		The reference identifier.

	Raises:
		ValueError: the FASTA file holds no contigs, or BLAST returned no hit.
	"""
	if not callback:
		import datetime
		def callback(s):
			print("[{}] {}".format(datetime.datetime.today().ctime(), s))

	callback("Importing modules...")

	from Bio.Blast import NCBIWWW
	import xml.etree.ElementTree as xml

	callback("Loading fasta ({})...".format(fasta_file))
	with open(fasta_file, 'r') as f:
		# key=len gives the same stable ascending order as the old cmp sort.
		contigs = sorted(SeqIO.parse(f, 'fasta'), key=len)
	if not contigs:
		raise ValueError("no contigs found in {}".format(fasta_file))
	# First contig reaching 1500 bases, else the last (longest) one.
	contig = next((c for c in contigs if len(c) >= 1500), contigs[-1])
	# NOTE(review): the message says "Longest", but the contig selected above
	# is the shortest one of at least 1500 bases when such a contig exists.
	callback("Longest contig is {} bases. BLASTing...".format(len(contig)))
	r = NCBIWWW.qblast("blastn", "chromosome", ">{}\n{}".format(contig.description, contig.seq),
					   alignments=1,
					   entrez_query="{}".format(organism),
					   hitlist_size=1,
					   filter='L')
	callback("BLAST finished.")
	result = xml.parse(r)
	hit_id = result.find(".//Iteration/Iteration_hits/Hit/Hit_id")
	if hit_id is None:
		raise ValueError("BLAST returned no hits for {}".format(fasta_file))
	refseq = hit_id.text.split("|")[1]
	refseq_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id={}&rettype=fasta&retmode=text'.format(refseq)
	update_callback({'ref_file':refseq, 'ref_url':refseq_url})
	return refseq

Example #7

0

Show file

File: run_BLAST.py Project: stevenhwu/autoBLAST

    def blast2(self):
        """BLAST the FASTA file at self.infile (blastn vs. nr) and print strong hits.

        The FASTA text and the URL-encoded form of the request parameters
        are echoed to stdout, then every HSP with an e-value below 0.001 is
        printed with its alignment title.
        """
        # Read through a context manager so the input file is closed.
        with open(self.infile) as fasta_file:
            fasta_string = fasta_file.read()
        print(fasta_string)
        database = "nr"
        program = "blastn"
        # Diagnostic output only: this parameter list is printed, not sent;
        # the actual request is made by qblast below.
        parameters = [
            ('DATABASE', database),
            ('PROGRAM', program),
            ('QUERY', fasta_string),
            ('CMD', 'Put'),
        ]
        query = [x for x in parameters if x[1] is not None]
        message = (urllib.urlencode(query))
        print (query)
        print(message)
        result_handle = NCBIWWW.qblast("blastn", "nr", fasta_string, hitlist_size=10)

        # parse() copes with several query sequences in the file.
        blast_records = NCBIXML.parse(result_handle)
        E_VALUE_THRESH = 0.001
        for blast_record in blast_records:
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print("alignment: %s" % alignment.title)
                        print("e-value: %s" % hsp.expect)

Example #8

0

Show file

File: blast.py Project: pazur/final

 def run(self):
     """Run the configured BLAST search and collect its records.

     Returns a dict with the raw XML ("blast_xml"), the parsed BLAST
     records ("blast_records") and the de-duplicated sequence records built
     from all alignments ("sequences").
     """
     res = NCBIWWW.qblast(self.program, self.database, self.sequence, **self.queryextra)
     # NCBIXML.parse yields records lazily.  Materialise them once so the
     # value returned under "blast_records" is not an iterator that was
     # already exhausted while the alignments were being gathered.
     blast_records = list(NCBIXML.parse(res))
     alignments = [alignment for record in blast_records for alignment in record.alignments]
     records = list(self.get_seqrecords(alignments))
     records = self.delete_same(records)
     return {"blast_xml": res.getvalue(), "blast_records": blast_records, "sequences": records}

Example #9

0

Show file

File: RTfetch.py Project: mazimi/HYPNO

		def taxBLASTn(uniprotID,taxName,proSeq):
			"""Map a protein to a DNA sequence via a remote tblastn search.

			`proSeq` is searched against "nr", restricted to the organism
			`taxName`.  The top hit's GI number and hit coordinates are
			handed to chromParse to obtain the DNA.

			Returns:
				(mapTuple, debug): mapTuple is
				(accession, dna_seq, 'TBLASTN', midline) on success and an
				empty tuple when nothing usable was found or any step
				failed; debug is accumulated log text.
			"""
			mapTuple, debug = (), ''
			try:
				result_handle = NCBIWWW.qblast("tblastn", "nr", proSeq, expect = .0001, entrez_query = taxName+'[organism]')
				string = result_handle.read()
				result_handle.close()
				tree = xml.etree.ElementTree.fromstring(string)
				iteration = tree.find("BlastOutput_iterations/Iteration")
				hits = iteration.findall("Iteration_hits/Hit") if iteration is not None else []
				if not hits:
					# No hit: report the empty mapping explicitly.
					return mapTuple, debug
				topHit = hits[0]
				accessionNCBI = topHit.findtext("Hit_accession")
				midseq = topHit.findtext("Hit_hsps/Hsp/Hsp_midline")
				Hit_id = topHit.findtext("Hit_id")
				Hit_from = int(topHit.findtext("Hit_hsps/Hsp/Hsp_hit-from"))
				Hit_to = int(topHit.findtext("Hit_hsps/Hsp/Hsp_hit-to"))
				match = re.search(r'gi\|(\w+)\|',Hit_id)
				if match is None:
					# The hit id carries no GI number to look up.
					return mapTuple, debug
				GI = match.group(1)
				debug += '\ttBLASTn hit accession and match indices: '+str(accessionNCBI)+' ('+str(Hit_to)+', '+str(Hit_from)+')\n'
				dna_seq = chromParse(GI,Hit_from,Hit_to)
				if dna_seq != '':
					mapTuple = (accessionNCBI,dna_seq,'TBLASTN',midseq)
			except Exception:
				# Best effort: network or parsing failures yield the empty
				# mapping.  Exception (not a bare except) lets
				# KeyboardInterrupt and SystemExit propagate.
				pass
			return mapTuple, debug

Example #10

0

Show file

File: bitscore.py Project: babonis/gimme

def internetBLAST(inputFile, fileFormat='fasta', evalue=0.001):
    '''Run an online blastp search for every record of inputFile.

    For each HSP whose e-value is below `evalue`, the ratio of bit score
    to alignment length is computed.  The highest ratio and its subject
    title are written to stdout; records without a qualifying HSP are
    reported as NA on both stdout and stderr.

    '''

    for seqRecord in SeqIO.parse(inputFile, fileFormat):
        print >> sys.stderr, 'Doing BLAST (internet) search for', seqRecord.id
        resultHandle = NCBIWWW.qblast('blastp', 'nr', seqRecord.seq)
        scored = [
            ((hsp.bits / alignment.length), alignment.title)
            for blastRecord in NCBIXML.parse(resultHandle)
            for alignment in blastRecord.alignments
            for hsp in alignment.hsps
            if hsp.expect < evalue
        ]
        if not scored:
            for stream in (sys.stdout, sys.stderr):
                print >> stream, '%s\t%s\t%s' % (seqRecord.id, 'NA', 'NA')
            continue
        # Last element of the stable ascending sort is the best ratio.
        best_ratio, best_subject = sorted(scored, key=lambda pair: pair[0])[-1]
        print >> sys.stdout, '%s\t%f\t%s' % (seqRecord.id, best_ratio, best_subject)

Example #11

0

Show file

File: blast.py Project: ODiogoSilva/blast_wrapper

def blast(arguments):
    """ Worker function that executes the WWWNCBI blast

    `arguments` is a sequence holding, in order: BLAST program,
    (name, sequence) pair, database, e-value, hit list size, output number
    and output format.  The raw result is appended to
    "blast_out_<output_file>_<output number>"; `output_file` is a
    module-level name defined in main.

    Exits with a message when qblast raises ValueError.
    """
    # Gathering arguments
    blast_program = arguments[0]
    name, seq = arguments[1]
    database = arguments[2]
    evalue = arguments[3]
    hitlist = arguments[4]
    output_num = arguments[5]
    output_format = arguments[6]

    # The context manager closes the output file on every path, including
    # the sys.exit below.
    with open("blast_out_%s_%s" % (output_file, output_num), "a") as save_file:
        try:
            result_handle = NCBIWWW.qblast(
                blast_program,
                database,
                ">%s\n%s" % (name, seq),
                expect=evalue,
                hitlist_size=hitlist,
                format_type=output_format,
            )
        except ValueError:
            # Ensure a clean exit with an informative error when the
            # sequence type and BLAST program are incompatible.
            sys.exit("\nPlease check the compatibility between the input sequence type and the BLAST program")

        save_file.write(result_handle.read())

Example #12

0

Show file

File: blast_zebrafinch_genome.py Project: ajshultz/Rad

def main(argv):
	"""Command-line entry point: BLAST a FASTA file against the zebra finch genome.

	Options:
		-f  path to the input FASTA file (required)
		-o  output path (default ./blast.xml)
		-e  expect value (default 10e-20)
		-h  print usage

	Prints usage and exits with status 2 on -h, on an unknown option, or
	when no -f option is given.
	"""
	usage = "BlastZFGenome.py -f <path to input fasta file> -o <path to output directory and filename (default ./blast.xml)> -e <expect value (default 10e-20)>"
	try:
		opts,args = getopt.getopt(argv,'hf:o:e:',)
	except getopt.GetoptError:
		# The exception class is GetoptError; the previous spelling
		# (GetOptError) raised AttributeError instead of showing usage.
		print(usage)
		sys.exit(2)

	output = './blast.xml'
	fastafile = ''
	expect = 10e-20

	for opt, arg in opts:
		if opt == "-h":
			print(usage)
			sys.exit(2)
		elif opt == "-o":
			output = arg
		elif opt == "-f":
			fastafile = arg
		elif opt == "-e":
			expect = float(arg)

	if not fastafile:
		# Without -f there is nothing to BLAST.
		print(usage)
		sys.exit(2)

	with open(fastafile) as fasta_handle:
		fasta = fasta_handle.read()
	result_handle = NCBIWWW.qblast("blastn","GPIPE/59729/101/ref_top_level",fasta,expect=expect)
	try:
		with open(output, "w") as save_file:
			save_file.write(result_handle.read())
	finally:
		result_handle.close()

Example #13

0

Show file

File: session.py Project: admp/protein-search

 def search_blast(self, accession, program="blastp", database="nr"):
     """
     BLAST the sequence of the UniProt record carrying `accession`.

     The raw result is written to 'blast.xml' (through self.open), then
     self.load_blast() is called and self.blast_records returned.

     Raises ValueError when no record in self.uniprot lists the accession
     or the matching record has an empty sequence.

     See:
         http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc73
         http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc75
     """
     assert self.uniprot is not None
     # Take the first record listing the accession.  The old nested loops
     # only broke out of the inner loop, so scanning continued and a later
     # match replaced the first one.
     sequence = next(
         (record.sequence for record in self.uniprot
          if accession in record.accessions),
         None,
     )
     if not sequence:
         raise ValueError('No matching sequence for the accession number')
     # Now, feed it to BLAST
     handle = NCBIWWW.qblast(program, database, sequence)
     try:
         with self.open('blast.xml', 'w') as fp:
             fp.write(handle.read())
     finally:
         handle.close()
     self.load_blast()
     return self.blast_records

Example #14

0

Show file

File: OT.py Project: Sudoka/tupac

def get_OT(sample_seqs):
  """Summarise tblastn hits of the samples as a tab-separated table.

  `sample_seqs` is a sequence of (header, sequence) pairs.  All samples
  are sent in one tblastn search against "nr".  One line per BLAST record
  is produced: header, hit id, strand symbol ('/' when valid_align rejects
  the frames) and a code (1 when coverage is below 0.9, otherwise 2), or a
  padded placeholder ending in 0 when the record has no alignments.  When
  a record has several alignments only the last one's line is kept.
  """
  query_string = ''.join(seq[0]+'\n'+seq[1]+'\n' for seq in sample_seqs)

  blast_handle = NCBIWWW.qblast('tblastn', 'nr', query_string, entrez_query='scenedesmus dimorphus')
  blast_handle.seek(0)

  table = ''
  for i, record in enumerate(NCBIXML.parse(blast_handle)):
    sample = sample_seqs[i]
    if not len(record.alignments) > 0:
      row = sample[0] + '\t' + ' '*28 + '\t' + ' ' + '\t' + str(0)
    else:
      for align in record.alignments:
        row = sample[0] + '\t' + align.hit_id + '\t'
        frames = [hsp.frame[1] for hsp in align.hsps]
        strand = plus_or_minus(frames[0]) if valid_align(frames) else '/'
        row += strand + '\t'
        covered = float(sum([len(hsp.sbjct) for hsp in align.hsps]))
        query_coverage = covered / len(sample[1])
        row += str(1) if query_coverage < .9 else str(2)
    table += row + '\n'
  return table

Example #15

0

Show file

File: Overload.py Project: GAR-for-GATC/Biosecurity

def main():
    my_string_to_use = open("C:/Users/Evan/Desktop/Biosecurity_Stuff/Genomes/test_gene.txt","r")


    line_one = ""   #This program searches 2 lines, the strings here will hold them during the for loop
    line_two = ""
    counter = 0     #this is used in the for loop 
    blast_counter = 1 #this counts the number of blast searches performed
    for lines in my_string_to_use:
        if counter == 0:
            counter = counter + 1
            #print "pear"  #These random print statements were used for troubleshooting
        elif counter%2 == 1:  #used if the counter is odd, it saves the line of nucleotides for later
            line_one = lines
            counter = counter+1
            #print "apple"
        elif counter%2 == 0:  # if the counter is even, it will blast search both lines and clear the temporary lines
            line_two = lines
            result_handle = NCBIWWW.qblast("blastn", "nt", line_one + line_two)
            #print "purple"
            print result_handle  #This tells you in the python terminal if a blast search was performed
            save_file = open("C:/Users/Evan/Desktop/overload_genome.xml", "w")
            save_file.write(result_handle.read())
            save_file.close()
            result_handle.close()
            print "Blast search number: ", blast_counter

            blast_counter = blast_counter + 1
            counter = counter + 1
        
        #print lines
        
    my_string_to_use.close()
    
    Open_the_XML_file()

Example #16

0

Show file

File: BlastPrimers.py Project: allista/DegenPrimer

 def blast_query(self, counter, entrez_query=''):
     """Send the formatted primer query to NCBI blastn and load the results.

     The raw output is saved to self._results_filename and the parsed
     records are stored in self._blast_results.  `counter` is advanced
     after each of the five work steps.

     Returns:
         True on success, False when the query, saving or parsing failed.
         (Success used to return None, which callers could not tell apart
         from failure by truthiness.)
     """
     counter.set_work(5)
     self._format_query(); counter.count()
     self._save_query_config(); counter.count()
     try:
         print('\nLaunching BLAST query #%d...' % self._primers_hash)
         blast_results = NCBIWWW.qblast('blastn',
                                        self.database,
                                        self._query.format('fasta'),
                                        expect       = self.e_val,
                                        word_size    = self.w_size,
                                        nucl_penalty = self.n_pen,
                                        nucl_reward  = self.n_rew,
                                        filter       = self.fltr,
                                        entrez_query = entrez_query,
                                        ungapped_alignment = self.no_gaps,)
         counter.count()
         #save results to a file
         try:
             with open(self._results_filename, 'w') as results_file:
                 results_file.write(blast_results.read())
         finally:
             blast_results.close()
         print('\nBLAST output was written to:\n   %s' % self._results_filename)
         counter.count()
         #parse results
         with open(self._results_filename, 'r') as results_file:
             self._blast_results = list(NCBIXML.parse(results_file))
         counter.count()
     except Exception as e:
         print('\nFailed to obtain BLAST query results from NCBI.')
         print(e)
         return False
     return True

Example #17

0

Show file

File: Blast_and_Compare.py Project: Y-Lammers/Cites-checker

def blast_bulk (fasta_file, settings):
	"""BLAST every sequence of a FASTA file at NCBI, one request per sequence.

	`settings` is indexed as: [0] BLAST program, [1] database,
	[2] hit list size, [3] megablast flag.

	Returns a list with one parsed BLAST record per input sequence.
	"""
	# The blast modules are imported from biopython
	from Bio.Blast import NCBIWWW, NCBIXML
	from Bio import SeqIO

	def search(record):
		# Echo the record, submit it and parse the single-query result.
		print(record)
		handle = NCBIWWW.qblast(settings[0], settings[1], record.format('fasta'), megablast=settings[3], hitlist_size=settings[2])
		return NCBIXML.read(handle)

	# Read the whole file first, then search the records one by one.
	records = list(SeqIO.parse(fasta_file, 'fasta'))
	return [search(record) for record in records]

Example #18

0

Show file

File: analysis.py Project: dmnfarrell/epitopemap

def getOrthologs(seq,expect=10,hitlist_size=400,equery=None):
    """Fetch orthologous sequences using blast and return the records
        as a dataframe

    Runs a remote blastp search of `seq` against "nr", saves the output to
    'my_blast.xml' and post-processes the table produced by
    sequtils.getBlastResults: accession and definition are split out of the
    subject field, helper columns are dropped, duplicate definitions are
    removed and 100% identical hits are excluded.

    Args:
        seq: query sequence.
        expect: e-value cutoff passed to BLAST.
        hitlist_size: maximum number of hits requested (previously ignored
            in favour of a hard-coded 500).
        equery: optional Entrez query restricting the search.

    Returns:
        The filtered dataframe, or None when the BLAST request fails.
    """

    from Bio.Blast import NCBIXML,NCBIWWW
    from Bio import Entrez, SeqIO
    Entrez.email = "*****@*****.**"

    try:
        print('running blast..')
        result_handle = NCBIWWW.qblast("blastp", "nr", seq, expect=expect,
                              hitlist_size=hitlist_size,entrez_query=equery)
        time.sleep(2)
    except Exception:
        # Exception (not a bare except) keeps Ctrl+C working.
        print('blast timeout')
        return
    with open("my_blast.xml", "w") as savefile:
        savefile.write(result_handle.read())
    result_handle.close()

    with open("my_blast.xml") as saved:
        df = sequtils.getBlastResults(saved)
    df['accession'] = df.subj.apply(lambda x: x.split('|')[3])
    df['definition'] = df.subj.apply(lambda x: x.split('|')[4])
    df = df.drop(['subj','positive','query_length','score'],axis=1)
    print(len(df))
    df.drop_duplicates(subset=['definition'], inplace=True)
    df = df[df['perc_ident']!=100]
    print(len(df))
    return df

Example #19

0

Show file

File: ncbi_api.py Project: cxhernandez/asr-pipeline

def aa_to_mrna(aaseq):
    """Run a remote tblastn search of an amino acid sequence against "nr".

    The raw BLAST output (not an extracted mRNA sequence) is printed and
    returned; callers have to parse it themselves.
    """
    result_handle = NCBIWWW.qblast("tblastn", "nr", aaseq, descriptions=10)
    try:
        result = result_handle.read()
    finally:
        # Release the handle once the output has been read.
        result_handle.close()
    print(result)
    return result

Example #20

0

Show file

File: webservice.py Project: Beltzac/TCC_WebService

def process(hash):
    """OCR the item identified by `hash`, BLAST the text and store the outcome.

    Error codes written to the Result row:
        0   BLAST returned output
        10  BLAST returned no results
        34  OCR found no text

    The row's `result` is the raw BLAST output, or an empty string when no
    search was run.
    """
    print('Starting thread for:', hash)

    # NOTE(review): the error value returned by ocr() is overwritten on
    # every path below, as in the original.
    text, error = ocr(hash)

    # Defined up front so the update below also works when OCR found no
    # text; previously `blast` was unbound on that path and raised NameError.
    blast_output = ''

    if len(text) > 0:
        start_time = timeit.default_timer()
        blast = NCBIWWW.qblast('blastn', 'nr', text)
        elapsed = timeit.default_timer() - start_time
        print('blast:', elapsed)
        blast_output = blast.getvalue()
        if len(blast_output) == 0:
            error = 10  # blast returned no results
        else:
            error = 0  # all OK
    else:
        error = 34  # ocr found no text

    # update the stored row with the results

    with app.test_request_context():
        r = Result.query.filter_by(hash=hash).first()
        r.error = error
        r.result = blast_output
        db.session.commit()

    return

Example #21

0

Show file

File: UniproBlastToPdb.py Project: avishek-r-kumar/DFI-Web-Interface

def UniBLAST(code,Verbose=False):
    """
    Input
    ------
    Uniprot code
    UniBLAST(code)
    e.g. UniBLAST('O00238')

    Description
    -----------
    Writes the FASTA record fetched from Entrez for the code and runs
    blastp against the pdb database with the code as the query.

    Output
    ------
    - <code>.fasta      FASTA sequence
    - <code>_blast.xml  BLAST output in XML format
    """

    Entrez.email = random.choice(emails)
    if(Verbose):
        print("Using email: %s"%(Entrez.email))
    with open(code + ".fasta", "w") as out_file:
        # NOTE(review): the record is fetched from db="nucleotide" although
        # the code is described as a UniProt code - confirm the database.
        net_handle = Entrez.efetch(db="nucleotide", id=code, rettype="fasta")
        try:
            out_file.write(net_handle.read())
        finally:
            net_handle.close()

    if(Verbose):
        print("Running blastp")
    result_handle = NCBIWWW.qblast("blastp", "pdb", code)
    if(Verbose):
        print("Done running blastp")
    try:
        with open(code + "_blast.xml", "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()

Example #22

0

Show file

File: bio.py Project: egils/Bioinformatika--3.1-u-duotis

def get_blast_alignments(seq, query):
  """Return blastn alignments whose HSPs cover at least 80% of the query.

  `seq` is searched against "nr", restricted by the Entrez query `query`.
  For every alignment the HSP alignment lengths are summed; alignments
  whose total is below 0.8 of len(seq) are removed from the record's
  alignment list, which is then returned.
  """
  ncbi = NCBIWWW.qblast(program="blastn" , database="nr",
                      sequence=seq, entrez_query=query, format_type="XML", hitlist_size = 500, expect = 100.0)
  blast = NCBIXML.read(ncbi)
  query_length = len(seq)

  def covers_enough(alignment):
    # Summed HSP alignment length relative to the query length.
    aligned = 0.0
    for hsp in alignment.hsps:
      aligned += hsp.align_length
    return not ((aligned / query_length) < 0.8)

  # Filter in place so the record keeps the same list object.
  blast.alignments[:] = [alignment for alignment in blast.alignments
                         if covers_enough(alignment)]
  return blast.alignments

Example #23

0

Show file

File: Scanner.py Project: whitestorm19/PlasmidAnalysis

def createPSSM():
    """Build a position-specific scoring matrix from the sequences in 'fastatmp'.

    The first sequence is also BLASTed (blastn vs. "nt") and the raw result
    saved to 'my_blast.xml'.  A motif is created from all sequences, its
    counts are normalised with a pseudocount of 0.25 and converted to log
    odds.

    Returns the PSSM.
    """
    print("Start PSSM")

    sequences = [str(seq_record.seq)
                 for seq_record in SeqIO.parse("fastatmp", "fasta", IUPAC.unambiguous_dna)]

    # BLAST the first sequence and keep the raw output on disk.
    blast_handle = NCBIWWW.qblast("blastn", "nt", sequences[0])
    xml_out = open("my_blast.xml", "w")
    xml_out.write(blast_handle.read())
    xml_out.close()
    blast_handle.close()

    motif = motifs.create(sequences, alphabet=Gapped(IUPAC.unambiguous_dna))
    print("motif created")

    pwm = motif.counts.normalize(pseudocounts=0.25)
    print("PWM done")
    pssm = pwm.log_odds()
    print("PSSM done")
    print(pssm)
    return pssm

Example #24

0

Show file

File: make_descs.py Project: Sudoka/tupac

def get_descriptions(sample_seqs, organism):
  """Describe the tblastn hits of each sample.

  `sample_seqs` is a sequence of (header, sequence) pairs, searched
  together against "nr" and restricted by the Entrez query `organism`.

  Returns one list per BLAST record:
  [header, hit id, strand symbol or '/', query coverage, hsp_info], where
  hsp_info holds ((query_start, query_end), (sbjct_start, sbjct_end),
  (query, match, sbjct, frame)) tuples sorted by query_start.  Records
  without alignments yield [header, ' ', ' ', 0.0, [], []].  When a record
  has several alignments only the last one's description is kept.
  """
  query_string = ''.join(seq[0]+'\n'+seq[1]+'\n' for seq in sample_seqs)

  blast_handle = NCBIWWW.qblast('tblastn', 'nr', query_string, entrez_query=organism)
  blast_handle.seek(0)

  descs = []
  for i, record in enumerate(NCBIXML.parse(blast_handle)):
    sample = sample_seqs[i]
    if not len(record.alignments) > 0:
      desc = [sample[0], ' ', ' ', 0.0, [], []]
    else:
      for align in record.alignments:
        desc = [sample[0], align.hit_id]

        frames = [hsp.frame[1] for hsp in align.hsps]
        desc.append(plus_or_minus(frames[0]) if valid_align(frames) else '/')

        covered = float(sum([len(hsp.sbjct) for hsp in align.hsps]))
        desc.append(covered / len(sample[1]))

        hsp_info = [((hsp.query_start, hsp.query_end),
                     (hsp.sbjct_start, hsp.sbjct_end),
                     (hsp.query, hsp.match, hsp.sbjct, hsp.frame[1]))
                    for hsp in align.hsps]
        desc.append(sorted(hsp_info, key= lambda t: t[0][0]))

    descs.append(desc)
  return descs

Example #25

0

Show file

File: distributed.py Project: choderalab/Ensembler2

def blast_pdb(target_sequence, num_hits=1000):
    """
    Query the PDB using NCBI blast and return MSMSeeds initialized with the results

    Parameters
    ----------
    target_sequence : String
        The sequence of the target to use to query blast
    num_hits : int, optional
        The maximum number of hits returned by BLAST. Default: 1000

    Returns
    -------
    msmseeds : list of MSMSeed objects
        One MSMSeed per alignment, built from the target sequence, the
        template sequence and structure retrieved for the alignment's
        accession, and the e-value of the alignment's first HSP.
    """
    from Bio.Blast import NCBIWWW, NCBIXML

    def seed_from(hit):
        # The e-value comes from the first HSP of the alignment.
        top_evalue = hit.hsps[0].expect
        fasta, structure = _retrieve_chain(hit.accession)
        return MSMSeed(target_sequence, fasta, structure, top_evalue)

    handle = NCBIWWW.qblast("blastp", "pdb", target_sequence, hitlist_size=num_hits)
    hits = NCBIXML.read(handle).alignments
    return [seed_from(hit) for hit in hits]

Example #26

0

Show file

File: BLASTExecute.py Project: eelster/NRPSQuanification

def blast_execute(record):
    """BLAST one record (blastn vs. "nt") and save the raw output.

    The result is written to
    <ana_dir>/<bla_dir>/<xml_dir>/BLAST-<record name>.xml.
    """
    blast_handle = NCBIWWW.qblast("blastn", "nt", record.seq)
    xml_path = os.path.join(ana_dir, bla_dir, xml_dir,
                            "BLAST-" + record.name + ".xml")
    xml_file = open(xml_path, "w")
    xml_file.write(blast_handle.read())
    xml_file.close()
    blast_handle.close()

Example #27

0

Show file

File: blast.py Project: AndreaEdwards/design_pipeline

	def blast_record(self):
		"""Run blastp against "nr" for self.genbank_record_number.

		The raw output is stored in self.xml_result and returned.
		"""
		record_number = int(self.genbank_record_number)
		print("BLASTing record number %d ..." % record_number)
		handle = NCBIWWW.qblast("blastp", "nr", self.genbank_record_number)
		print("extracting result...")
		self.xml_result = handle.read()
		handle.close()
		return self.xml_result

Example #28

0

Show file

File: WJ.py Project: willgdjones/GIassignment2

def blastdemo(genbankID):
    """BLAST a GenBank identifier against swissprot and print strong hits.

    Every HSP with an e-value below 1e-17 is printed with its alignment
    title, length, e-value and the first 75 columns of the query, match and
    subject lines.  Afterwards the length of every alignment is printed.
    """
    # run blastp on the swissprot database NB to scale this up we must do it locally on cluster
    result_handle = NCBIWWW.qblast("blastp", "swissprot", genbankID)
    # read the results as XML
    blast_record = NCBIXML.read(result_handle)

    # Deliberately strict threshold.
    E_VALUE_THRESH = 0.00000000000000001
    # Show every HSP below the threshold.  Each print takes one formatted
    # string so label and value come out as text; the former
    # print ("label:", value) calls printed a tuple under the Python 2
    # print statement used elsewhere in this function.
    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            if hsp.expect < E_VALUE_THRESH:
                print("****Alignment****")
                print("sequence: %s" % alignment.title)
                print("length: %s" % alignment.length)
                print("e value: %s" % hsp.expect)
                print(hsp.query[0:75] + "...")
                print(hsp.match[0:75] + "...")
                print(hsp.sbjct[0:75] + "...")
                print("\n")

    for a in blast_record.alignments:
        print(a.length)

Example #29

0

Show file

File: singletones.py Project: vladsaveliev/AZ_Orthofinder

def blast_online(rec, result_xml_fpath):
    """Submit *rec* to NCBI blastp (refseq_protein) and save the reply.

    The record is formatted as FASTA, printed, sent through
    ``NCBIWWW.qblast`` with ``hitlist_size=10`` and the response is written
    to *result_xml_fpath*. HTTP and URL errors are logged as warnings and
    the loop runs again. An interrupt (KeyboardInterrupt, SystemExit,
    GeneratorExit) makes the function return 1.

    NOTE(review): as shown, the ``while True`` loop has no exit on a
    successful search, and ``to_the_next`` / ``attempt_number`` are never
    read - this excerpt looks truncated; confirm against the full file.
    ``log``, ``urllib2`` and ``blasted_singletones_dir`` are not defined in
    this block and must come from the enclosing module.
    """
    from Bio.Blast import NCBIWWW

    # Set once any web error has been seen; controls the hint logged on interrupt.
    retrying = False
    to_the_next = False
    attempt_number = 1
    while True:
        try:
            print rec.format('fasta')
            result_xml_f = NCBIWWW.qblast('blastp', 'refseq_protein', rec.format('fasta'),
                                          hitlist_size=10)
            with open(result_xml_fpath, 'w') as save_f:
                save_f.write(result_xml_f.read())

        except urllib2.HTTPError as e:
            log.warn('     Warning: could not blast through web. HTTPError: %s. Code %s. '
                     '(You can press Ctrl+C to interrupt and continue later).'
                     % (e.msg, str(e.code)))
            retrying = True

        # Python 2-only "except X, e" spelling (the handler above uses "as e").
        except urllib2.URLError, e:
            log.warn('     Warning: could not blast through web. URLError: %s. '
                     '(You can press Ctrl+C to interrupt and continue later).'
                     % (e.args))
            retrying = True

        except (KeyboardInterrupt, SystemExit, GeneratorExit):
            # Only mention the resume hint when a web failure preceded the interrupt.
            if retrying:
                log.info('     If you restart from this step and do not remove the "%s" directory, '
                         'the process will continue from here.' % blasted_singletones_dir)
            return 1

Example #30

0

Show file

File: remo.py Project: JohnReid/biopsy

def blast_remos( r, db = 'nr' ):
    """Uses blast to find remos in a genome"""
    from Bio.Blast import NCBIWWW, NCBIXML
    import cStringIO
    b_parser = NCBIXML.BlastParser()
    E_VALUE_THRESH = 0.04
    for s in r.get_aligned_sequences():
        for remo in r.get_remos_for( s ):
            seq = remo.get_sequence_for( s.centre_sequence, False )
            print 'Blasting: %s...' % ( seq[:60] )
            result_handle = NCBIWWW.qblast( 'blastn', db, seq )
            blast_results = result_handle.read()
            blast_out = cStringIO.StringIO(blast_results)
            b_record = b_parser.parse(blast_out)
            for alignment in b_record.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print '****Alignment****'
                        print 'sequence:', alignment.title
                        print 'length:', alignment.length
                        print 'e value:', hsp.expect
                        print 'sbjct_start:', hsp.sbjct_start
                        print hsp.query[0:75] + '...'
                        print hsp.match[0:75] + '...'
                        print hsp.sbjct[0:75] + '...'
            break
        break

Example #31

0

Show file

File: task3.py Project: reige012/assignment-18

def blast_with_GIs(GI_seqIDs):
    """BLAST each GI identifier with blastn against "nt" and save the text.

    Based in part on the Biopython cookbook example; the try/except
    structure was suggested by Subir.

    Each successful search is written to ``blast_results_<n>.txt`` where
    ``<n>`` counts successful searches starting at 1. A failed search
    prints a message and is skipped. The function pauses one second after
    every identifier, successful or not.

    Args:
        GI_seqIDs: iterable of GI identifiers passed to ``NCBIWWW.qblast``.
    """
    counter = 1
    for GI_ID in GI_seqIDs:
        try:
            result = NCBIWWW.qblast("blastn", "nt", GI_ID, format_type="Text")
            try:
                blast_results = result.read()
            finally:
                # Release the result handle even when reading fails.
                result.close()
            with open("{}_{}.txt".format("blast_results", counter),
                      "w") as outfile:
                outfile.write(blast_results)
            counter += 1
        except Exception:
            # Not a bare except: Ctrl+C and SystemExit must still stop the run.
            print("No sequence available for gi|{}".format(GI_ID))
        time.sleep(1)

Example #32

0

Show file

def do_blast(seq, organism, eVal):
    """Run blastp against "nr", retrying whenever the 120 s alarm fires.

    Args:
        seq: query passed to ``NCBIWWW.qblast``.
        organism: value used as the ``entrez_query`` restriction.
        eVal: value used as the ``expect`` threshold.

    Returns:
        The handle returned by ``NCBIWWW.qblast``.

    NOTE(review): ``TimeoutException`` is presumably raised by a SIGALRM
    handler installed elsewhere in the module - confirm.
    """
    while True:
        signal.alarm(120)
        try:
            return NCBIWWW.qblast("blastp",
                                  "nr",
                                  seq,
                                  entrez_query=organism,
                                  expect=eVal)
        except TimeoutException:
            print("Server timeout, trying again")
        finally:
            # Always disarm the alarm, including when qblast raises some
            # other error, so it cannot fire later in unrelated code.
            signal.alarm(0)

Example #33

0

Show file

def blastProt(database, file_name, file_format):
    """BLAST the single record in *file_name* with blastp and save the reply.

    The output file is named ``blast-Prot<d>.xml`` where ``<d>`` is the
    first digit found anywhere in *file_name* (directory part included).

    Args:
        database: database name passed to ``NCBIWWW.qblast``.
        file_name: path of the sequence file; must contain a digit.
        file_format: format name used both to read and to re-serialise the record.

    Returns:
        The name of the file that was written.

    Raises:
        AttributeError: if *file_name* contains no digit.
    """
    # Close the input file as soon as the record has been read.
    with open(file_name) as sequence_file:
        record = SeqIO.read(sequence_file, format=file_format)

    print("BLAST runnning")
    result_handle = NCBIWWW.qblast("blastp", database, record.format(file_format))
    print("BLAST finnished")

    try:
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        mo = re.search(r"\d", file_name)
        number = mo.group()

        xml_file = "blast-Prot" + number + ".xml"
        with open(xml_file, "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()

    return xml_file

Example #34

0

Show file

def main():
    """BLAST every file whose name contains ".fa" in a fixed directory.

    Changes into /home/yikylee/Desktop/megahit, sends the full text of each
    matching file to blastn against "nt" (text output, 10 hits) and writes
    the reply to ``<prefix>_result.txt``, where ``<prefix>`` is the file
    name up to its first dot.
    """
    from Bio.Blast import NCBIWWW
    import os
    from Bio import SeqIO
    from io import StringIO

    print("Start reading FASTA files...")
    os.chdir("/home/yikylee/Desktop/megahit")
    fasta_names = [entry for entry in os.listdir() if ".fa" in entry]
    for fasta_name in fasta_names:
        stem = fasta_name.split(".")[0]
        print("Now " + stem + "...")
        query_text = open(fasta_name).read()
        blast_output = NCBIWWW.qblast("blastn", "nt", query_text,
                                      format_type="Text", hitlist_size=10)
        print(stem + " analysis completed.")
        with open("./" + stem + "_result.txt", "w") as report:
            report.write(blast_output.read())

Example #35

0

Show file

def buscaNcbi(query):
    """Run blastp against "nr" for *query* and save the reply as XML.

    The reply is written to ``arq_file + ".xml"``. A ``ValueError`` is
    reported to the user and followed by a five-second pause.

    NOTE(review): the completion message names ``query + ".xml"`` while the
    file actually written is ``arq_file + ".xml"``; ``arq_file`` and
    ``path`` come from module scope - confirm they agree with *query*.
    """
    # Protein searches use blastp; nucleotide searches would use blastn.
    try:
        print("Buscando arquivo...")
        blast_result = NCBIWWW.qblast("blastp", "nr", query)
        try:
            with open(arq_file + ".xml", "w") as blast_out:
                blast_out.write(blast_result.read())
        finally:
            # Close the result handle even if reading or writing fails.
            blast_result.close()
        print("Fim da busca.\nArquivo " + query +
              ".xml encontra-se disponível no diretório '" + path +
              "' para análise")

    except ValueError:
        print("\nProteína inexistente ou inválida\n\n\n")
        time.sleep(5)

Example #36

0

Show file

File: task3.py Project: reige012/assignment-18

def genebank_sequence(name):
    """Search the nucleotide database for *name* and BLAST every hit.

    The id list returned by the Entrez search is printed. Each id is then
    searched with blastn against "nt" in text format and the reply is
    appended to ``outputfile.txt``. An id whose search raises
    ``ValueError`` is skipped.
    """
    search_handle = Entrez.esearch(db="nucleotide", term=name, retmode="xml")
    id_list = Entrez.read(search_handle)['IdList']
    print(id_list)
    for sequence_id in id_list:
        try:
            report = NCBIWWW.qblast("blastn", "nt", sequence_id,
                                    format_type='Text').read()
            # Pause between consecutive remote requests.
            time.sleep(1)
            with open("outputfile.txt", "a") as outfile:
                outfile.write(report)
        except ValueError:
            # Nothing to write for this id.
            continue

Example #37

0

Show file

File: perform_blast.py Project: SmitsG/Workflows-bi11

def perform_blast(output, program, database, sequence, hitlist_size):
    """Run a remote BLAST search and append the reply to *output*.

    Called from blast_controller.

    Args:
        output: path of the output file (opened in append mode).
        program: the BLAST program to be used.
        database: the database to BLAST against.
        sequence: the sequence to be blasted.
        hitlist_size: maximum number of hits.
    """
    handle = NCBIWWW.qblast(program=program,
                            database=database,
                            sequence=sequence,
                            hitlist_size=hitlist_size)
    try:
        # The with-block closes out_handle; no explicit close() is needed.
        with open(output, "a") as out_handle:
            out_handle.write(handle.read())
    finally:
        handle.close()

Example #38

0

Show file

def net_blast(query_record, program='blastn', database='nr'):
    """Validate the request, then run a remote BLAST search.

    Args:
        query_record: a SeqRecord holding the query sequence (at least 10
            residues long).
        program: BLAST program name; lower-cased before it is looked up in
            the module-level ``searches`` table.
            See http://www.ncbi.nlm.nih.gov/BLAST/blast_program.shtml
        database: database to query.
            See http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=ProgSelectionGuide#db

    Returns:
        The result of ``NCBIXML.parse`` on the XML reply.

    Raises:
        ValueError: if the record is not a SeqRecord, is too short, the
            program is unknown, the sequence alphabet does not suit the
            program, the database is unknown, or the database cannot be
            combined with the program.
    """
    # --- query checks ---
    if not isinstance(query_record, SeqRecord):
        raise ValueError(u'Invalid Search Item')
    query_seq = query_record.seq
    if len(query_seq) < 10:
        raise ValueError(u"Query sequence is too short")

    # --- program check ---
    program = program.lower()
    if program not in searches:
        raise ValueError(u"Invalid Program '%s'" % program)

    # --- alphabet and database checks ---
    required_alpha, required_dbs = searches[program]
    if not isinstance(query_seq.alphabet, required_alpha):
        raise ValueError(u"Query alphabet for '%s' must be '%s'" %
                         (program, alphabets[program]))
    known_database = database in protein_db or database in nucleotide_db
    if not known_database:
        raise ValueError(u"Invalid database '%s'" % database)
    if database not in required_dbs:
        raise ValueError(u"Database '%s' cannot be used with program '%s'" %
                         (database, program))

    # All checks passed: search, then hand the XML to the parser.
    xml_handle = NCBIWWW.qblast(program, database, query_seq,
                                format_type='XML')
    return NCBIXML.parse(xml_handle)

Example #39

0

Show file

def blast_file(fasta_path, blast_db='nt', parser=basic_parser):
    """BLAST the contents of a FASTA file and post-process every record.

    Args:
        fasta_path: path of the FASTA file; its whole text is the query.
        blast_db: database name passed to ``NCBIWWW.qblast``.
        parser: callable applied to each parsed BLAST record.

    Returns:
        A list with one ``parser(record)`` result per BLAST record, i.e.
        one per query sequence in the file.
    """
    logging.info("Running BLAST {}".format(fasta_path))
    # Context manager so the FASTA file is closed once it has been read.
    with open(fasta_path, 'r') as fasta_file:
        fasta_string = fasta_file.read()
    logging.debug(fasta_string)
    result_handle = NCBIWWW.qblast(BLAST_PROG,
                                   blast_db,
                                   fasta_string,
                                   megablast=True)
    logging.info("BLAST returned")
    blast_records = NCBIXML.parse(result_handle)
    logging.info("Analyzed BLAST")
    # Each record corresponds to a single sequence search from fasta_path.
    return [parser(single_record) for single_record in blast_records]

Example #40

0

Show file

def getBLAST(arg):
    """Run a BLAST search described by *arg* and print the first good HSP.

    ``arg[1]`` .. ``arg[7]`` supply, in order: program, database, sequence,
    expect, hitlist_size, matrix_name and alignments.

    At most one line is printed: the first HSP with an e-value below
    0.0001. Its fields are joined with "$": organism name, protein,
    accession, e-value, identities, identities as a percentage of the HSP
    query length, score and bit score.
    """
    option_names = ("program", "database", "sequence", "expect",
                    "hitlist_size", "matrix_name", "alignments")
    options = {key: arg[position]
               for position, key in enumerate(option_names, start=1)}
    xml_handle = NCBIWWW.qblast(**options)

    max_expect = 0.0001
    printed = 1          # starts at 1; reaching 2 means one line was printed
    alignments_seen = 0

    for blast_result in NCBIXML.parse(xml_handle):
        for hit in blast_result.alignments:
            for segment in hit.hsps:
                if segment.expect < max_expect and printed < 2:
                    # Header of the BLAST result.
                    title = str(hit.title)
                    # Organism name: the text inside the first [...] pair.
                    organism = title.split('[', 1)[1].split(']')[0].split('>')[0]
                    protein = title.split('|')[4].split('[')[0]
                    accession = hit.title.split('|')[3]
                    identity_pct = float(segment.identities) / float(
                        len(segment.query)) * float(100)
                    fields = (organism, protein, accession, segment.expect,
                              segment.identities, identity_pct,
                              segment.score, segment.bits)
                    print("$".join(str(field) for field in fields))
                    printed += 1

                if printed >= 2:
                    break
            alignments_seen += 1
        # Stop after the first record only when it held exactly one alignment.
        if alignments_seen == 1:
            break

Example #41

0

Show file

File: offtarget.py Project: igemsoftware/AMU-Poznan2014

def blast_offtarget(fasta_string):
    """Count off-target hits for a sequence using BLAST.

    A local blastn run against refseq_rna is tried first and the output
    lines mentioning Homo sapiens are counted. If the command-line tool
    fails with ``ApplicationError``, the search is repeated through the
    NCBI web service restricted to txid9606 and the alignments are counted.

    Args:
        fasta_string(str): Fasta sequence.

    Returns:
        Offtarget value(int).
    """
    try:
        with blast_path():
            with open('fasta', 'w') as fasta_file:
                fasta_file.write(fasta_string)
            cline = NcbiblastnCommandline(
                query="fasta",
                db="refseq_rna",
                # The trailing space after "staxids" matters: adjacent string
                # literals are concatenated, and without it the two field
                # names fuse into one unknown specifier.
                outfmt=("'6 qseqid sseqid evalue bitscore sgi sacc staxids "
                        "sscinames scomnames stitle'"))
            stdout, stderr = cline()

        # The organism filter must read "Homo sapiens"; the masked spelling
        # previously in this literal could never match BLAST output.
        return sum(1 for line in stdout.split('\n') if 'Homo sapiens' in line)
    except ApplicationError:
        result_handle = NCBIWWW.qblast("blastn",
                                       "refseq_rna",
                                       fasta_string,
                                       entrez_query="txid9606 [ORGN]",
                                       expect=100,
                                       gapcosts="5 2",
                                       genetic_code=1,
                                       hitlist_size=100,
                                       word_size=len(fasta_string),
                                       megablast=True)
        blast_in = cStringIO.StringIO(result_handle.read())
        return sum(len(record.alignments)
                   for record in NCBIXML.parse(blast_in))

Example #42

0

Show file

File: parse_blast_xml.py Project: RonaldHShi/blast_python_scripts

def _write_row_if_recombinant(outputWriter, blast_record):
    """Write a summary row when the record's filtered HSPs pass the recombination check."""
    filteredHspStartEnds = FilterBlastRecord(blast_record)
    if filteredHspStartEnds and CheckPossibleRecomb(filteredHspStartEnds):
        WriteARow(outputWriter, blast_record, filteredHspStartEnds)


def BlastFastaXmlIndv(fasta_filename=None, xml_filename=None):
    """Summarise BLAST results into ``<input>.summary.tsv``.

    With *fasta_filename*, every record is searched remotely (blastn, "nr",
    restricted to KM204118.1). A search that raises ``ValueError`` is
    retried with a doubling delay, and the program exits once the delay has
    grown past 100 seconds. Otherwise, with *xml_filename*, previously
    saved BLAST XML is parsed instead. *fasta_filename* wins when both are
    given.

    Args:
        fasta_filename: path of a FASTA file to search record by record.
        xml_filename: path of a saved BLAST XML file.

    Raises:
        ValueError: if neither argument is given (previously this surfaced
            as an UnboundLocalError at the end of the function).
    """
    if fasta_filename:
        record_iterator = SeqIO.parse(fasta_filename, "fasta")
        # Context manager so the table is flushed and closed on any exit.
        with open(fasta_filename + ".summary.tsv", 'w') as output_table:
            outputWriter = csv.writer(output_table, delimiter="\t")
            for seq_record in record_iterator:
                wait_time = 1
                while True:
                    print(seq_record.id)
                    try:
                        result_handle = NCBIWWW.qblast(
                            "blastn",
                            "nr",
                            seq_record.seq,
                            entrez_query="KM204118.1")
                        break
                    except ValueError:
                        print("Error encountered")
                        print("Trying again in " + str(wait_time) + " seconds")
                        if wait_time > 100:
                            sys.exit()
                        time.sleep(wait_time)
                        wait_time *= 2

                try:
                    blast_record = NCBIXML.read(result_handle)
                    _write_row_if_recombinant(outputWriter, blast_record)
                finally:
                    result_handle.close()

    elif xml_filename:
        with open(xml_filename + ".summary.tsv", 'w') as output_table:
            outputWriter = csv.writer(output_table, delimiter="\t")
            with open(xml_filename) as result_handle:
                for blast_record in NCBIXML.parse(result_handle):
                    _write_row_if_recombinant(outputWriter, blast_record)

    else:
        raise ValueError("either fasta_filename or xml_filename is required")

Example #43

0

Show file

    def fillDomainsBLAST(self):
        '''
        Replace vague protein annotations with the first informative BLAST hit.

        For every species in ``self.phagesProteins`` not yet recorded in the
        progress file ``files/phage_list_blast``, each protein whose
        annotation (element 0) contains "hypothetical", "uncharacterized" or
        "unknown" is searched with blastp against "nr" using element 1 as the
        query. The annotation is overwritten with the definition of the first
        hit that is neither hypothetical nor uncharacterized, cut at " [".
        After each species the progress list is pickled and
        ``self.saveDomains()`` is called.

        :return: None; ``self.phagesProteins`` is updated in place
        '''
        print('Finding functions/domains with BLAST')
        from Bio.Blast import NCBIWWW
        from Bio.Blast import NCBIXML
        import pickle
        from pathlib import Path
        vague_terms = ('hypothetical', 'uncharacterized', 'unknown')

        # Resume from the pickled progress list when one exists.
        # NOTE(review): pickle.load is only safe on files this program wrote.
        if Path("files/phage_list_blast").is_file():
            with open('files/phage_list_blast', 'rb') as f:
                list_done = pickle.load(f)
        else:
            list_done = []

        for spec in self.phagesProteins:
            if spec in list_done:
                continue
            for prot in self.phagesProteins[spec]:
                annotation = self.phagesProteins[spec][prot][0].lower()
                if not any(term in annotation for term in vague_terms):
                    continue
                result_handle = NCBIWWW.qblast(
                    'blastp',
                    'nr',
                    self.phagesProteins[spec][prot][1],
                    entrez_query=
                    'Acinetobacter baumannii (taxid:470), Escherichia coli (taxid:562), Klebsiella pneumonia (taxid:573)'
                )
                blastout = NCBIXML.read(result_handle)
                for ali in blastout.alignments:
                    definition = ali.hit_def.lower()
                    if 'hypothetical' in definition or 'uncharacterized' in definition:
                        continue
                    label = ali.hit_def[:ali.hit_def.find(' [')]
                    print(label)
                    self.phagesProteins[spec][prot][0] = label
                    break
            list_done.append(spec)
            with open('files/phage_list_blast', 'wb') as f:
                pickle.dump(list_done, f)
            self.saveDomains()

Example #44

0

Show file

File: workers.py Project: mnowotka/MgrFuncAdnot

 def execute(self, seqRecord, outFormat):
     """BLAST *seqRecord* against every configured database.

     The record is serialised to FASTA once and searched against each
     entry of ``self.params["db"]`` with the options held in
     ``self.params``. *outFormat* is accepted but not used here.

     Returns:
         A list with the raw text read from each search, in database order.
     """
     from Bio.Blast import NCBIWWW
     from Bio.Blast import NCBIXML
     fasta_text = seqRecord.format("fasta")
     settings = self.params
     return [
         NCBIWWW.qblast(settings['blast'],
                        database,
                        fasta_text,
                        expect=float(settings['cutoff']),
                        filter=settings['filter'],
                        hitlist_size=int(settings['nhits']),
                        matrix_name=settings['matrix'],
                        alignments=int(settings['nalign']),
                        descriptions=int(settings['ndesc']),
                        megablast=settings['megablast']).read()
         for database in settings["db"]
     ]

Example #45

0

Show file

    def call_blast(self, input_file, organism):
        """Run blastn for *input_file* against an organism-specific database.

        Args:
            input_file: query passed straight to ``NCBIWWW.qblast``.
            organism: "mm", "hs" or "rn" selects the matching
                ``self.*_blast_db`` attribute; anything else uses "nt".

        Returns:
            The handle returned by ``NCBIWWW.qblast``.
        """
        if organism == "mm":
            database = self.mus_musculus_blast_db
        elif organism == "hs":
            database = self.homo_sapiens_blast_db
        elif organism == "rn":
            database = self.rattus_norvegicus_blast_db
        else:
            database = "nt"

        search_options = {
            "hitlist_size": 10,
            "expect": 1000,
            "word_size": 7,
            "gapcosts": "5 2",
        }
        return NCBIWWW.qblast("blastn", database, input_file, **search_options)

Example #46

0

Show file

 def make_blast(self):
     """BLAST every protein in the FASTA file, restricted to Homo sapiens.

     Each record is searched with blastp against ``self.__db``. The replies
     are written one after another to ``self.__out``, each followed by a
     newline, and the elapsed time of every search is printed.
     """
     records = SeqIO.parse(self.__file_prot, "fasta")
     # Context manager so the output is flushed and closed if a search fails.
     with open(self.__out, "w") as save_file:
         for record in records:
             beginning = time.time()
             result_handle = NCBIWWW.qblast(
                 "blastp",
                 self.__db,
                 record.format("fasta"),
                 # The organism must read "Homo sapiens"; the masked spelling
                 # previously in this literal is not a valid organism name.
                 entrez_query='Homo sapiens [organism]')
             save_file.write(result_handle.read() + "\n")
             end = time.time()
             print(
                 "A proteína %s já foi submetida ao blast e demorou %s segundos. "
                 % (record.id, end - beginning))
     records.close()

Example #47

0

Show file

File: blast_remote.py Project: pantelismyr/Bioinformatics17

def main():
    """BLAST the first input sequence with blastp against "nr" and print the reply.

    The sequence is the first item returned by ``hf.Get_sequences()``.
    Nothing is searched or printed when that item is falsy.

    See the NCBIWWW documentation in the Biopython tutorial (chapter 7.1):
    http://biopython.org/DIST/docs/tutorial/Tutorial.html
    """
    first_record = hf.Get_sequences()[0]

    # Nothing to search when the input was empty.
    if not first_record:
        return

    # qblast arguments: the program ("blastp"), the database ("nr") and the
    # query sequence. The search runs on the NCBI server over the internet.
    # https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome
    reply = NCBIWWW.qblast("blastp", "nr", first_record.seq)
    # Write the raw results to stdout.
    print(reply.read())

Example #48

0

Show file

def assign(seqs, outfile, title):
    """BLAST all sequences in *seqs* in one request and assign them.

    Every record of the FASTA input is stripped of "-" gap characters and
    combined into one multi-FASTA query for blastp against "nr". The reply
    is saved as ``<title>_result.txt`` and then passed, with *seqs* and
    *outfile*, to ``simple_blast_assignment``.
    """
    result_path = "{}_result.txt".format(title)
    sequences = list(SeqIO.parse(seqs, "fasta"))

    fasta_entries = []
    for sequence in sequences:
        ungapped = str(sequence.seq.ungap("-"))
        fasta_entries.append(">{}\n{}".format(sequence.id, ungapped))

    result_handle = NCBIWWW.qblast("blastp", "nr", "\n".join(fasta_entries))
    with open(result_path, 'w') as results:
        print(result_handle.getvalue(), file=results)

    simple_blast_assignment(seqs, result_path, outfile, title=title)

Example #49

0

Show file

def get_blast_results(fasta_filename, blast_type="blastn", db="nt"):
    """Run the contents of a FASTA file through NCBI BLAST.

    Args:
        fasta_filename (str): Path of the FASTA file to submit.
        blast_type (str): BLAST program to run ("blastn", "blastp", etc.).
        db (str): BLAST database to search (for example "nt").

    Return:
        list of the records produced by ``NCBIXML.parse`` for the reply
    """
    with open(fasta_filename, 'r') as fasta_file:
        query_text = fasta_file.read()

    reply = NCBIWWW.qblast(blast_type, db, query_text)
    return list(NCBIXML.parse(reply))

Example #50

0

Show file

File: blast.py Project: duttaprat/proteinGAN

def get_blast_record(seq, alignments, descriptions, hitlist_size):
    """Query NCBI's QBLAST server with blastp against "nr".

    Args:
      seq: protein sequence as string
      alignments: max number of alignments from BLAST
      descriptions: max number of descriptions to show
      hitlist_size: max number of hits to return

    Returns:
      single Blast record

    """
    search_options = {
        "program": "blastp",
        "database": "nr",
        "alignments": alignments,
        "descriptions": descriptions,
        "hitlist_size": hitlist_size,
        "sequence": seq,
    }
    return NCBIXML.read(NCBIWWW.qblast(**search_options))

Example #51

0

Show file

File: BLAST_wrapper.py Project: Y-Lammers/BLAST_wrapper

def online_blast(seq_list):
    """BLAST *seq_list* against an NCBI database and return the result handle.

    The sequences are serialised to FASTA in memory. The program, database,
    megablast flag and hit-list size come from the module-level ``args``
    (``ba``, ``bd``, ``mb`` and ``hs``).
    """
    # Build the FASTA text in an in-memory buffer rather than a file on disk.
    fasta_buffer = StringIO.StringIO()
    SeqIO.write(seq_list, fasta_buffer, 'fasta')
    fasta_text = fasta_buffer.getvalue()

    logging.debug('BLASTING sequences agaist NCBI')
    return NCBIWWW.qblast(args.ba,
                          args.bd,
                          fasta_text,
                          megablast=args.mb,
                          hitlist_size=args.hs)

Example #52

0

Show file

 def execute_blast(self, records, output, newlist):
     """BLAST every gene in ``self.newlist`` and write one report per gene.

     Changes the working directory to *output*, indexes ``self.fastapath``
     into ``self.records`` and searches each gene with ``self.search``
     against ``self.db`` (text output, 15 hits, e-value 0.0001, metazoa
     only). Each report holds the gene name, its FASTA record and the
     BLAST reply.

     NOTE(review): the *records* argument is never read, and the FASTA
     section looks the record up through the *newlist* argument while
     everything else uses ``self.newlist`` - confirm both lists are meant
     to be the same.
     """
     os.chdir(output)
     self.records = SeqIO.index(self.fastapath, "fasta")
     for i, gene in enumerate(self.newlist):
         print("Blasting gene " + gene + " against the " +
               self.db + " database.")
         result_handle = NCBIWWW.qblast(self.search,
                                        self.db,
                                        self.records[gene].seq,
                                        format_type='Text',
                                        hitlist_size=15,
                                        expect=0.0001,
                                        entrez_query='metazoa[Organism]')
         with open('{0}_result_handle.txt'.format(gene), 'w') as f:
             f.write('Gene: ' + gene + '\n\n\n')
             f.write('Seq:\n' + self.records[newlist[i]].format('fasta'))
             f.write(result_handle.read())

Example #53

0

Show file

def protein_blast(protein,
                  criteria,
                  threshold,
                  filename='blast.fasta',
                  db='swissprot'):
    """Run blastp and save the hits whose positives percentage is high enough.

    For each alignment only the first HSP is considered. Its subject
    sequence is written in FASTA form when
    ``positives / align_length * 100`` is at least *threshold*.

    Args:
        protein: query passed to ``NCBIWWW.qblast``.
        criteria: value used as the ``entrez_query`` restriction.
        threshold: minimum positives percentage (0-100) for a hit to be kept.
        filename: path of the FASTA file to write.
        db: database name passed to ``NCBIWWW.qblast``.
    """
    handle = NCBIWWW.qblast('blastp', db, protein, entrez_query=criteria)
    result = NCBIXML.read(handle)

    # The original ended with "out.close" (no call), so the file was never
    # closed; the context manager guarantees it is.
    with open(filename, 'w') as out:
        for alignment in result.alignments:
            first_hsp = alignment.hsps[0]
            positives_pct = (float(first_hsp.positives) /
                             first_hsp.align_length * 100.0)
            if positives_pct >= threshold:
                out.write('>' + alignment.hit_id + '\n' +
                          first_hsp.sbjct + '\n\n')

Example #54

0

Show file

File: blast.py Project: metagenlab/BioVx

 def blastp(self, acc):
     """Run blastp for *acc* and add the GI numbers of new hits to ``self.gis``.

     The second "|"-separated field of every alignment title is taken as
     the GI number. Values not already in ``self.gis`` are appended as
     integers. On any error, ``self.status[acc]`` is set to False unless it
     already has a value.

     NOTE(review): a GI that occurs several times within one reply is
     appended several times - confirm whether that is intended.
     """
     try:
         gis = []
         print('here')
         result_handle = NCBIWWW.qblast("blastp",
                                        "nr",
                                        acc,
                                        format_type="XML",
                                        expect=self.blast_threshold)
         print('here')
         for blast_record in NCBIXML.parse(result_handle):
             for alignment in blast_record.alignments:
                 gis.append(alignment.title.split("|")[1])
         unique = [int(i.strip()) for i in gis if int(i) not in self.gis]
         self.gis.extend(unique)
     except Exception:
         # Not a bare except: Ctrl+C and SystemExit must still stop the run.
         self.status.setdefault(acc, False)
     return

Example #55

0

Show file

def search_blast(protien, numHits=50):
    """Run blastp against "nr", save the reply and return the hit definitions.

    The raw reply is written to ``my_blast.xml``. The names are then cut
    out of that text: each piece following a ``</Hit_id>`` line is trimmed
    at ``</Hit_def>`` and split on ``  <Hit_def>``.

    NOTE(review): the search asks for format_type='HTML' while the
    extraction below looks for XML tags - confirm the intended format.

    Args:
        protien: query passed to ``NCBIWWW.qblast``.
        numHits: maximum number of hits; converted with ``int``.

    Returns:
        A list with the extracted hit definition strings.
    """
    result_handle = NCBIWWW.qblast("blastp",
                                   "nr",
                                   protien,
                                   hitlist_size=int(numHits),
                                   format_type='HTML')

    save_file = open("my_blast.xml", "w")
    payload = result_handle.read()
    save_file.write(payload)
    save_file.close()
    result_handle.close()

    names = []
    # The first piece precedes the first hit, so it is skipped.
    for chunk in payload.split('</Hit_id>\n')[1:]:
        definition_line = chunk.split('</Hit_def>\n')[0]
        names.append(definition_line.split('  <Hit_def>')[1])
    return names

Example #56

0

Show file

def cli(input, output):
    """BLAST every ``*.fasta`` file in a directory and write the HSPs found.

    Each file is searched with blastn against "nt" with a hit list of 5.
    Every HSP of every query result is written to *output*, numbered from 1
    within its query.

    Args:
        input: directory that is scanned for ``*.fasta`` files.
        output: writable text stream that receives the report.
    """
    # Open each file and print the filename to the terminal.
    for filename in glob.glob(os.path.join(input, '*.fasta')):
        print(filename)
        # Plain 'r': the 'U' flag was removed in Python 3.11, and universal
        # newlines are already the default for text mode in Python 3.
        with open(filename, 'r') as fasta_handle:
            query_text = fasta_handle.read()
        # Do the actual blast search.
        result_handle = NCBIWWW.qblast('blastn', 'nt',
                                       query_text, hitlist_size=5)
        # Parse the results lazily, without storing them.
        for result in SearchIO.parse(result_handle, 'blast-xml'):
            for i, hsp in enumerate(result.hsps, start=1):
                output.write('Result #' + str(i) + '\n')
                output.write(str(hsp) + '\n\n')
    print('Done')

Example #57

0

Show file

def processFasta(fastaFile, resultDirectory):
    """BLAST a single-record FASTA file with blastx and process the result.

    The reply (e-value cutoff 1e-10, 5 hits) is saved in *resultDirectory*
    as ``<base name without extension>.xml``. The saved file is then
    reopened and passed to ``handelBlastResult`` together with the base
    name.

    Args:
        fastaFile: path of a FASTA file holding exactly one record.
        resultDirectory: directory that receives the XML file.
    """
    print("Writing to directory ==> " + resultDirectory)
    record = SeqIO.read(fastaFile, format="fasta")
    result_handle = NCBIWWW.qblast("blastx",
                                   "nr",
                                   record.format("fasta"),
                                   expect=1e-10,
                                   hitlist_size=5)
    genomeName = os.path.basename(fastaFile).rsplit('.', 1)[0]
    # os.path.join so a directory given without a trailing separator still
    # yields a file inside it, not a sibling with a fused name.
    location = os.path.join(resultDirectory, genomeName + '.xml')

    try:
        with open(location, "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()

    # Reopen the saved copy and close it once processing has finished.
    with open(location) as saved_results:
        handelBlastResult(saved_results, genomeName)

Example #58

0

Show file

def find_homologues(protACC, max_number=10, filename="blast.xml"):
    """
    Run a protein BLAST for an accession number and list the hits.

    The reply of a blastp search against "nr" is saved to *filename*, read
    back from there and parsed.

    protACC: protein accession number to search for.
    max_number: maximum number of hits requested (default 10).
    filename: where the XML reply is stored (default "blast.xml").

    Returns the accession numbers of the BLAST hits as a list.
    """
    result_handle = NCBIWWW.qblast("blastp",
                                   "nr",
                                   protACC,
                                   hitlist_size=max_number)
    with open(filename, "w") as out_handle:
        out_handle.write(result_handle.read())

    with open(filename) as xml_file:
        blast_record = NCBIXML.read(xml_file)
    return [hit.accession for hit in blast_record.alignments]

Example #59

0

Show file

def blast(sequence, vorm='blastp'):
    """Run a remote BLAST search against "nr", retrying until it succeeds.

    Each attempt is preceded by a five-second pause. There is no retry
    limit, so a persistent error keeps this function looping.

    Args:
        sequence: query passed to ``NCBIWWW.qblast``.
        vorm: BLAST program name.

    Returns:
        The raw result handle when *vorm* is 'blastp'; otherwise the first
        parsed BLAST record, or None when the reply holds no record.
    """
    # A loop replaces the recursive retry, which passed its arguments in
    # the wrong order (program as sequence and vice versa) and grew the
    # call stack with every failure.
    while True:
        try:
            time.sleep(5)
            result_handle = NCBIWWW.qblast(vorm,
                                           'nr',
                                           sequence,
                                           expect=(1 * (10**-5)),
                                           matrix_name='BLOSUM62',
                                           word_size=3,
                                           format_type='XML',
                                           hitlist_size=5)
            if vorm == 'blastp':
                return result_handle
            return next(iter(NCBIXML.parse(result_handle)), None)
        except Exception:
            # Not a bare except: Ctrl+C and SystemExit must still stop the run.
            continue

Example #60

0

Show file

def blastx_blasten(sequentie):
    """Run a blastx search and summarise every HSP as lines of text.

    The reply from NCBIWWW.qblast is saved to "Resultaat.xml" in the current
    working directory, and the first BLAST record is then parsed back from
    that file. For each HSP of each alignment the organism, protein, full
    title, alignment length, e-value and the first 75 characters of the
    query/match/subject lines are appended to the result list.

    :param sequentie: the query sequence passed to NCBIWWW.qblast
    :return: blast_resultaat, a list of strings describing the BLAST hits
    :return: titel, the title of the last alignment that contributed an HSP,
        or an empty string when there was none
    """
    titel = ''
    blast_resultaat = []
    result_handle = NCBIWWW.qblast("blastx",
                                   "nr",
                                   sequentie,
                                   alignments=1,
                                   hitlist_size=10)
    # Context managers make sure both file handles are closed, also on error.
    with open("Resultaat.xml", "w") as bestand:
        bestand.write(result_handle.getvalue())

    with open("Resultaat.xml", "r") as xml_handle:
        blast_record = next(NCBIXML.parse(xml_handle))

    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            blast_resultaat.append("****Alignment****")
            titel = alignment.title

            # Organism: the text between the first "[" and the following "]".
            # Titles without a bracketed part give an empty organism instead
            # of raising IndexError.
            bracket_parts = titel.split("[")
            if len(bracket_parts) > 1:
                organisme = bracket_parts[1].split("]")[0]
            else:
                organisme = ""

            # Protein: the third "|"-separated field up to the first "[".
            # Titles with fewer fields fall back to their last field.
            pipe_parts = titel.split("|")
            if len(pipe_parts) > 2:
                eiwit_veld = pipe_parts[2]
            else:
                eiwit_veld = pipe_parts[-1]
            eiwit = eiwit_veld.split("[")[0]

            blast_resultaat.append("Blast organism: {}".format(organisme))
            blast_resultaat.append("Protein: {}".format(eiwit))
            blast_resultaat.append("Sequence: {}".format(alignment.title))
            blast_resultaat.append("Length: {}".format(alignment.length))
            blast_resultaat.append("E-value: {}".format(hsp.expect))
            blast_resultaat.append(hsp.query[0:75] + "...")
            blast_resultaat.append(hsp.match[0:75] + "...")
            blast_resultaat.append(hsp.sbjct[0:75] + "...")
            blast_resultaat.append("\n")

    return blast_resultaat, titel