def blastBACTEUK(arg):
    """BLAST every FASTA record against prokaryotes and against eukaryotes.

    Each record is submitted twice to NCBI ``blastx`` on ``nr``: once limited
    to Bacteria/Archaea and once limited to Eukaryota.  For every record one
    ``id,<e-value of top hit>`` (or ``id, no hit``) line is appended to
    ``bacterial.txt`` and to ``eukaryotic.txt``.  A record whose search or
    parsing fails is noted in ``errorlog.txt`` and skipped.

    Args:
        arg: Path of the FASTA file to read.
    """
    def _top_hit_line(name, blast_record):
        # One CSV-ish line: e-value of the first HSP of the first alignment.
        if blast_record.descriptions:
            return name + ',' + str(blast_record.alignments[0].hsps[0].expect) + '\n'
        return name + ', no hit' + '\n'

    with open('bacterial.txt', 'a') as out, \
            open('eukaryotic.txt', 'a') as out2, \
            open(arg) as fasta_handle:
        for record in SeqIO.parse(fasta_handle, format="fasta"):
            try:
                name = record.id
                query = record.format("fasta")
                result_handleB = NCBIWWW.qblast(
                    "blastx", "nr", query, ncbi_gi=False,
                    descriptions="1", alignments="1", format_type="XML",
                    hitlist_size="1",
                    entrez_query='(Bacteria[ORGN] OR Archaea[ORGN])')
                result_handleE = NCBIWWW.qblast(
                    "blastx", "nr", query, ncbi_gi=False,
                    descriptions="1", alignments="1", format_type="XML",
                    hitlist_size="1",
                    entrez_query='(Eukaryota[ORGN])')
                blast_recordsB = NCBIXML.read(result_handleB)
                blast_recordsE = NCBIXML.read(result_handleE)

                if blast_recordsB.descriptions:
                    # Progress echo: only records with a prokaryotic hit.
                    print(record.id)
                out.write(_top_hit_line(name, blast_recordsB))
                out2.write(_top_hit_line(name, blast_recordsE))
            except Exception:
                # Best effort: log the failing record and carry on.  Catching
                # Exception (not a bare except) keeps Ctrl+C working.
                with open('errorlog.txt', 'a') as errorout:
                    errorout.write('problem blasting ' + record.id + '\n')
def check_blast_in(input_filename, taxid_line, num, GACTC_YES, blast_filename, perct):
    """BLAST the input sequences once per taxid and save the raw results.

    ``taxid_line`` is split on ``' OR '``; from each part the text between
    ``'(taxid:'`` and the first ``')'`` is taken as the taxonomy id.  For every
    id the FASTA file is re-read, its records are grouped into batches and
    each batch is sent to NCBI ``blastn`` (``nr``) restricted to that taxon.
    The raw responses are written to ``blast_filename + <taxid>``.

    Args:
        input_filename: Path of the FASTA file with the query sequences.
        taxid_line: Text holding one or more ``(taxid:N)`` entries.
        num, GACTC_YES, perct: Unused here; kept for call compatibility.
        blast_filename: Prefix of the per-taxid output files.
    """
    def _blast(batch, organism_filter):
        handle = NCBIWWW.qblast("blastn", "nr", batch, word_size=13,
                                hitlist_size=100, entrez_query=organism_filter,
                                expect=10)
        return handle.read()

    for valist in str(taxid_line).split(' OR '):
        txid_num = valist[valist.find('(taxid:') + 7:valist.find(')')]
        txid = 'txid' + txid_num + ' [ORGN]'
        # Context managers close both files even when a BLAST request raises.
        with open(blast_filename + txid_num, "w") as blast_result_file, \
                open(input_filename, "r") as input_file:
            typ = ''  # FASTA text of the batch being accumulated
            for seq_record in SeqIO.parse(input_file, "fasta"):
                # The threshold counts characters of FASTA text, not records.
                if len(typ) > 200:
                    blast_result_file.write(_blast(typ, txid))
                    typ = ''
                typ = typ + seq_record.format('fasta')
            if typ:
                # Whatever is left after the last full batch.
                blast_result_file.write(_blast(typ, txid))
    print("blast job done!")
def check_blast_ex(input_filename, piece_len, bool_customer, exclude_line):
    """Return start indices of sequence windows that match an excluded organism.

    The input sequence is cut into overlapping windows of ``piece_len`` bases
    (written to ``~seq.txt`` with the start index as FASTA header).  The
    windows are sent to NCBI ``blastn`` (``nr``) in batches, restricted by an
    Entrez query, and the raw XML is collected in ``~blastresult.xml``.  A
    window whose HSP has ``identities == piece_len`` is reported.

    On a BLAST or parsing failure a message is printed and the process exits
    with status 0 (kept from the original behaviour).

    Args:
        input_filename: FASTA file passed to ``read_fasta``.
        piece_len: Window length in bases.
        bool_customer: When true and ``exclude_line`` is non-empty, use
            ``exclude_line`` as the Entrez query.
        exclude_line: Custom Entrez query; otherwise ``'txid9606 [ORGN]'``.

    Returns:
        set of int: start indices of the windows with a full-length match.
    """
    bad_match = set()
    if bool_customer and exclude_line != '':
        query_line = exclude_line
    else:
        query_line = 'txid9606 [ORGN]'

    def _submit(batch):
        return NCBIWWW.qblast("blastn", "nr", batch, word_size=13,
                              hitlist_size=100, entrez_query=query_line,
                              expect=10)

    (input_id, input_seq, input_len) = read_fasta(input_filename)
    with open('~seq.txt', 'w') as f:
        # NOTE(review): this range stops one short of the final window
        # (start == input_len - piece_len); confirm whether that is intended.
        for i in range(0, input_len - piece_len):
            f.write('>' + str(i) + '\n' + input_seq[i:i + piece_len] + '\n')

    count_piece = int(os.path.getsize('~seq.txt') / 2000) + 1
    counter_i = 0
    typ = ''
    try:
        with open('~seq.txt', 'r') as input_file, \
                open('~blastresult.xml', "w") as blast_result_file:
            for seq_record in SeqIO.parse(input_file, "fasta"):
                typ = typ + seq_record.format('fasta')
                if len(typ) > 2000:
                    counter_i = counter_i + 1
                    try:
                        print("blasting")
                        result_handle = _submit(typ)
                        print(str(counter_i) + " out of " + str(count_piece) + " is blasted!")
                    except Exception:
                        # One retry before giving up on NCBI.
                        try:
                            print("mistake happens when tryint to connect to NCBI blast engine, try again!")
                            result_handle = _submit(typ)
                            print("sucessfully connect to NCBI blast engine at the second try!")
                        except Exception:
                            print("can't use NCBI blast at this moment!")
                            sys.exit(0)
                    blast_result_file.write(result_handle.read())
                    typ = ''
            if typ:
                # Only submit the trailing batch when something is left;
                # an empty query would just make the request fail.
                blast_result_file.write(_submit(typ).read())
    except Exception:
        print("Error happpens while getting the blast result! Unable to use blast this time!")
        sys.exit(0)

    try:
        with open('~blastresult.xml') as xml_handle:
            for blast_record in NCBIXML.parse(xml_handle):
                for alignment in blast_record.alignments:
                    for hsp in alignment.hsps:
                        if hsp.identities == piece_len:
                            # The FASTA header of each window is its start
                            # index, so the query title identifies the window.
                            bad_match.add(int(blast_record.query.split()[0]))
    except Exception:
        print("Error happens while parsing blast result")
        sys.exit(0)
    return bad_match
def run_blast(fasta, type):
    """BLAST every record of a FASTA file and store one XML file per record.

    Results go to ``split_xml/<record id>.xml``; the record id and the value
    of ``clock()`` are printed after each search.

    Args:
        fasta: FASTA file (path or handle) accepted by ``SeqIO.parse``.
        type: ``"prot"`` runs ``blastp`` (word size 3); ``"nucl"`` runs
            ``blastn`` (word size 11).  Any other value does nothing.
    """
    if type == "prot":
        program, word_size = "blastp", 3
    elif type == "nucl":
        # The nucleotide branch used to run blastp with a protein word size.
        program, word_size = "blastn", 11
    else:
        return

    for seqs in SeqIO.parse(fasta, "fasta"):
        clock()
        seq_id = str(seqs.id)
        with open("split_xml/%s.xml" % seq_id, "w") as out:
            # NOTE(review): ncbi_gi receives the record id, as in the original
            # call; confirm this is the intended value for that option.
            ncbi = NCBIWWW.qblast(program=program, database="nr",
                                  sequence=str(seqs.seq), format_type="XML",
                                  ncbi_gi=seq_id, alignments=20,
                                  word_size=word_size)
            out.write(ncbi.read())
        print("%s\t%f" % (seq_id, float(clock())))
def fetchGenbankData(seq_list):
    """Run a ``blastp`` search for every sequence and save the XML results.

    For each key of ``seq_list`` the sequence is submitted to NCBI (``nr``)
    and the response is written to ``<key>.xml``.  A ``ValueError`` from the
    search is retried; after three failed attempts for one key the process
    exits with status 1.

    Args:
        seq_list: Mapping of taxon/sequence id to query sequence.
    """
    import sys
    import time

    Entrez.email = "*****@*****.**"
    max_attempts = 3
    for taxa in seq_list:
        seq = seq_list[taxa]
        label = str(taxa)
        print("BLAST-ing NCBI for sequence ID: " + label)
        for attempt in range(max_attempts):
            try:
                blast_handle = NCBIWWW.qblast('blastp', 'nr', seq)
                try:
                    with open(label + '.xml', 'w') as blast_file:
                        blast_file.write(blast_handle.read())
                finally:
                    blast_handle.close()
            except ValueError:
                print("Something went wrong, my GenBank query for taxa " + label + " returned no records.")
                if attempt + 1 < max_attempts:
                    print("I'm trying again. . .")
                    time.sleep(5)  # same total pause as the original 3 s + 2 s
            else:
                print(". . . results written to " + label + '.xml')
                break
        else:
            # Every attempt failed.  The original give-up handler could never
            # run, so failures were silently skipped.
            print("I'm not going retry anymore. Sorry.")
            sys.exit(1)
def find_closest_ref(fasta_file, callback=None, update_callback=lambda d: None, organism=entrez_CFSAN_genera):
    """Find the closest NCBI reference for a contig and report its fetch URL.

    The contigs are sorted by length; the shortest contig of at least 1500
    bases is used as query, or the longest contig when none reaches that
    size.  The query is searched with ``blastn`` against ``chromosome``,
    restricted by ``organism``, and the second ``|``-separated field of the
    top hit id is taken as the reference id.

    Args:
        fasta_file: Path of the FASTA file with the assembled contigs.
        callback: Function receiving progress messages; defaults to printing
            them with a timestamp.
        update_callback: Called with ``{'ref_file': id, 'ref_url': url}``.
        organism: Entrez query restricting the search.

    Returns:
        The reference id taken from the top hit.

    Raises:
        ValueError: If the FASTA file has no records or BLAST returns no hit.
    """
    if not callback:
        import datetime

        def callback(s):
            print("[{}] {}".format(datetime.datetime.today().ctime(), s))

    callback("Importing modules...")
    from Bio.Blast import NCBIWWW
    import xml.etree.ElementTree as xml

    callback("Loading fasta ({})...".format(fasta_file))
    with open(fasta_file, 'r') as f:
        # key=len keeps the ascending, stable order of the old cmp-based sort.
        contigs = sorted(SeqIO.parse(f, 'fasta'), key=len)
    if not contigs:
        raise ValueError("no sequences found in {}".format(fasta_file))
    # First contig that is long enough, else the last (longest) one.
    contig = next((c for c in contigs if len(c) >= 1500), contigs[-1])

    callback("Longest contig is {} bases. BLASTing...".format(len(contig)))
    r = NCBIWWW.qblast("blastn", "chromosome",
                       ">{}\n{}".format(contig.description, contig.seq),
                       alignments=1,
                       entrez_query="{}".format(organism),
                       hitlist_size=1,
                       filter='L')
    callback("BLAST finished.")
    result = xml.parse(r)
    hit_id = result.find(".//Iteration/Iteration_hits/Hit/Hit_id")
    if hit_id is None:
        raise ValueError("BLAST returned no hit for {}".format(fasta_file))
    refseq = hit_id.text.split("|")[1]
    refseq_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id={}&rettype=fasta&retmode=text'.format(refseq)
    update_callback({'ref_file': refseq, 'ref_url': refseq_url})
    return refseq
def blast2(self):
    """BLAST the FASTA file ``self.infile`` and print the significant hits.

    The file content is echoed, together with the request parameters and
    their URL-encoded form (debug output only; the request itself is made by
    ``NCBIWWW.qblast``).  The query is run with ``blastn`` on ``nr`` with at
    most 10 hits, and every HSP with an e-value below 0.001 is printed.
    """
    # Close the input file deterministically instead of leaking the handle.
    with open(self.infile) as fasta_handle:
        fasta_string = fasta_handle.read()
    print(fasta_string)

    database = "nr"
    program = "blastn"
    parameters = [
        ('DATABASE', database),
        ('PROGRAM', program),
        ('QUERY', fasta_string),
        ('CMD', 'Put'),
    ]
    query = [x for x in parameters if x[1] is not None]
    message = urllib.urlencode(query)
    print(query)
    print(message)

    E_VALUE_THRESH = 0.001
    result_handle = NCBIWWW.qblast(program, database, fasta_string, hitlist_size=10)
    try:
        # parse() yields one record per query sequence in the file.
        for blast_record in NCBIXML.parse(result_handle):
            for alignment in blast_record.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print("alignment: %s" % (alignment.title,))
                        print("e-value: %s" % (hsp.expect,))
    finally:
        result_handle.close()
def run(self):
    """Run the configured BLAST search and collect its sequences.

    Returns:
        dict with ``blast_xml`` (raw XML text), ``blast_records`` (list of
        parsed records) and ``sequences`` (records from
        ``self.get_seqrecords`` after ``self.delete_same``).
    """
    res = NCBIWWW.qblast(self.program, self.database, self.sequence, **self.queryextra)
    # Materialise the parser output.  It is a one-shot generator, so returning
    # it after collecting the alignments handed callers an exhausted iterator.
    blast_records = list(NCBIXML.parse(res))
    alignments = [
        alignment
        for record in blast_records
        for alignment in record.alignments
    ]
    records = list(self.get_seqrecords(alignments))
    records = self.delete_same(records)
    return {"blast_xml": res.getvalue(),
            "blast_records": blast_records,
            "sequences": records}
def taxBLASTn(uniprotID, taxName, proSeq):
    """Map a protein to DNA of one organism through a ``tblastn`` search.

    The protein is searched against ``nr`` (e-value cut-off 0.0001) limited
    to ``taxName``.  For the top hit, the GI number is extracted from the hit
    id and ``chromParse`` is asked for the matching DNA region.

    Args:
        uniprotID: Unused here; kept for call compatibility.
        taxName: Organism name used in the Entrez ``[organism]`` filter.
        proSeq: Protein query sequence.

    Returns:
        ``(mapTuple, debug)``.  ``mapTuple`` is
        ``(accession, dna_seq, 'TBLASTN', midline)`` on success and ``()`` when
        there is no hit, no GI in the hit id, no DNA, or any error occurred.
        ``debug`` is a log string (empty on early failure).
    """
    mapTuple, debug = (), ''
    try:
        result_handle = NCBIWWW.qblast("tblastn", "nr", proSeq, expect=.0001,
                                       entrez_query=taxName + '[organism]')
        try:
            string = result_handle.read()
        finally:
            result_handle.close()

        tree = xml.etree.ElementTree.fromstring(string)
        iteration = tree.find("BlastOutput_iterations/Iteration")
        hits = iteration.findall("Iteration_hits/Hit") if iteration is not None else []
        if not hits:
            return mapTuple, debug
        topHit = hits[0]

        accessionNCBI = topHit.findtext("Hit_accession")
        midseq = topHit.findtext("Hit_hsps/Hsp/Hsp_midline")
        Hit_id = topHit.findtext("Hit_id")
        Hit_from = int(topHit.findtext("Hit_hsps/Hsp/Hsp_hit-from"))
        Hit_to = int(topHit.findtext("Hit_hsps/Hsp/Hsp_hit-to"))

        match = re.search(r'gi\|(\w+)\|', Hit_id)
        if match is None:
            # Hit ids without a "gi|...|" field cannot be mapped.
            return mapTuple, debug
        GI = match.group(1)

        debug += '\ttBLASTn hit accession and match indices: '+str(accessionNCBI)+' ('+str(Hit_to)+', '+str(Hit_from)+')\n'
        dna_seq = chromParse(GI, Hit_from, Hit_to)
        if dna_seq != '':
            mapTuple = (accessionNCBI, dna_seq, 'TBLASTN', midseq)
        return mapTuple, debug
    except Exception:
        # Deliberate best effort: any failure yields the values gathered so
        # far.  Exception (not a bare except) lets Ctrl+C through.
        return mapTuple, debug
def internetBLAST(inputFile, fileFormat='fasta', evalue=0.001):
    """Report the best bit-score/length ratio of an online BLAST per record.

    Every record of ``inputFile`` is searched with ``blastp`` on ``nr``.  For
    each HSP with an e-value below ``evalue`` the ratio ``bits / subject
    length`` is computed.  The highest ratio and its subject title are
    printed to stdout as ``id<TAB>ratio<TAB>subject``; without such an HSP
    ``id<TAB>NA<TAB>NA`` is printed to both stdout and stderr.
    """
    for seqRecord in SeqIO.parse(inputFile, fileFormat):
        sys.stderr.write(' '.join(['Doing BLAST (internet) search for',
                                   str(seqRecord.id)]) + '\n')
        best = None
        handle = NCBIWWW.qblast('blastp', 'nr', seqRecord.seq)
        for blastRecord in NCBIXML.parse(handle):
            for alignment in blastRecord.alignments:
                for hsp in alignment.hsps:
                    if not hsp.expect < evalue:
                        continue
                    candidate = ((hsp.bits / alignment.length), alignment.title)
                    # ">=" keeps the last of several equal maxima, like
                    # taking the final element of a stable ascending sort.
                    if best is None or candidate[0] >= best[0]:
                        best = candidate
        if best is None:
            na_line = '%s\t%s\t%s' % (seqRecord.id, 'NA', 'NA')
            sys.stdout.write(na_line + '\n')
            sys.stderr.write(na_line + '\n')
        else:
            ratio, subject = best
            sys.stdout.write('%s\t%f\t%s' % (seqRecord.id, ratio, subject) + '\n')
def blast(arguments):
    """Worker that runs one NCBI web BLAST and appends the result to a file.

    Args:
        arguments: Sequence of ``(program, (name, seq), database, evalue,
            hitlist_size, output_num, output_format)``.

    The result is appended to ``blast_out_<output_file>_<output_num>``, where
    ``output_file`` is a module-level name defined by the main program.  A
    ``ValueError`` from the search ends the process with an explanatory
    message.
    """
    blast_program = arguments[0]
    name, seq = arguments[1]
    database = arguments[2]
    evalue = arguments[3]
    hitlist = arguments[4]
    output_num = arguments[5]
    output_format = arguments[6]

    # The context manager closes the file on the sys.exit() path as well.
    with open("blast_out_%s_%s" % (output_file, output_num), "a") as save_file:
        try:
            result_handle = NCBIWWW.qblast(
                blast_program,
                database,
                ">%s\n%s" % (name, seq),
                expect=evalue,
                hitlist_size=hitlist,
                format_type=output_format,
            )
        except ValueError:
            # Typically an incompatible sequence type / program pair.
            sys.exit("\nPlease check the compatibility between the input sequence type and the BLAST program")
        save_file.write(result_handle.read())
def main(argv):
    """Command-line entry point: BLAST a FASTA file and save the result.

    Options:
        -f  path of the input FASTA file (required)
        -o  path of the output file (default ``./blast.xml``)
        -e  expect value (default ``10e-20``)
        -h  print usage

    Invalid options, ``-h`` and a missing ``-f`` print the usage line and exit
    with status 2.

    Args:
        argv: Argument list without the program name.
    """
    usage = "BlastZFGenome.py -f <path to input fasta file> -o <path to output directory and filename (default ./blast.xml)> -e <expect value (default 10e-20)>"
    try:
        opts, args = getopt.getopt(argv, 'hf:o:e:')
    except getopt.GetoptError:  # the original name "GetOptError" does not exist
        print(usage)
        sys.exit(2)

    output = './blast.xml'
    fastafile = ''
    expect = 10e-20
    for opt, arg in opts:
        if opt == "-h":
            print(usage)
            sys.exit(2)
        elif opt == "-o":
            output = arg
        elif opt == "-f":
            fastafile = arg
        elif opt == "-e":
            expect = float(arg)

    if not fastafile:
        # Without an input file there is nothing to search.
        print(usage)
        sys.exit(2)

    with open(fastafile) as fasta_handle:
        fasta = fasta_handle.read()
    result_handle = NCBIWWW.qblast("blastn", "GPIPE/59729/101/ref_top_level", fasta, expect=expect)
    try:
        with open(output, "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()
def search_blast(self, accession, program="blastp", database="nr"):
    """BLAST the UniProt sequence that carries the given accession.

    The first record of ``self.uniprot`` listing ``accession`` supplies the
    query sequence.  The raw result is written to ``blast.xml`` through
    ``self.open`` and then loaded with ``self.load_blast``.

    See:
        http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc73
        http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc75

    Args:
        accession: Accession number to look up.
        program: BLAST program name.
        database: BLAST database name.

    Returns:
        ``self.blast_records`` as populated by ``self.load_blast``.

    Raises:
        ValueError: If no record with a non-empty sequence matches.
    """
    assert self.uniprot is not None

    sequence = None
    for record in self.uniprot:
        if accession in record.accessions:
            sequence = record.sequence
            # Stop at the first match; the old inner-loop break kept scanning
            # the remaining records.
            break
    if not sequence:
        raise ValueError('No matching sequence for the accession number')

    handle = NCBIWWW.qblast(program, database, sequence)
    try:
        with self.open('blast.xml', 'w') as fp:
            fp.write(handle.read())
    finally:
        handle.close()

    self.load_blast()
    return self.blast_records
def get_OT(sample_seqs):
    """Summarise ``tblastn`` hits of the samples as tab-separated text.

    All ``(header, sequence)`` pairs are sent as one query to ``nr`` with the
    Entrez query ``scenedesmus dimorphus``.  One line is produced per BLAST
    record: header, hit id, strand (or ``/``) and a code that is ``1`` when
    the summed subject length of the HSPs is below 90% of the sample length,
    ``2`` otherwise, and ``0`` when the record has no alignment.

    NOTE(review): when a record has several alignments only the line built
    for the last one is kept, as in the original; confirm that is intended.
    """
    query_string = ''.join(seq[0] + '\n' + seq[1] + '\n' for seq in sample_seqs)
    blast_handle = NCBIWWW.qblast('tblastn', 'nr', query_string,
                                  entrez_query='scenedesmus dimorphus')
    blast_handle.seek(0)

    lines = []
    for index, record in enumerate(NCBIXML.parse(blast_handle)):
        if record.alignments:
            for align in record.alignments:
                frames = [hsp.frame[1] for hsp in align.hsps]
                strand = plus_or_minus(frames[0]) if valid_align(frames) else '/'
                covered = float(sum([len(hsp.sbjct) for hsp in align.hsps]))
                query_coverage = covered / len(sample_seqs[index][1])
                code = str(1) if query_coverage < .9 else str(2)
                row = (sample_seqs[index][0] + '\t' + align.hit_id + '\t'
                       + strand + '\t' + code)
        else:
            row = sample_seqs[index][0] + '\t' + ' '*28 + '\t' + ' ' + '\t' + str(0)
        lines.append(row + '\n')
    return ''.join(lines)
def main():
    """BLAST consecutive line pairs of the genome text file.

    The first line of the file is skipped.  After that, lines are taken two
    at a time: the pair is concatenated and searched with ``blastn`` on
    ``nt``, and the result overwrites ``overload_genome.xml``.  Finally
    ``Open_the_XML_file`` is called.
    """
    searches_done = 0
    held_line = ""
    with open("C:/Users/Evan/Desktop/Biosecurity_Stuff/Genomes/test_gene.txt", "r") as genome_file:
        for position, text in enumerate(genome_file):
            if position == 0:
                # first line is skipped
                continue
            if position % 2 == 1:
                # odd position: remember the line for the next search
                held_line = text
                continue
            # even position: search the remembered line plus this one
            result_handle = NCBIWWW.qblast("blastn", "nt", held_line + text)
            print(result_handle)  # shows that a search was performed
            with open("C:/Users/Evan/Desktop/overload_genome.xml", "w") as save_file:
                save_file.write(result_handle.read())
            result_handle.close()
            searches_done += 1
            print(" ".join(["Blast search number: ", str(searches_done)]))
    Open_the_XML_file()
def blast_query(self, counter, entrez_query=''):
    """Run the prepared primer query on NCBI BLAST and parse the results.

    The raw output is written to ``self._results_filename`` and the parsed
    records are stored in ``self._blast_results``.  ``counter`` is advanced
    after each of the five work steps.

    Args:
        counter: Progress object providing ``set_work`` and ``count``.
        entrez_query: Optional Entrez query restricting the search.

    Returns:
        bool: ``True`` when results were obtained and parsed, ``False`` when
        any step of the search failed (the error is printed).
    """
    counter.set_work(5)
    self._format_query()
    counter.count()
    self._save_query_config()
    counter.count()
    try:
        print('\nLaunching BLAST query #%d...' % self._primers_hash)
        blast_results = NCBIWWW.qblast('blastn',
                                       self.database,
                                       self._query.format('fasta'),
                                       expect=self.e_val,
                                       word_size=self.w_size,
                                       nucl_penalty=self.n_pen,
                                       nucl_reward=self.n_rew,
                                       filter=self.fltr,
                                       entrez_query=entrez_query,
                                       ungapped_alignment=self.no_gaps,)
        counter.count()
        # save results to a file
        try:
            with open(self._results_filename, 'w') as results_file:
                results_file.write(blast_results.read())
        finally:
            blast_results.close()
        print('\nBLAST output was written to:\n %s' % self._results_filename)
        counter.count()
        # parse results
        with open(self._results_filename, 'r') as results_file:
            self._blast_results = list(NCBIXML.parse(results_file))
        counter.count()
    except Exception as e:
        print('\nFailed to obtain BLAST query results from NCBI.')
        print(e)
        return False
    # Success used to fall through and return None, which is falsy just like
    # the failure value.
    return True
def blast_bulk (fasta_file, settings):
    """BLAST each sequence of a FASTA file and return the parsed results.

    Args:
        fasta_file: FASTA file accepted by ``SeqIO.parse``.
        settings: Indexable with program at 0, database at 1, hit list size
            at 2 and the megablast flag at 3.

    Returns:
        list: one parsed BLAST record per input sequence, in file order.
    """
    # The blast modules are imported from biopython
    from Bio.Blast import NCBIWWW, NCBIXML
    from Bio import SeqIO

    def _search(record):
        # Echo the record, submit it, and parse the single result.
        print(record)
        handle = NCBIWWW.qblast(settings[0], settings[1],
                                record.format('fasta'),
                                megablast=settings[3],
                                hitlist_size=settings[2])
        return NCBIXML.read(handle)

    # Read the whole file before the first search is started.
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    return [_search(record) for record in records]
def getOrthologs(seq, expect=10, hitlist_size=400, equery=None):
    """Fetch orthologous sequences using blast and return them as a dataframe.

    The sequence is searched with ``blastp`` on ``nr``; the raw XML is saved
    to ``my_blast.xml`` and converted with ``sequtils.getBlastResults``.
    Accession and definition are split out of the subject string, duplicate
    definitions are dropped and 100%-identity hits are removed.

    Args:
        seq: Protein query sequence.
        expect: E-value cut-off passed to BLAST.
        hitlist_size: Maximum number of hits requested from BLAST.
        equery: Optional Entrez query restricting the search.

    Returns:
        The filtered dataframe, or ``None`` when the BLAST request failed.
    """
    from Bio.Blast import NCBIWWW
    from Bio import Entrez
    Entrez.email = "*****@*****.**"
    try:
        print('running blast..')
        # hitlist_size used to be hard-coded to 500, ignoring the parameter.
        result_handle = NCBIWWW.qblast("blastp", "nr", seq, expect=expect,
                                       hitlist_size=hitlist_size,
                                       entrez_query=equery)
        time.sleep(2)
    except Exception:
        print('blast timeout')
        return

    with open("my_blast.xml", "w") as savefile:
        savefile.write(result_handle.read())
    with open("my_blast.xml") as xml_handle:
        df = sequtils.getBlastResults(xml_handle)

    df['accession'] = df.subj.apply(lambda x: x.split('|')[3])
    df['definition'] = df.subj.apply(lambda x: x.split('|')[4])
    df = df.drop(['subj', 'positive', 'query_length', 'score'], axis=1)
    print(len(df))
    df.drop_duplicates(subset=['definition'], inplace=True)
    df = df[df['perc_ident'] != 100]
    print(len(df))
    return df
def aa_to_mrna(aaseq):
    """Search an amino acid sequence with ``tblastn`` and return the raw output.

    The query runs against ``nr`` with ``descriptions=10``.  The response text
    is printed and returned unchanged.
    """
    handle = NCBIWWW.qblast("tblastn", "nr", aaseq, descriptions=10)
    raw_output = handle.read()
    print(raw_output)
    return raw_output
def process(hash):
    """OCR the item identified by ``hash``, BLAST the text, store the outcome.

    The text returned by ``ocr`` is searched with ``blastn`` on ``nr``.  The
    ``Result`` row with this hash is then updated with an error code and the
    BLAST output:

        0   BLAST returned output
        10  BLAST returned nothing
        34  OCR found no text (no search is run; the stored result is empty)

    Args:
        hash: Identifier of the item to process.
    """
    print('Starting thread for:', hash)
    text, error = ocr(hash)

    # Defined up front so the database update below also works when no search
    # ran; previously that path raised NameError.
    blast_output = ''
    if len(text) > 0:
        start_time = timeit.default_timer()
        blast = NCBIWWW.qblast('blastn', 'nr', text)
        elapsed = timeit.default_timer() - start_time
        print('blast:', elapsed)
        blast_output = blast.getvalue()
        if len(blast_output) == 0:
            error = 10  # BLAST returned no results
        else:
            error = 0  # everything OK
    else:
        error = 34  # OCR found no text

    # update the stored row with the results
    with app.test_request_context():
        r = Result.query.filter_by(hash=hash).first()
        r.error = error
        r.result = blast_output
        db.session.commit()
    return
def UniBLAST(code, Verbose=False):
    """Fetch a FASTA record for ``code`` and BLAST the code against PDB.

    Input
    ------
    Uniprot Code
    UniBLAST(code) e.g. UniBLAST('O00238')

    Description
    -----------
    Writes the FASTA text returned by Entrez and runs ``blastp`` on the
    ``pdb`` database with the code itself as query.

    Output
    ------
    - <code>.fasta       FASTA sequence
    - <code>_blast.xml   BLAST output in XML format
    """
    Entrez.email = random.choice(emails)
    if Verbose:
        print("Using email: %s" % (Entrez.email))

    # NOTE(review): the record is fetched from db="nucleotide" although the
    # search is a protein BLAST; confirm the intended Entrez database.
    net_handle = Entrez.efetch(db="nucleotide", id=code, rettype="fasta")
    try:
        # Fetch first so a failed request leaves no empty file behind.
        with open(code + ".fasta", "w") as out_file:
            out_file.write(net_handle.read())
    finally:
        net_handle.close()

    if Verbose:
        print("Running blastp")
    result_handle = NCBIWWW.qblast("blastp", "pdb", code)
    if Verbose:
        print("Done running blastp")
    try:
        with open(code + "_blast.xml", "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()
def get_blast_alignments(seq, query):
    """Return the ``blastn`` alignments that cover at least 80% of the query.

    The search runs on ``nr`` with up to 500 hits and an e-value limit of
    100.  For each alignment the HSP alignment lengths are summed; alignments
    whose sum is below 80% of ``len(seq)`` are removed from the record's
    alignment list, which is then returned.
    """
    handle = NCBIWWW.qblast(program="blastn", database="nr", sequence=seq,
                            entrez_query=query, format_type="XML",
                            hitlist_size=500, expect=100.0)
    blast = NCBIXML.read(handle)
    query_length = len(seq)

    def _long_enough(alignment):
        aligned = 0.0
        for hsp in alignment.hsps:
            aligned += hsp.align_length
        return not (aligned / query_length) < 0.8

    # Filter in place so the record keeps referring to the same list object.
    blast.alignments[:] = [a for a in blast.alignments if _long_enough(a)]
    return blast.alignments
def createPSSM():
    """Build a PSSM from the sequences in ``fastatmp``.

    The first sequence is also searched with ``blastn`` on ``nt`` and the raw
    result saved to ``my_blast.xml``.  A motif is created from all sequences,
    normalised with pseudocounts of 0.25 and converted to log-odds.

    Returns:
        The log-odds matrix, which is also printed.
    """
    print("Start PSSM")
    sequences = [
        str(seq_record.seq)
        for seq_record in SeqIO.parse("fastatmp", "fasta", IUPAC.unambiguous_dna)
    ]

    # BLAST the first ("typical") sequence and keep the raw output on disk.
    result_handle = NCBIWWW.qblast("blastn", "nt", sequences[0])
    with open("my_blast.xml", "w") as save_file:
        save_file.write(result_handle.read())
    result_handle.close()

    motif = motifs.create(sequences, alphabet=Gapped(IUPAC.unambiguous_dna))
    print("motif created")
    weights = motif.counts.normalize(pseudocounts=0.25)
    print("PWM done")
    pssm = weights.log_odds()
    print("PSSM done")
    print(pssm)
    return pssm
def get_descriptions(sample_seqs, organism):
    """Describe the ``tblastn`` hits of the samples within one organism.

    All ``(header, sequence)`` pairs are sent as one query to ``nr``.  One
    description is produced per BLAST record:
    ``[header, hit_id, strand or '/', coverage, hsp_info]`` where ``hsp_info``
    lists ``((query_start, query_end), (sbjct_start, sbjct_end),
    (query, match, sbjct, frame))`` sorted by query start.  Records without
    alignments give ``[header, ' ', ' ', 0.0, [], []]``.

    NOTE(review): when a record has several alignments only the description
    of the last one is kept, as in the original; confirm that is intended.
    """
    query_string = ''.join(seq[0] + '\n' + seq[1] + '\n' for seq in sample_seqs)
    blast_handle = NCBIWWW.qblast('tblastn', 'nr', query_string, entrez_query=organism)
    blast_handle.seek(0)

    def _hsp_entry(hsp):
        return ((hsp.query_start, hsp.query_end),
                (hsp.sbjct_start, hsp.sbjct_end),
                (hsp.query, hsp.match, hsp.sbjct, hsp.frame[1]))

    descs = []
    for index, record in enumerate(NCBIXML.parse(blast_handle)):
        if record.alignments:
            for align in record.alignments:
                desc = [sample_seqs[index][0], align.hit_id]
                frames = [hsp.frame[1] for hsp in align.hsps]
                desc.append(plus_or_minus(frames[0]) if valid_align(frames) else '/')
                covered = float(sum([len(hsp.sbjct) for hsp in align.hsps]))
                desc.append(covered / len(sample_seqs[index][1]))
                desc.append(sorted([_hsp_entry(hsp) for hsp in align.hsps],
                                   key=lambda t: t[0][0]))
        else:
            desc = [sample_seqs[index][0], ' ', ' ', 0.0, [], []]
        descs.append(desc)
    return descs
def blast_pdb(target_sequence, num_hits=1000):
    """Query the PDB through NCBI BLAST and build MSMSeeds from the hits.

    Parameters
    ----------
    target_sequence : String
        The sequence of the target to use to query blast
    num_hits : int, optional
        The maximum number of hits returned by BLAST. Default: 1000

    Returns
    -------
    msmseeds : list of MSMSeed objects
        One MSMSeed per alignment, built from the target sequence, the
        template sequence and structure returned by ``_retrieve_chain`` for
        the alignment accession, and the e-value of the first HSP.
    """
    from Bio.Blast import NCBIWWW, NCBIXML

    def _seed_for(alignment):
        e_val = alignment.hsps[0].expect
        template_fasta, template_structure = _retrieve_chain(alignment.accession)
        return MSMSeed(target_sequence, template_fasta, template_structure, e_val)

    result_handle = NCBIWWW.qblast("blastp", "pdb", target_sequence,
                                   hitlist_size=num_hits)
    blast_record = NCBIXML.read(result_handle)
    return [_seed_for(alignment) for alignment in blast_record.alignments]
def blast_execute(record):
    """BLAST one record with ``blastn`` on ``nt`` and save the raw result.

    The output file is ``BLAST-<record.name>.xml`` inside
    ``ana_dir/bla_dir/xml_dir``.
    """
    result_handle = NCBIWWW.qblast("blastn", "nt", record.seq)
    xml_path = os.path.join(ana_dir, bla_dir, xml_dir,
                            "BLAST-" + record.name + ".xml")
    with open(xml_path, "w") as save_file:
        save_file.write(result_handle.read())
    result_handle.close()
def blast_record(self):
    """BLAST ``self.genbank_record_number`` with ``blastp`` on ``nr``.

    The raw response text is stored in ``self.xml_result`` and returned.
    """
    record_number = self.genbank_record_number
    print("BLASTing record number %d ..." % int(record_number))
    handle = NCBIWWW.qblast("blastp", "nr", record_number)
    print("extracting result...")
    self.xml_result = handle.read()
    handle.close()
    return self.xml_result
def blastdemo(genbankID):
    """Run ``blastp`` on SwissProt for one id and print the strong hits.

    Every HSP with an e-value below ``1e-17`` is printed with its subject
    title, subject length, e-value and the first 75 columns of the alignment.
    Afterwards the length of every alignment is printed.

    NB: to scale this up the search must be run locally on a cluster.

    Args:
        genbankID: Identifier (or sequence) submitted as the query.
    """
    # run blastp on the swissprot database
    result_handle = NCBIWWW.qblast("blastp", "swissprot", genbankID)
    # read the results as XML
    blast_record = NCBIXML.read(result_handle)

    # deliberately very strict threshold
    E_VALUE_THRESH = 0.00000000000000001

    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            if hsp.expect < E_VALUE_THRESH:
                # Formatted strings instead of print("label:", value), which
                # printed a tuple under the Python 2 print statement.
                print("****Alignment****")
                print("sequence: %s" % (alignment.title,))
                print("length: %s" % (alignment.length,))
                print("e value: %s" % (hsp.expect,))
                print(hsp.query[0:75] + "...")
                print(hsp.match[0:75] + "...")
                print(hsp.sbjct[0:75] + "...")
    print("\n")
    for a in blast_record.alignments:
        print(a.length)
def blast_online(rec, result_xml_fpath):
    """BLAST one record on the NCBI web service, retrying on network errors.

    The record is searched with ``blastp`` on ``refseq_protein`` (10 hits) and
    the raw result is written to ``result_xml_fpath``.  HTTP and URL errors
    are logged and the request is repeated until it succeeds or the user
    interrupts.

    Args:
        rec: Sequence record providing ``format('fasta')``.
        result_xml_fpath: Path of the XML file to write.

    Returns:
        int: ``0`` once the result has been written, ``1`` when interrupted.
    """
    from Bio.Blast import NCBIWWW

    retrying = False
    while True:
        try:
            print(rec.format('fasta'))
            result_xml_f = NCBIWWW.qblast('blastp', 'refseq_protein',
                                          rec.format('fasta'), hitlist_size=10)
            with open(result_xml_fpath, 'w') as save_f:
                save_f.write(result_xml_f.read())
            # Leave the loop on success; without this the same query was
            # submitted again and again.
            # NOTE(review): 0 mirrors the "return 1" of the interrupt path;
            # confirm against the callers.
            return 0
        except urllib2.HTTPError as e:
            log.warn(' Warning: could not blast through web. HTTPError: %s. Code %s. '
                     '(You can press Ctrl+C to interrupt and continue later).'
                     % (e.msg, str(e.code)))
            retrying = True
        except urllib2.URLError as e:
            log.warn(' Warning: could not blast through web. URLError: %s. '
                     '(You can press Ctrl+C to interrupt and continue later).'
                     % (e.args))
            retrying = True
        except (KeyboardInterrupt, SystemExit, GeneratorExit):
            if retrying:
                log.info(' If you restart from this step and do not remove the "%s" directory, '
                         'the process will continue from here.' % blasted_singletones_dir)
            return 1
def blast_remos( r, db = 'nr' ):
    """Uses blast to find remos in a genome.

    The remo sequence is searched with ``blastn`` on ``db`` and every HSP with
    an e-value below 0.04 is printed.

    NOTE(review): both loops end with ``break``, so only the first remo of the
    first aligned sequence is searched.  The break placement is inferred from
    a source that had lost its indentation; confirm.
    """
    from Bio.Blast import NCBIWWW, NCBIXML
    import cStringIO

    xml_parser = NCBIXML.BlastParser()
    significance_cutoff = 0.04

    def _report(alignment, hsp):
        print('****Alignment****')
        print(' '.join(['sequence:', str(alignment.title)]))
        print(' '.join(['length:', str(alignment.length)]))
        print(' '.join(['e value:', str(hsp.expect)]))
        print(' '.join(['sbjct_start:', str(hsp.sbjct_start)]))
        print(hsp.query[0:75] + '...')
        print(hsp.match[0:75] + '...')
        print(hsp.sbjct[0:75] + '...')

    for aligned in r.get_aligned_sequences():
        for remo in r.get_remos_for( aligned ):
            seq = remo.get_sequence_for( aligned.centre_sequence, False )
            print('Blasting: %s...' % ( seq[:60] ))
            raw_xml = NCBIWWW.qblast( 'blastn', db, seq ).read()
            parsed = xml_parser.parse(cStringIO.StringIO(raw_xml))
            for alignment in parsed.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < significance_cutoff:
                        _report(alignment, hsp)
            break
        break
def blast_with_GIs(GI_seqIDs):
    """BLAST each GI identifier and save every result as a text file.

    Based in part on the Biopython cookbook example.  Each id is searched
    with ``blastn`` on ``nt`` in text format.  Successful results are written
    to ``blast_results_<n>.txt``, numbered from 1 in order of success.  Ids
    whose search fails are reported and skipped.

    Args:
        GI_seqIDs: Iterable of GI identifiers.
    """
    counter = 1
    for GI_ID in GI_seqIDs:
        try:
            result = NCBIWWW.qblast("blastn", "nt", GI_ID, format_type="Text")
            blast_results = result.read()
            with open("{}_{}.txt".format("blast_results", counter), "w") as outfile:
                outfile.write(blast_results)
            counter += 1
        except Exception:
            # Exception rather than a bare except, so Ctrl+C still stops the
            # batch instead of being reported as a missing sequence.
            print("No sequence available for gi|{}".format(GI_ID))
        # Pause between consecutive requests to the public server.
        time.sleep(1)
def do_blast(seq, organism, eVal):
    """Run ``blastp`` on ``nr``, retrying whenever the 120 s alarm fires.

    An alarm is armed before each attempt.  When ``TimeoutException`` is
    raised the request is repeated.  The alarm is always cancelled before the
    function is left.

    Args:
        seq: Protein query sequence.
        organism: Entrez query restricting the search.
        eVal: E-value cut-off.

    Returns:
        The result handle returned by ``NCBIWWW.qblast``.
    """
    try:
        while True:
            signal.alarm(120)
            try:
                return NCBIWWW.qblast("blastp", "nr", seq,
                                      entrez_query=organism, expect=eVal)
            except TimeoutException:
                print("Server timeout, trying again")
    finally:
        # Cancel the pending alarm on every exit path, so an unrelated error
        # cannot leave it armed for the caller.
        signal.alarm(0)
def blastProt(database, file_name, file_format):
    """BLAST the single record of a file with ``blastp`` and save the XML.

    The output file is named ``blast-Prot<d>.xml`` where ``<d>`` is the first
    digit found in ``file_name``.

    Args:
        database: BLAST database name.
        file_name: Path of the sequence file; must contain a digit.
        file_format: Format name understood by ``SeqIO``.

    Returns:
        str: name of the XML file written.

    Raises:
        ValueError: If ``file_name`` contains no digit.
    """
    # Validate the name before the slow remote search, not after it.
    mo = re.search(r"\d", file_name)
    if mo is None:
        raise ValueError("file name %r contains no digit" % (file_name,))
    xml_file = "blast-Prot" + mo.group() + ".xml"

    with open(file_name) as handle:
        record = SeqIO.read(handle, format=file_format)

    print("BLAST runnning")
    result_handle = NCBIWWW.qblast("blastp", database, record.format(file_format))
    print("BLAST finnished")
    try:
        with open(xml_file, "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        result_handle.close()
    return xml_file
def main():
    """BLAST every FASTA file of the megahit directory and save text reports.

    The working directory is changed to ``/home/yikylee/Desktop/megahit``.
    Each entry whose name contains ``.fa`` is read completely, searched with
    ``blastn`` on ``nt`` (text output, 10 hits) and the report is written to
    ``<prefix>_result.txt``, where the prefix is the name up to its first dot.
    """
    from Bio.Blast import NCBIWWW
    import os
    from Bio import SeqIO
    from io import StringIO

    print("Start reading FASTA files...")
    os.chdir("/home/yikylee/Desktop/megahit")
    for fasta in os.listdir():
        if ".fa" not in fasta:
            continue
        _prefix = fasta.split(".")[0]
        print("Now " + _prefix + "...")
        with open(fasta) as fasta_handle:
            sequence_data = fasta_handle.read()
        result_handle = NCBIWWW.qblast("blastn", "nt", sequence_data,
                                       format_type="Text", hitlist_size=10)
        print(_prefix + " analysis completed.")
        with open("./" + _prefix + "_result.txt", "w") as save_to:
            save_to.write(result_handle.read())
def buscaNcbi(query):
    """Search NCBI with ``blastp`` on ``nr`` and save the result as XML.

    The response is written to ``arq_file + ".xml"``.  A ``ValueError`` from
    the search is reported as a missing or invalid protein, followed by a
    five second pause.
    """
    # Protein search = blastp; nucleotide search would be blastn.
    try:
        print("Buscando arquivo...")
        blast_result = NCBIWWW.qblast("blastp", "nr", query)
        with open(arq_file + ".xml", "w") as blast_out:
            blast_out.write(blast_result.read())
        blast_result.close()
        print("Fim da busca.\nArquivo " + query + ".xml encontra-se disponível no diretório '" + path + "' para análise")
    except ValueError:
        print("\nProteína inexistente ou inválida\n\n\n")
        time.sleep(5)
def genebank_sequence(name):
    """BLAST every nucleotide id found by an Entrez search for ``name``.

    The id list of the search is printed.  Each id is searched with
    ``blastn`` on ``nt`` in text format and the report is appended to
    ``outputfile.txt``.  Ids whose search raises ``ValueError`` are skipped.
    """
    search_handle = Entrez.esearch(db="nucleotide", term=name, retmode="xml")
    id_list = Entrez.read(search_handle)['IdList']
    print(id_list)
    for seq_id in id_list:
        try:
            report = NCBIWWW.qblast("blastn", "nt", seq_id, format_type='Text').read()
            time.sleep(1)
            with open("outputfile.txt", "a") as outfile:
                outfile.write(report)
        except ValueError:
            # Nothing is written for this id.
            continue
def perform_blast(output, program, database, sequence, hitlist_size):
    """Run BLAST and append the raw result to a file.

    Called from blast_controller.

    Args:
        output: Path of the output file (opened in append mode).
        program: The BLAST program to be used.
        database: The database to BLAST against.
        sequence: The sequence to be blasted.
        hitlist_size: Maximum number of hits.
    """
    handle = NCBIWWW.qblast(program=program, database=database,
                            sequence=sequence, hitlist_size=hitlist_size)
    with open(output, "a") as out_handle:
        out_handle.write(handle.read())
    # Leaving the with-block has already closed the output file.
    handle.close()
def net_blast(query_record, program='blastn', database='nr'):
    """Run a BLAST search over the net after validating the request.

    Checks are made in this order and each failure raises ``ValueError``:
    the query is a ``SeqRecord``; it has at least 10 residues; the lower-cased
    program is a key of ``searches``; the query alphabet is an instance of the
    alphabet required by the program; the database is a known protein or
    nucleotide database; the database is allowed for the program.

    ARGUMENTS
        query_record: a SeqRecord object containing the query sequence
        program: the program to use, as per:
            http://www.ncbi.nlm.nih.gov/BLAST/blast_program.shtml
        database: the db to query, as per:
            http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=ProgSelectionGuide#db

    Returns the iterator of parsed BLAST records.
    """
    # --- query ---
    if not isinstance(query_record, SeqRecord):
        raise ValueError(u'Invalid Search Item')
    query_seq = query_record.seq
    if len(query_seq) < 10:
        raise ValueError(u"Query sequence is too short")

    # --- program ---
    program = program.lower()
    if program not in searches:
        raise ValueError(u"Invalid Program '%s'" % program)

    # --- alphabet and database ---
    required_alpha, required_dbs = searches[program]
    if not isinstance(query_seq.alphabet, required_alpha):
        raise ValueError(u"Query alphabet for '%s' must be '%s'" % (program, alphabets[program]))
    if database not in protein_db and database not in nucleotide_db:
        raise ValueError(u"Invalid database '%s'" % database)
    if database not in required_dbs:
        raise ValueError(u"Database '%s' cannot be used with program '%s'" % (database, program))

    # All checks passed: run the search and hand back the lazy parser.
    results = NCBIWWW.qblast(program, database, query_seq, format_type='XML')
    return NCBIXML.parse(results)
def blast_file(fasta_path, blast_db='nt', parser=basic_parser):
    """BLAST a FASTA file with megablast and parse every resulting record.

    Args:
        fasta_path: Path of the FASTA file; its whole text is the query.
        blast_db: Database name passed to ``qblast``.
        parser: Callable applied to each parsed BLAST record.

    Returns:
        list: ``parser(record)`` for each record, one per query sequence.
    """
    logging.info("Running BLAST {}".format(fasta_path))
    with open(fasta_path, 'r') as fasta_handle:
        fasta_string = fasta_handle.read()
    logging.debug(fasta_string)

    result_handle = NCBIWWW.qblast(BLAST_PROG, blast_db, fasta_string, megablast=True)
    logging.info("BLAST returned")

    blast_records = NCBIXML.parse(result_handle)
    logging.info("Analyzed BLAST")
    # each record is a single sequence search from fasta_path
    return [parser(single_record) for single_record in blast_records]
def getBLAST(arg):
    """Run BLAST and print a ``$``-separated summary of the first good HSP.

    ``arg[1]`` to ``arg[7]`` supply program, database, sequence, expect, hit
    list size, matrix name and number of alignments.  Only the first BLAST
    record is examined, and only its first HSP with an e-value below 0.0001
    is printed, as: organism, protein, accession, e-value, identities,
    identities as a percentage of the aligned query length, score, bits.
    """
    handle = NCBIWWW.qblast(program=arg[1],
                            database=arg[2],
                            sequence=arg[3],
                            expect=arg[4],
                            hitlist_size=arg[5],
                            matrix_name=arg[6],
                            alignments=arg[7])
    e_value_cutoff = 0.0001

    first_record = next(iter(NCBIXML.parse(handle)), None)
    if first_record is None:
        return
    for alignment in first_record.alignments:
        for hsp in alignment.hsps:
            if not hsp.expect < e_value_cutoff:
                continue
            # Header of the BLAST result
            header = str(alignment.title)
            # Organism name
            name = header.split('[', 1)[1].split(']')[0].split('>')[0]
            protein = header.split('|')[4].split('[')[0]
            accession = alignment.title.split('|')[3]
            queryCov = float(hsp.identities) / float(len(hsp.query)) * float(100)
            fields = (name, protein, accession, hsp.expect, hsp.identities,
                      queryCov, hsp.score, hsp.bits)
            print("$".join([str(field) for field in fields]))
            # Only one result is ever reported.
            return
def blast_offtarget(fasta_string):
    """Count human off-target hits for a sequence using BLAST.

    A local ``blastn`` run against ``refseq_rna`` is tried first; if the
    command-line tool fails with ``ApplicationError`` the search is repeated
    remotely through NCBI, restricted to taxid 9606.

    Args:
        fasta_string(str): Fasta sequence.

    Returns:
        Offtarget value(int): for the local run, the number of tabular
        output lines that mention "Homo sapiens"; for the remote fallback,
        the total number of alignments returned.
    """
    try:
        with blast_path():
            # The query is handed to blastn through a scratch file named
            # "fasta" in the current working directory.
            with open('fasta', 'w') as fasta_file:
                fasta_file.write(fasta_string)
            cline = NcbiblastnCommandline(
                query="fasta",
                db="refseq_rna",
                # The trailing space after "staxids" matters: without it the
                # two literals fuse into the invalid field "staxidssscinames".
                outfmt=("'6 qseqid sseqid evalue bitscore sgi sacc staxids "
                        "sscinames scomnames stitle'"))
            stdout, stderr = cline()
            # The organism name was previously masked ('H**o sapiens') and
            # therefore never matched any output line.
            blast_lines = [
                line for line in stdout.split('\n') if 'Homo sapiens' in line
            ]
            return len(blast_lines)
    except ApplicationError:
        # NOTE(review): word_size is the length of the whole input string;
        # if fasta_string carries a ">" header line this exceeds the
        # sequence length -- confirm that is intended.
        result_handle = NCBIWWW.qblast("blastn",
                                       "refseq_rna",
                                       fasta_string,
                                       entrez_query="txid9606 [ORGN]",
                                       expect=100,
                                       gapcosts="5 2",
                                       genetic_code=1,
                                       hitlist_size=100,
                                       word_size=len(fasta_string),
                                       megablast=True)
        # The handle returned by qblast can be parsed directly; copying it
        # into a second in-memory buffer first is unnecessary.
        return sum(
            len(record.alignments) for record in NCBIXML.parse(result_handle))
def _qblast_with_backoff(seq_record):
    """Submit one record to NCBI, retrying on ValueError with doubling waits."""
    wait_time = 1
    while True:
        print(seq_record.id)
        try:
            return NCBIWWW.qblast("blastn",
                                  "nr",
                                  seq_record.seq,
                                  entrez_query="KM204118.1")
        except ValueError:
            print("Error encountered")
            print("Trying again in " + str(wait_time) + " seconds")
            # Give up (terminating the program) once the back-off has grown
            # past 100 seconds.
            if wait_time > 100:
                sys.exit()
            time.sleep(wait_time)
            wait_time *= 2


def _write_if_possible_recomb(outputWriter, blast_record):
    """Write a summary row when the filtered HSPs pass the recombination check."""
    filteredHspStartEnds = FilterBlastRecord(blast_record)
    if filteredHspStartEnds and CheckPossibleRecomb(filteredHspStartEnds):
        WriteARow(outputWriter, blast_record, filteredHspStartEnds)


def BlastFastaXmlIndv(fasta_filename=None, xml_filename=None):
    """Summarise BLAST results into ``<input>.summary.tsv``.

    Exactly one source is used; ``fasta_filename`` wins when both are given.

    Args:
        fasta_filename: FASTA file whose records are BLASTed one at a time
            against "nr" (restricted by the Entrez query "KM204118.1").
        xml_filename: Existing BLAST XML output to parse instead of running
            new searches.

    Raises:
        ValueError: If neither file name is supplied (previously this ended
            in a NameError when closing a never-opened output file).
    """
    if not fasta_filename and not xml_filename:
        raise ValueError(
            "either fasta_filename or xml_filename must be provided")

    if fasta_filename:
        # Created before the output file so an unreadable input does not
        # leave an empty summary behind.
        record_iterator = SeqIO.parse(fasta_filename, "fasta")
        with open(fasta_filename + ".summary.tsv", 'w') as output_table:
            outputWriter = csv.writer(output_table, delimiter="\t")
            for seq_record in record_iterator:
                result_handle = _qblast_with_backoff(seq_record)
                try:
                    blast_record = NCBIXML.read(result_handle)
                    _write_if_possible_recomb(outputWriter, blast_record)
                finally:
                    result_handle.close()
    else:
        with open(xml_filename + ".summary.tsv", 'w') as output_table:
            outputWriter = csv.writer(output_table, delimiter="\t")
            with open(xml_filename) as result_handle:
                for blast_record in NCBIXML.parse(result_handle):
                    _write_if_possible_recomb(outputWriter, blast_record)
def fillDomainsBLAST(self):
    '''
    Replace uninformative protein descriptions with names found by BLAST.

    For every species in ``self.phagesProteins`` that has not been processed
    yet, each protein whose description (element 0) contains "hypothetical",
    "uncharacterized" or "unknown" is searched with blastp against "nr".
    The first hit whose definition is neither hypothetical nor
    uncharacterized supplies the new description (the text before " [").

    Progress is checkpointed: the list of finished species is pickled to
    ``files/phage_list_blast`` after each species, and ``self.saveDomains()``
    is called once at the end.

    :return: None; ``self.phagesProteins`` is updated in place
    '''
    print('Finding functions/domains with BLAST')
    from Bio.Blast import NCBIWWW
    from Bio.Blast import NCBIXML
    import pickle
    from pathlib import Path

    my_file = Path("files/phage_list_blast")
    if my_file.is_file():
        # NOTE: pickle must only be used on files this program wrote itself.
        with open('files/phage_list_blast', 'rb') as f:
            list_done = pickle.load(f)
    else:
        list_done = []

    vague_words = ('hypothetical', 'uncharacterized', 'unknown')
    for spec in self.phagesProteins:
        if spec in list_done:
            continue
        for prot in self.phagesProteins[spec]:
            description = self.phagesProteins[spec][prot][0].lower()
            if not any(word in description for word in vague_words):
                continue
            # NOTE(review): this looks like the web-form organism syntax
            # rather than Entrez query syntax (e.g. "txid470[ORGN] OR ...");
            # confirm the restriction is actually applied by NCBI.
            result_handle = NCBIWWW.qblast(
                'blastp',
                'nr',
                self.phagesProteins[spec][prot][1],
                entrez_query=
                'Acinetobacter baumannii (taxid:470), Escherichia coli (taxid:562), Klebsiella pneumonia (taxid:573)'
            )
            blastout = NCBIXML.read(result_handle)
            for ali in blastout.alignments:
                hit_def_lower = ali.hit_def.lower()
                if ('hypothetical' not in hit_def_lower
                        and 'uncharacterized' not in hit_def_lower):
                    # split() keeps the whole definition when there is no
                    # " [" suffix; slicing with find() == -1 used to drop
                    # the last character in that case.
                    hit_name = ali.hit_def.split(' [', 1)[0]
                    print(hit_name)
                    self.phagesProteins[spec][prot][0] = hit_name
                    break
        # Checkpoint after every species so an interrupted run can resume.
        list_done.append(spec)
        with open('files/phage_list_blast', 'wb') as f:
            pickle.dump(list_done, f)
    self.saveDomains()
def execute(self, seqRecord, outFormat):
    """Run the configured BLAST search once per database in ``self.params["db"]``.

    The record is submitted in FASTA format; the search settings ('blast',
    'cutoff', 'filter', 'nhits', 'matrix', 'nalign', 'ndesc', 'megablast')
    are read from ``self.params``. ``outFormat`` is accepted but not used.

    Returns:
        A list holding the raw result text of each search, in database order.
    """
    from Bio.Blast import NCBIWWW
    from Bio.Blast import NCBIXML

    query_fasta = seqRecord.format("fasta")
    return [
        NCBIWWW.qblast(
            self.params['blast'],
            database,
            query_fasta,
            expect=float(self.params['cutoff']),
            filter=self.params['filter'],
            hitlist_size=int(self.params['nhits']),
            matrix_name=self.params['matrix'],
            alignments=int(self.params['nalign']),
            descriptions=int(self.params['ndesc']),
            megablast=self.params['megablast'],
        ).read()
        for database in self.params["db"]
    ]
def call_blast(self, input_file, organism):
    """Submit ``input_file`` to a remote blastn search for the given organism.

    The organism codes "mm", "hs" and "rn" select the database stored in the
    matching instance attribute; any other value searches "nt".

    Returns:
        The handle returned by ``NCBIWWW.qblast``.
    """
    organism_db_attributes = (
        ("mm", "mus_musculus_blast_db"),
        ("hs", "homo_sapiens_blast_db"),
        ("rn", "rattus_norvegicus_blast_db"),
    )
    for code, attribute_name in organism_db_attributes:
        if organism == code:
            # Looked up lazily so only the selected attribute is touched.
            target_db = getattr(self, attribute_name)
            break
    else:
        target_db = "nt"

    return NCBIWWW.qblast("blastn",
                          target_db,
                          input_file,
                          hitlist_size=10,
                          expect=1000,
                          word_size=7,
                          gapcosts="5 2")
def make_blast(self):
    """BLAST every protein of the FASTA file against human sequences.

    Each record read from ``self.__file_prot`` is submitted to blastp on
    ``self.__db`` with the search restricted to Homo sapiens; the raw result
    of each search, followed by a newline, is appended to ``self.__out``.
    The elapsed time of every search is printed.
    """
    records = SeqIO.parse(self.__file_prot, "fasta")
    # The context manager closes the output even if a search fails midway.
    with open(self.__out, "w") as save_file:
        for record in records:
            beginning = time.time()
            # The organism name was previously masked ('H**o sapiens'),
            # which is not a valid Entrez organism term.
            result_handle = NCBIWWW.qblast(
                "blastp",
                self.__db,
                record.format("fasta"),
                entrez_query='Homo sapiens [organism]')
            save_file.write(result_handle.read() + "\n")
            end = time.time()
            print(
                "A proteína %s já foi submetida ao blast e demorou %s segundos. "
                % (record.id, end - beginning))
    # Not every iterator type returned by SeqIO.parse exposes close(); call
    # it only when it exists.
    close_records = getattr(records, "close", None)
    if close_records is not None:
        close_records()
def main():
    """BLAST the first input sequence against "nr" and print the raw result.

    The sequences come from ``hf.Get_sequences()``; when it yields nothing
    the function returns without contacting NCBI.
    """
    # get the fasta file from stdin and return the sequence (cst3.fa)
    sequences = hf.Get_sequences()
    # Guard the empty case first: indexing [0] on an empty result would
    # raise IndexError before any emptiness check could run.
    if not sequences:
        return
    sequence = sequences[0]
    # if the file is not empty proceed
    if sequence:
        # Do Blast search of a given protein sequence against the nr database at NCBI
        # https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome
        # see NCBIWWW documentation at http://biopython.org/DIST/docs/tutorial/Tutorial.html (chapter 7.1)
        # qblast arguments: the program ('blastp'), the database ('nr') and
        # the query sequence.
        result_handle = NCBIWWW.qblast("blastp", "nr", sequence.seq)
        # print out the results (stdout)
        print(result_handle.read())
def assign(seqs, outfile, title):
    """BLAST all sequences of a FASTA file and pass the result on for assignment.

    Gap characters ("-") are stripped from every sequence, the sequences are
    submitted together to blastp against "nr", the raw result is written to
    ``<title>_result.txt`` and ``simple_blast_assignment`` is then called
    with that file.

    Args:
        seqs: FASTA file (path or handle accepted by ``SeqIO.parse``).
        outfile: Forwarded unchanged to ``simple_blast_assignment``.
        title: Prefix of the result file name; also forwarded as ``title``.
    """
    result_path = "{}_result.txt".format(title)
    # str.replace works on every Biopython version; Seq.ungap is deprecated
    # in newer releases.
    query = "\n".join(
        ">{}\n{}".format(sequence.id, str(sequence.seq).replace("-", ""))
        for sequence in SeqIO.parse(seqs, "fasta"))
    result_handle = NCBIWWW.qblast("blastp", "nr", query)
    with open(result_path, 'w') as results:
        # read() works for any file-like handle, not only StringIO.
        print(result_handle.read(), file=results)
    simple_blast_assignment(seqs, result_path, outfile, title=title)
def get_blast_results(fasta_filename, blast_type="blastn", db="nt"):
    """Run the contents of a FASTA file through NCBI BLAST.

    Args:
        fasta_filename (str): Path of the FASTA file to submit.
        blast_type (str): BLAST program to run ("blastn", "blastp", etc.).
        db (str): Name of the database to search ("nt", etc.).

    Return:
        list of the records yielded by ``NCBIXML.parse`` for the search
    """
    with open(fasta_filename, 'r') as fasta_file:
        query_text = fasta_file.read()
    xml_handle = NCBIWWW.qblast(blast_type, db, query_text)
    return [record for record in NCBIXML.parse(xml_handle)]
def get_blast_record(seq, alignments, descriptions, hitlist_size):
    """Run a blastp search against "nr" on NCBI and return the parsed record.

    Args:
        seq: query sequence passed to qblast as ``sequence``
        alignments: passed to qblast as ``alignments``
        descriptions: passed to qblast as ``descriptions``
        hitlist_size: passed to qblast as ``hitlist_size``

    Returns:
        the single record produced by ``NCBIXML.read``
    """
    search_options = dict(
        program="blastp",
        database="nr",
        alignments=alignments,
        descriptions=descriptions,
        hitlist_size=hitlist_size,
        sequence=seq,
    )
    return NCBIXML.read(NCBIWWW.qblast(**search_options))
def online_blast(seq_list):
    """BLAST a list of sequences online and return the raw result handle.

    The program, database, megablast flag and hit-list size come from the
    module-level ``args`` (``ba``, ``bd``, ``mb``, ``hs``).
    """
    # Serialise the sequences to FASTA text entirely in memory.
    fasta_buffer = StringIO.StringIO()
    SeqIO.write(seq_list, fasta_buffer, 'fasta')
    fasta_text = fasta_buffer.getvalue()

    logging.debug('BLASTING sequences agaist NCBI')
    # The caller receives the handle itself and is responsible for reading it.
    return NCBIWWW.qblast(args.ba,
                          args.bd,
                          fasta_text,
                          megablast=args.mb,
                          hitlist_size=args.hs)
def execute_blast(self, records, output, newlist):
    """BLAST every gene in ``self.newlist`` and write one report per gene.

    The working directory is changed to ``output``, ``self.fastapath`` is
    indexed into ``self.records``, and for each gene name a text-format
    search (restricted to metazoa) is run. Each report is written to
    ``<gene>_result_handle.txt`` and contains the gene name, the query in
    FASTA format and the raw BLAST output.

    Args:
        records: Not used; ``self.records`` is rebuilt from ``self.fastapath``.
        output: Directory to change into before writing the reports.
        newlist: Not used; gene names are taken from ``self.newlist`` so the
            header, the query sequence and the file name always refer to the
            same gene.
    """
    os.chdir(output)
    self.records = SeqIO.index(self.fastapath, "fasta")
    for gene in self.newlist:
        print("Blasting gene " + gene + " against the " + self.db +
              " database.")
        gene_record = self.records[gene]
        result_handle = NCBIWWW.qblast(self.search,
                                       self.db,
                                       gene_record.seq,
                                       format_type='Text',
                                       hitlist_size=15,
                                       expect=0.0001,
                                       entrez_query='metazoa[Organism]')
        with open('{0}_result_handle.txt'.format(gene), 'w') as f:
            f.write('Gene: ' + gene + '\n\n\n')
            f.write('Seq:\n' + gene_record.format('fasta'))
            f.write(result_handle.read())
def protein_blast(protein,
                  criteria,
                  threshold,
                  filename='blast.fasta',
                  db='swissprot'):
    """Run a blastp search and save hits that pass a percent-positives filter.

    For every alignment only the first HSP is considered. The hit is written
    to ``filename`` as a FASTA-style entry (``>hit_id`` followed by the
    aligned subject string) when ``positives / align_length * 100`` is at
    least ``threshold``.

    Args:
        protein: Query passed to ``NCBIWWW.qblast``.
        criteria: Entrez query restricting the search.
        threshold: Minimum percentage of positives for a hit to be kept.
        filename: Output file, overwritten on every call.
        db: Database to search.
    """
    handle = NCBIWWW.qblast('blastp', db, protein, entrez_query=criteria)
    result = NCBIXML.read(handle)
    # "with" guarantees the file is closed; the original "out.close" lacked
    # the call parentheses and therefore never closed it.
    with open(filename, 'w') as out:
        for alignment in result.alignments:
            first_hsp = alignment.hsps[0]
            percent_positives = (float(first_hsp.positives) /
                                 first_hsp.align_length * 100.0)
            if percent_positives >= threshold:
                out.write('>' + alignment.hit_id + '\n' + first_hsp.sbjct +
                          '\n\n')
def blastp(self, acc):
    """BLAST an accession with blastp and collect the GI numbers of its hits.

    The second "|"-separated field of every alignment title is converted to
    an integer; values not already in ``self.gis`` are appended to it, each
    at most once and in the order they were found.

    On any error (network failure, a title field that is not an integer,
    ...) nothing is added and ``self.status[acc]`` is set to ``False``
    unless it already has a value.

    Args:
        acc: Query passed to ``NCBIWWW.qblast``.
    """
    try:
        result_handle = NCBIWWW.qblast("blastp",
                                       "nr",
                                       acc,
                                       format_type="XML",
                                       expect=self.blast_threshold)
        gis = [
            alignment.title.split("|")[1]
            for blast_record in NCBIXML.parse(result_handle)
            for alignment in blast_record.alignments
        ]
        # Track what has been seen so repeated hits inside this batch are
        # not appended more than once.
        seen = set(self.gis)
        unique = []
        for gi in gis:
            gi_number = int(gi.strip())
            if gi_number not in seen:
                seen.add(gi_number)
                unique.append(gi_number)
        self.gis.extend(unique)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still
        # propagate.
        self.status.setdefault(acc, False)
    return
def search_blast(protien, numHits=50):
    """Run a blastp search against "nr" and return the hit definitions.

    The XML result is also saved to ``my_blast.xml`` in the current
    directory.

    Args:
        protien: Query passed to ``NCBIWWW.qblast``.
        numHits: Maximum number of hits requested (converted with ``int``).

    Returns:
        A list with the text of every ``<Hit_def>`` element, in document
        order (XML entities are left as they appear in the file).
    """
    import re

    # The result must be requested as XML: the code below looks for XML
    # tags, which an HTML report does not contain.
    result_handle = NCBIWWW.qblast("blastp",
                                   "nr",
                                   protien,
                                   hitlist_size=int(numHits),
                                   format_type='XML')
    try:
        data = result_handle.read()
    finally:
        result_handle.close()

    with open("my_blast.xml", "w") as save_file:
        save_file.write(data)

    # A pattern match is independent of the exact indentation and line
    # breaks NCBI uses around the tags.
    return re.findall(r'<Hit_def>(.*?)</Hit_def>', data, re.DOTALL)
def cli(input, output):
    """Simple program that BLAST searches all FASTA files in a directory and
    writes the HSPs of the top 5 hits for each query to a text file."""
    # open each file and print the filename to the terminal
    for filename in glob.glob(os.path.join(input, '*.fasta')):
        print(filename)
        # Plain 'r' mode: the old 'rU' flag is rejected by Python 3.11+.
        with open(filename, 'r') as fasta_handle:
            query = fasta_handle.read()
        # do the actual blast search
        result_handle = NCBIWWW.qblast('blastn', 'nt', query, hitlist_size=5)
        # parse the results without storing them
        for result in SearchIO.parse(result_handle, 'blast-xml'):
            # Numbering restarts at 1 for every query in the file.
            for i, hsp in enumerate(result.hsps, 1):
                output.write('Result #' + str(i) + '\n')
                output.write(str(hsp) + '\n\n')
    print('Done')
def processFasta(fastaFile, resultDirectory):
    """BLAST a single-record FASTA file with blastx and process the result.

    The XML result is saved in ``resultDirectory`` under the FASTA file's
    base name with an ``.xml`` extension, then reopened and handed to
    ``handelBlastResult`` together with that base name.

    Args:
        fastaFile: Path of a FASTA file containing exactly one record.
        resultDirectory: Directory that receives the XML file; a trailing
            path separator is optional.
    """
    print("Writing to directory ==> " + resultDirectory)
    record = SeqIO.read(fastaFile, format="fasta")
    result_handle = NCBIWWW.qblast("blastx",
                                   "nr",
                                   record.format("fasta"),
                                   expect=1e-10,
                                   hitlist_size=5)

    # File name without its last extension; used for the XML name and as
    # the label passed on below.
    genomeName = os.path.basename(fastaFile).rsplit('.', 1)[0]
    # os.path.join inserts the separator only when it is missing, so both
    # "results" and "results/" work.
    location = os.path.join(resultDirectory, genomeName + '.xml')

    with open(location, "w") as save_file:
        save_file.write(result_handle.read())
    with open(location) as saved_results:
        handelBlastResult(saved_results, genomeName)
def find_homologues(protACC, max_number=10, filename="blast.xml"):
    """
    Run a protein BLAST for an accession and list the accessions of its hits.

    protACC is the query (a protein accession number), max_number is the
    maximum number of hits requested (default 10) and filename is where the
    XML result is stored before it is parsed. The function returns the
    accession numbers of the BLAST hits as a list.
    """
    blast_handle = NCBIWWW.qblast("blastp",
                                  "nr",
                                  protACC,
                                  hitlist_size=max_number)
    # Keep a copy of the raw XML on disk, then parse that copy.
    with open(filename, "w") as xml_out:
        xml_out.write(blast_handle.read())
    with open(filename) as xml_in:
        parsed_record = NCBIXML.read(xml_in)
    return [hit.accession for hit in parsed_record.alignments]
def blast(sequence, vorm='blastp', max_retries=None):
    """Run a remote BLAST search against "nr", retrying on failure.

    Every attempt is preceded by a 5 second pause.

    Args:
        sequence: Query passed to ``NCBIWWW.qblast``.
        vorm: BLAST program name.
        max_retries: Number of retries allowed after the first attempt;
            ``None`` (the default) retries until a search succeeds.

    Returns:
        For ``vorm == 'blastp'`` the raw result handle; for any other
        program the first parsed BLAST record, or ``None`` when the result
        contains no record.

    Raises:
        Exception: The last error, once ``max_retries`` is exhausted.
    """
    failures = 0
    # A loop instead of recursion: the old retry called itself with the two
    # arguments swapped and could exhaust the recursion limit.
    while True:
        try:
            time.sleep(5)
            result_handle = NCBIWWW.qblast(vorm,
                                           'nr',
                                           sequence,
                                           expect=(1 * (10**-5)),
                                           matrix_name='BLOSUM62',
                                           word_size=3,
                                           format_type='XML',
                                           hitlist_size=5)
            if vorm == 'blastp':
                return result_handle
            return next(iter(NCBIXML.parse(result_handle)), None)
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C working during an
            # otherwise endless retry loop.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
def blastx_blasten(sequentie):
    """
    Run a blastx search against "nr" and summarise the first BLAST record.

    The raw XML result is written to "Resultaat.xml" and that file is then
    parsed again. For every HSP of every alignment in the first record the
    organism, protein, full title, length, e-value and the first 75
    characters of the query/match/subject lines are appended to a list.

    :param sequentie: the query sequence
    :return: blast_resultaat, a list of strings describing the BLAST hits
    :return: titel, the title of the last alignment that was reported, or
        an empty string when nothing was reported
    """
    titel = ''
    blast_resultaat = []
    result_handle = NCBIWWW.qblast("blastx",
                                   "nr",
                                   sequentie,
                                   alignments=1,
                                   hitlist_size=10)
    # Context managers close both file handles even when writing or
    # parsing fails.
    with open("Resultaat.xml", "w") as bestand:
        bestand.write(result_handle.read())
    with open("Resultaat.xml", "r") as saved_handle:
        blast_record = next(NCBIXML.parse(saved_handle))

    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            blast_resultaat.append("****Alignment****")
            titel = alignment.title
            # Organism: the text inside the first [...] pair of the title.
            organism = titel.split("[")[1].split("]")[0]
            # Protein: the third "|"-separated field, up to the first "[".
            # Assumes the title has at least three such fields -- TODO
            # confirm for the title format NCBI currently returns.
            protein = titel.split("|")[2].split("[")[0]
            blast_resultaat.append("Blast organism: {}".format(organism))
            blast_resultaat.append("Protein: {}".format(protein))
            blast_resultaat.append("Sequence: {}".format(alignment.title))
            blast_resultaat.append("Length: {}".format(alignment.length))
            blast_resultaat.append("E-value: {}".format(hsp.expect))
            blast_resultaat.append(hsp.query[0:75] + "...")
            blast_resultaat.append(hsp.match[0:75] + "...")
            blast_resultaat.append(hsp.sbjct[0:75] + "...")
            blast_resultaat.append("\n")
    return blast_resultaat, titel