def build_commandline(self, query):
    """Prepare the local BLAST command line for the configured program.

    When ``self.web`` is false, *query* is written in FASTA format to the
    ``self.blastin`` handle (command line BLAST only accepts input files,
    not sequences) and ``self.commandline`` is set to the
    ``Ncbi____Commandline`` object selected by ``self.prog``.  Nothing is
    built in web mode.  The ``self.blastin`` handle is always closed
    before returning.

    Raises:
        KeyError: if ``self.prog`` is neither ``'blastn'`` nor
            ``'tblastx'`` (local mode only).
    """
    # Map program names to wrapper classes so that only the selected
    # command line is instantiated, and only when it is actually needed.
    commandline_classes = {
        'blastn': NcbiblastnCommandline,
        'tblastx': NcbitblastxCommandline,
    }
    try:
        if not self.web:
            commandline_class = commandline_classes[self.prog]
            SeqIO.write(query, self.blastin, 'fasta')
            self.commandline = commandline_class(
                query=self.blastin.name,
                out=self.blastout.name,
                db=self.db,
                evalue=self.evalue,
                outfmt=5,
                max_target_seqs=self.maxhits)
    finally:
        # Closing also flushes the FASTA text to disk.
        self.blastin.close()
    return
def gfg():
    """Handle the BLAST form: show it on GET, run the search on POST.

    Form fields read: ``seq`` (pasted record), ``ip_type`` (``"fasta"``
    or ``"fastq"``), ``blast_type``, ``database`` (remote database name),
    ``db_typeo`` (local database; empty means "search over the internet")
    and ``evalue`` (defaults to 0.05 when left empty).

    The XML result is written to ``outputhtml.xml`` in the current
    working directory and the first record is rendered with
    ``output.html``.  If the submission is unusable (no sequence, unknown
    input or program type, no record in the result) the input form is
    rendered again.

    Raises:
        ValueError: if ``evalue`` is non-empty but not a number.
    """
    import os
    import tempfile

    if request.method != "POST":
        return render_template("input.html")

    ip_sequence = request.form.get("seq") or ""
    ip_type = request.form.get("ip_type")
    blast_type = request.form.get("blast_type")
    database_type = request.form.get("database")
    my_blast_db = request.form.get("db_typeo")

    # Apply the default BEFORE converting: float("") raises ValueError.
    raw_evalue = (request.form.get("evalue") or "").strip()
    e_value_thresh = float(raw_evalue) if raw_evalue else 0.05

    # splitlines() copes with the CRLF line endings browsers submit.
    lines = [ln.strip() for ln in ip_sequence.splitlines() if ln.strip()]
    if len(lines) < 2:
        return render_template("input.html")

    if ip_type == "fastq":
        # Second line of a FASTQ record is the sequence.
        sequence = lines[1]
    elif ip_type == "fasta":
        sequence = "\n".join(lines[1:])
    else:
        return render_template("input.html")

    # Always hand BLAST a FASTA record ('@id' from FASTQ becomes '>id').
    fasta_seq = ">" + lines[0].lstrip("@>") + "\n" + sequence + "\n"

    # NOTE(review): the fixed output file name is shared by all requests;
    # concurrent submissions would overwrite each other's results.
    if not my_blast_db:
        # BLAST over the internet.
        result_handle = NCBIWWW.qblast(blast_type, database_type, fasta_seq)
        try:
            with open("outputhtml.xml", "w") as save_to:
                save_to.write(result_handle.read())
        finally:
            result_handle.close()
    else:
        # Local BLAST: pick the wrapper matching the requested program.
        commandline_classes = {
            "blastn": NcbiblastnCommandline,
            "blastp": NcbiblastpCommandline,
            "blastx": NcbiblastxCommandline,
            "tblastx": NcbitblastxCommandline,
            "tblastn": NcbitblastnCommandline,
        }
        commandline_class = commandline_classes.get(blast_type)
        if commandline_class is None:
            return render_template("input.html")

        # The command line tools take a query FILE, not sequence text.
        fd, query_path = tempfile.mkstemp(suffix=".fasta")
        try:
            with os.fdopen(fd, "w") as query_file:
                query_file.write(fasta_seq)
            cline = commandline_class(cmd=blast_type,
                                      query=query_path,
                                      db=my_blast_db,
                                      evalue=e_value_thresh,
                                      outfmt=5,  # XML, required by NCBIXML
                                      out="outputhtml.xml")
            cline()  # building the object does not run it; calling does
        finally:
            os.remove(query_path)

    # Parse the XML and keep the first record only.
    with open("outputhtml.xml") as f:
        blast_record = next(iter(NCBIXML.parse(f)), None)
    if blast_record is None:
        return render_template("input.html")

    return render_template("output.html",
                           blast_record=blast_record,
                           e_value_threshold=e_value_thresh)
def run_tblastx(self, evalue=0.1):
    """Run tblastx for ``self.query`` against ``self.database``.

    Results are written in tabular format (outfmt 6) to a file named
    after a fresh ``self.id_generator(8)`` id inside ``self.working_dir``.
    After the run the file is read back into two attributes:

    * ``self.complete_hit_list`` -- every row, split on tabs;
    * ``self.best_hit_list`` -- the first row seen for each query id
      (column 0).  NOTE(review): this is the best hit only if BLAST
      lists each query's hits best-first -- confirm.

    Args:
        evalue: e-value cutoff passed to tblastx.

    Returns:
        Path of the tabular result file.
    """
    from Bio.Blast.Applications import NcbitblastxCommandline
    import os

    blast_id = self.id_generator(8)
    outpath = os.path.join(self.working_dir, '%s.tab' % blast_id)
    tblastx_cline = NcbitblastxCommandline(query=self.query,
                                           db=self.database,
                                           evalue=evalue,
                                           outfmt=6,
                                           out=outpath,
                                           num_threads=8,
                                           max_hsps=1000)
    print(tblastx_cline)
    stdout, stderr = tblastx_cline()
    print(stderr)

    with open(outpath, 'r') as result_handle:
        self.best_hit_list = []
        self.complete_hit_list = []
        # Track query ids separately: best_hit_list holds whole rows, so
        # testing a query id against it can never match.
        seen_queries = set()
        for line in result_handle:
            fields = line.rstrip().split('\t')
            self.complete_hit_list.append(fields)
            if fields[0] in seen_queries:
                continue
            seen_queries.add(fields[0])
            self.best_hit_list.append(fields)

    return outpath
def make_blast_cmds(filename_list, blast_type, output, blastdb, date):
    """Build one BLAST shell command per query file.

    Each command writes tabular (outfmt 6) results to
    ``<output>/<date>_dcmegablast_results_<basename>.tab``.

    Args:
        filename_list: paths of the query files.
        blast_type: ``'blastn'`` (run as dc-megablast) or ``'tblastx'``
            (run with genetic code 11 for query and database).
        output: output directory.
        blastdb: BLAST database to search.
        date: string used as the output file name prefix.

    Returns:
        ``(commands, output_paths)``, two lists parallel to
        *filename_list*.

    Raises:
        ValueError: if *blast_type* is not one of the two supported
            programs (checked while processing the first file).
    """
    commands = []
    result_paths = []
    for query_path in filename_list:
        tab_path = str(os.path.join(output, date)
                       + "_dcmegablast_results_"
                       + os.path.basename(query_path)
                       + ".tab")
        if blast_type == 'blastn':
            wrapper = NcbiblastnCommandline
            extra_args = (" -num_threads 1 -max_target_seqs "
                          "2000 -task dc-megablast")
        elif blast_type == 'tblastx':
            wrapper = NcbitblastxCommandline
            extra_args = (" -num_threads 1 -max_target_seqs 2000 "
                          "-query_gencode 11 -db_gencode 11")
        else:
            raise ValueError("must use either blastn or tblastx")
        cline = wrapper(query=query_path, db=blastdb, evalue=10,
                        outfmt=6, out=tab_path)
        # The extra switches are appended as text after the rendered
        # command line.
        commands.append(str(cline) + extra_args)
        result_paths.append(tab_path)
    return (commands, result_paths)
def tblastx(query, evalue, db, out):
    """Run a tblastx search and write the result as XML (outfmt 5).

    Args:
        query: query file passed to tblastx.
        evalue: e-value cutoff.
        db: BLAST database to search.
        out: path of the XML output file.
    """
    # Announce the search before starting it, not after it has finished.
    # NOTE(review): gene_name and db_name are not parameters of this
    # function; presumably they are module-level globals -- confirm.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Searching for %s in %s using tblastx' % (gene_name, db_name))
    tblastx_search = NcbitblastxCommandline(query=query,
                                            evalue=evalue,
                                            db=db,
                                            num_threads=2,
                                            out=out,
                                            outfmt=5)
    tblastx_search()  # runs the search
def RunCommand(self):
    """Run the BLAST search.

    ``self.command_data`` is read as ``(sequence, blast_program,
    database, options)``.  The sequence is written as a one-record FASTA
    file to a temporary input file, a matching ``Ncbi*Commandline`` is
    built with a temporary output file, any extra ``name value`` option
    pairs are applied, and the command is handed to a ``BlastWorker``.

    Returns early (doing nothing further) when the program name matches
    no known BLAST flavour, or after showing an error dialog when an
    extra option is rejected.
    """
    self.fh_in, self.infile = tempfile.mkstemp()
    self.fh_out, self.outfile = tempfile.mkstemp()

    with open(self.infile, "w+") as f:
        f.write(">Name\n")
        f.write(self.command_data[0])

    blast_program = self.command_data[1]
    database = self.command_data[2]

    # Extra options arrive as "name value name value ..."; a trailing
    # name without a value is ignored.
    tokens = self.command_data[3].split() if self.command_data[3] else []
    options = dict(zip(tokens[0::2], tokens[1::2]))

    kwargs = {"query": self.infile, "db": database, "out": self.outfile}

    # The translated programs must be tested first: "tblastn" also ends
    # with "blastn" and "tblastx" also ends with "blastx".
    if blast_program.endswith("tblastn"):
        blast_cmd = NcbitblastnCommandline(blast_program, **kwargs)
    elif blast_program.endswith("tblastx"):
        blast_cmd = NcbitblastxCommandline(blast_program, **kwargs)
    elif blast_program.endswith("blastn"):
        blast_cmd = NcbiblastnCommandline(blast_program, **kwargs)
    elif blast_program.endswith("blastp"):
        blast_cmd = NcbiblastpCommandline(blast_program, **kwargs)
    elif blast_program.endswith("blastx"):
        blast_cmd = NcbiblastxCommandline(blast_program, **kwargs)
    else:
        return

    if options:
        try:
            for key in options:
                blast_cmd.set_parameter(key, options[key])
        except ValueError as e:
            messagebox.showerror("xbb tools",
                                 "Commandline error:\n\n" + str(e))
            self.tid.destroy()
            return

    self.worker = BlastWorker(blast_cmd)
    self.worker.start()

    self.UpdateResults()
def local_tblastx_2file(query_file, dbfile_path, outfile, prefs):
    """Perform tblastx against a local database, writing to *outfile*.

    Args:
        query_file: path of the query file.
        dbfile_path: path of the local BLAST database.
        outfile: path the BLAST results are written to.
        prefs: mapping providing ``'evalue'`` and ``'outfmt_pref'``.

    The command is run through the shell and this function waits for it
    to finish.  The exit status is not checked.
    """
    cline = NcbitblastxCommandline(query=query_file,
                                   db=dbfile_path,
                                   out=outfile,
                                   evalue=prefs['evalue'],
                                   outfmt=prefs['outfmt_pref'])
    child = subprocess.Popen(str(cline), stdout=subprocess.PIPE, shell=True)
    # communicate() forces the main script to wait; the captured stdout
    # is not used, and stderr is not piped.
    child.communicate()
def RunCommand(self):
    """Write the query to a temp file, build the BLAST command, run it.

    ``self.command_data`` is read as ``(sequence, blast_program,
    database, options)``.  The function returns early when the program
    name matches no known BLAST flavour, or after showing an error
    dialog when an extra option is rejected; otherwise the command is
    started in a ``BlastWorker`` and ``self.UpdateResults()`` is called.
    """
    self.fh_in, self.infile = tempfile.mkstemp()
    self.fh_out, self.outfile = tempfile.mkstemp()

    with open(self.infile, 'w+') as f:
        f.write('>Name\n')
        f.write(self.command_data[0])

    blast_program = self.command_data[1]
    database = self.command_data[2]

    # Check if user supplied additional options and extract them as
    # consecutive (name, value) pairs; an unpaired last word is dropped.
    options = {}
    if self.command_data[3]:
        words = self.command_data[3].split()
        for x in range(0, len(words) - 1, 2):
            options[words[x]] = words[x + 1]

    args, kwargs = blast_program, {
        'query': self.infile,
        'db': database,
        'out': self.outfile
    }

    # Order matters: 'tblastn'.endswith('blastn') and
    # 'tblastx'.endswith('blastx') are both true, so the translated
    # programs have to be recognised before the plain ones.
    if blast_program.endswith('tblastn'):
        blast_cmd = NcbitblastnCommandline(args, **kwargs)
    elif blast_program.endswith('tblastx'):
        blast_cmd = NcbitblastxCommandline(args, **kwargs)
    elif blast_program.endswith('blastn'):
        blast_cmd = NcbiblastnCommandline(args, **kwargs)
    elif blast_program.endswith('blastp'):
        blast_cmd = NcbiblastpCommandline(args, **kwargs)
    elif blast_program.endswith('blastx'):
        blast_cmd = NcbiblastxCommandline(args, **kwargs)
    else:
        return

    if options:
        try:
            for key in options:
                blast_cmd.set_parameter(key, options[key])
        except ValueError as e:
            messagebox.showerror('xbb tools',
                                 'Commandline error:\n\n' + str(e))
            self.tid.destroy()
            return

    self.worker = BlastWorker(blast_cmd)
    self.worker.start()

    self.UpdateResults()
def run_blast(self, database):
    """Run TBLASTX for ``self.query`` against *database*.

    The output file comes from ``self.gen_output()``; results are
    written to it as XML (outfmt 5), limited to five target sequences,
    using ``self.evalue`` as the cutoff.  The rendered command line is
    logged at debug level before it is executed.

    Returns:
        The object returned by ``self.gen_output()``.
    """
    result_file = self.gen_output()
    tblastx_options = {
        'query': self.query,
        'out': result_file.name,
        'db': database,
        'evalue': self.evalue,
        'outfmt': 5,
        'max_target_seqs': 5,
    }
    tblastx = NcbitblastxCommandline(**tblastx_options)
    self.mainlog.debug(str(tblastx))
    tblastx()
    return result_file
def run_blast(db, focal):
    """Run TBLASTX with *focal* as query against the database *db*.

    Results go to a freshly created named temporary file as XML
    (outfmt 5), keeping a single target sequence at an e-value cutoff
    of 0.05.

    Returns:
        The open ``NamedTemporaryFile`` holding the BLAST output.
    """
    xml_file = tempfile.NamedTemporaryFile(
        mode='w+t',
        prefix='BLASTSearch_',
        suffix='_BLASTout.xml')
    tblastx_options = dict(
        query=focal,
        out=xml_file.name,
        db=db,
        evalue=0.05,
        outfmt=5,
        max_target_seqs=1)
    tblastx = NcbitblastxCommandline(**tblastx_options)
    tblastx()
    return xml_file
def run_tblastx(args, query_fasta, subject_fasta, out_tsv):
    """Run tblastx with a query FASTA file against a subject FASTA file.

    A timestamped progress line naming both files (without their
    directories) is printed first.  Results are written to *out_tsv* in
    tabular format with the columns ``qseqid sseqid qstart qend sstart
    send evalue qframe sframe``.

    Args:
        args: object providing the ``evalue`` cutoff.
        query_fasta: path of the query FASTA file.
        subject_fasta: path of the subject FASTA file.
        out_tsv: path of the tabular output file.
    """
    # Keep only the part after the last "/" for the log message.
    query_name = query_fasta.rsplit("/", 1)[-1]
    subject_name = subject_fasta.rsplit("/", 1)[-1]
    print(
        '%s\ttBlastX executed with %s evalue cutoff. Query: %s. Subject: %s.'
        % (current_time(), args.evalue, query_name, subject_name))
    sys.stdout.flush()
    tblastx_cline = NcbitblastxCommandline(
        query=query_fasta,
        subject=subject_fasta,
        evalue=args.evalue,
        # The single quotes are part of the value so the multi-word
        # format specification stays one argument on the command line.
        outfmt="'6 qseqid sseqid qstart qend "
               "sstart send evalue qframe sframe'",
        out=out_tsv)
    tblastx_cline()
def make_nuc_nuc_recip_blast_cmds(query_list, date, threads, recip_threads,
                                  output, args, subject_file=None,
                                  logger=None):
    """Set up two BLAST databases and build reciprocal tblastx commands.

    Only works if the queries in *query_list* and *subject_file* are
    both nucleotide.

    Two nucleotide databases are created under
    ``<output>/<subject stem>/``: one from ``args.db_aa`` (titled
    "gene") and one, named ``genomes``, from every file in the directory
    of the first query (titled "genome").  For each query a forward
    command (query against the gene database) is built; the reverse
    command (*subject_file* against the genomes database) is rebuilt on
    every iteration and its output path is recorded once per query.

    Returns:
        ``(forward_commands, reverse_command, forward_outputs,
        reverse_outputs)``.
    """
    assert logger is not None, "must use logging"
    logger.info("Creating nucl BLAST database")

    subject_stem = os.path.splitext(os.path.basename(subject_file))[0]
    db_dir = os.path.join(output, subject_stem)
    os.makedirs(db_dir, exist_ok=True)
    protdb = os.path.join(db_dir, subject_stem)
    nucdb = os.path.join(db_dir, "genomes")

    setup_blast_db(input_file=args.db_aa, input_type="fasta",
                   dbtype="nucl", out=protdb, logger=logger, title="gene")
    all_genomes = os.path.join(os.path.dirname(query_list[0]), "", "*")
    setup_blast_db(input_file=all_genomes, input_type="fasta",
                   dbtype="nucl", out=nucdb, logger=logger, title="genome")

    forward_cmds = []
    forward_outputs = []
    reverse_outputs = []
    for query in query_list:
        # Forward search: genome against gene.
        forward_tab = str(os.path.join(output, date)
                          + "_simpleOrtho_results_"
                          + os.path.basename(query)
                          + "_vs_nucdb.tab")
        forward_cline = NcbitblastxCommandline(
            query=query, max_target_seqs=args.nkeep, db=protdb,
            evalue=.001, outfmt=6, out=forward_tab)
        forward_cmds.append(
            str(forward_cline) + " -num_threads %i" % threads)
        forward_outputs.append(forward_tab)

        # Reverse search: gene against genomes.
        reverse_tab = str(os.path.join(output, date)
                          + "_simpleOrtho_results_"
                          + "nuc_vs_genomes.tab")
        reverse_cline = NcbitblastxCommandline(
            query=subject_file, max_target_seqs=args.nkeep, db=nucdb,
            evalue=.001, outfmt=6, out=reverse_tab)
        recip_blast_command = (
            str(reverse_cline) + " -num_threads %i" % recip_threads)
        reverse_outputs.append(reverse_tab)

    return (forward_cmds, recip_blast_command, forward_outputs,
            reverse_outputs)
def _blast_error(request, code):
    """Render the BLAST output page carrying an error code string."""
    return render(request, 'chromevaloaAPP/blastOutput.html',
                  {'blast_record': code})


def _strip_sequence_numbering(line):
    """Return *line* without decimal digits and spaces."""
    return "".join(ch for ch in line if ch not in "0123456789 ")


def blast(request):
    """Validate a BLAST submission, run it locally and render the result.

    The view shows the submission page when neither a ``sequence`` field
    nor an uploaded ``file`` is present.  Otherwise it validates the
    pasted sequence, the e-value and the hit count, stores the query
    under ``blastOutput/<client ip>/`` next to this module, runs
    tblastx, tblastn or blastn against ``db/<db>`` with XML output and
    renders the parsed record.

    Failures are reported to the template through ``blast_record``:
    ``"ERROR EMPTY"``, ``"ERROR WRONG"`` (invalid characters),
    ``"ERROR EVALUE"``, ``"ERROR HITS"`` or ``"ERROR BLAST"`` (unknown
    program, BLAST failure or unparsable output).
    """
    import shlex
    import subprocess

    path = os.path.dirname(os.path.abspath(__file__))

    # Neither field present: the user has not pressed submit yet.
    if 'sequence' not in request.POST and 'file' not in request.FILES:
        return render(request, 'chromevaloaAPP/blast.html')

    sequence = request.POST.get("sequence", "").upper()
    for blank in (" ", "\r", "\n", "\t"):
        sequence = sequence.replace(blank, "")
    isThereAFile = 'file' in request.FILES
    if sequence == "" and not isThereAFile:
        return _blast_error(request, "ERROR EMPTY")

    formatP = request.POST["formatP"]
    blast_program = request.POST["blast"]
    evalueI = request.POST["evalue"]
    db = request.POST["db"]
    hits = request.POST["hits"]
    ip = get_client_ip(request)

    # Check that the pasted query only uses the allowed characters.
    # NOTE(review): the tblastn set holds a lower-case 'n' while the
    # query is upper-cased, so 'N' is rejected for tblastn -- confirm
    # whether 'N' was intended.
    tblastnSet = set(['A', 'C', 'G', 'T', 'U', 'W', 'S', 'M', 'K', 'R',
                      'Y', 'B', 'D', 'H', 'V', 'n', '-', '.'])
    blastSet = set(['A', 'C', 'G', 'T', 'U', 'R', 'Y', 'S', 'W', 'K',
                    'M', 'B', 'D', 'H', 'V', 'N', '-', '.'])
    allowed = tblastnSet if blast_program == "tblastn" else blastSet
    if not set(sequence).issubset(allowed):
        return _blast_error(request, "ERROR WRONG")

    # Check the e-value and hits fields.
    try:
        float(evalueI)
    except ValueError:
        return _blast_error(request, "ERROR EVALUE")
    if not hits.isdigit():
        return _blast_error(request, "ERROR HITS")

    # Per-client working folder.
    # NOTE(review): ip and db are used as path components without
    # further validation -- confirm they cannot contain "..".
    work_dir = path + "/blastOutput/" + ip
    try:
        os.makedirs(work_dir)
    except OSError:
        # Only an already existing folder is acceptable.
        if not os.path.isdir(work_dir):
            raise

    # Prefer the uploaded file; fall back to the pasted sequence when
    # there is no upload or the upload cannot be processed.
    fName = None
    if isThereAFile:
        try:
            f = request.FILES['file']
            upload_name = f.name
            target = work_dir + '/' + upload_name
            if formatP == "genbank":
                rows = f.read().split("ORIGIN")[1].split("\n")
                acum = "".join(_strip_sequence_numbering(r) for r in rows)
                acum = acum[0:-2]  # drop the last two characters
                with open(target, 'wb+') as destination:
                    destination.write(acum)
            elif formatP == "embl":
                rows = f.read().split("SQ")[1].split("\n")
                acum = "".join(
                    _strip_sequence_numbering(r) for r in rows[1:])
                acum = acum[0:-2]  # drop the last two characters
                with open(target, 'wb+') as destination:
                    destination.write(acum)
            else:
                with open(target, 'wb+') as destination:
                    for chunk in f.chunks():
                        destination.write(chunk)
            fName = upload_name
        except Exception:
            # Best effort, as before: use the text box instead.
            fName = None
    if fName is None:
        fName = "query.txt"
        with open(work_dir + '/' + fName, 'wb+') as destination:
            destination.write(sequence)

    # Initialise the BLAST parameters.
    query_param = work_dir + "/" + fName
    db_param = path + "/db/" + db
    out_param = work_dir + "/blastOutput.xml"
    commandline_classes = {
        "tblastx": NcbitblastxCommandline,
        "tblastn": NcbitblastnCommandline,
        "blastn": NcbiblastnCommandline,
    }
    commandline_class = commandline_classes.get(blast_program)
    if commandline_class is None:
        return _blast_error(request, "ERROR BLAST")
    blast_cline = commandline_class(query=query_param, db=db_param,
                                    evalue=evalueI, max_target_seqs=hits,
                                    outfmt=5,  # XML
                                    out=out_param)

    # Run without a shell: db and the upload name come from the client
    # and must never be interpreted as shell syntax.
    try:
        status = subprocess.call(shlex.split(str(blast_cline)))
    except OSError:
        return _blast_error(request, "ERROR BLAST")
    if status != 0:
        # Do not parse a result file left over from an earlier request.
        return _blast_error(request, "ERROR BLAST")

    try:
        with open(out_param) as result_handle:
            blast_record = NCBIXML.read(result_handle)
    except Exception:
        return _blast_error(request, "ERROR BLAST")

    # If all was ok.
    return render(request, 'chromevaloaAPP/blastOutput.html',
                  {'blast_record': blast_record, 'db': db})
blast_cline = NcbiblastnCommandline(query=fasta_output.name, db=blastdb, evalue=10, outfmt=7, out=output_path_tab) add_params = " -num_threads 4 -max_target_seqs 2000 -task dc-megablast" blast_command = str(str(blast_cline)+add_params) print("Running blastn search...") # subprocess.Popen(blast_command, stdout=subprocess.PIPE, shell=True).stdout.read() subprocess.call(blast_command, shell=True) return(output_path_tab) def run_tblastx(): # build commandline call output_path_tab = str(os.path.join(output, date)+"_tblastx_results.tab") blast_cline = NcbitblastxCommandline(query=fasta_output.name, db=blastdb, evalue=10, outfmt=7, out=output_path_tab) add_params = " -num_threads 4 -max_target_seqs 2000 -query_gencode 11 -db_gencode 11" blast_command = str(str(blast_cline)+add_params) print("Running tblastx search...") # subprocess.Popen(blast_command, stdout=subprocess.PIPE, shell=True).stdout.read() subprocess.call(blast_command, shell=True) return(output_path_tab) #%% Execute if blasttype == "blastn": output_path_tab = run_blastn() elif blasttype == "tblastx": output_path_tab = run_tblastx() else: print("you need to use either blastn or tblastx, sorry!")
def run_BLAST(query, database, args, cons_run):
    """
    Search a database for the query sequences and write XML results.

    The program is chosen as follows: tblastn when the query is protein
    (``args.reftype == 'prot'``, or detected with
    ``SeqFindr.util.is_protein`` when ``args.reftype`` is None),
    otherwise tblastx when ``args.tblastx`` is set, otherwise blastn.

    Important to note:
        * The low-complexity filter is turned off (``seg``/``dust``),
        * Only a single target sequence (top hit) is kept,
        * Output is XML (outfmt 5) under ``BLAST_results/``,
        * With ``args.short`` blastn runs with word size 7 and an
          e-value of 1000 instead of ``args.evalue``.

    .. warning:: tblastx functionality has not been checked

    :param query: the fullpath to the vf.mfa
    :param database: the full path of the database to search
    :param args: the arguments parsed to argparse
    :param cons_run: part of a mapping consensus run

    :type query: string
    :type database: string
    :type args: argparse args
    :type cons_run: boolean

    :returns: the path of the blast.xml file
    """
    query_stem = os.path.splitext(query.split('/')[-1])[0]
    db_stem = os.path.splitext(database.split('/')[-1])[0]
    label = "cons_DB=" if cons_run else "DB="
    outfile = os.path.join("BLAST_results/",
                           label + query_stem + "ID=" + db_stem + "_blast.xml")

    # Decide whether the query is protein; autodetect only when the
    # reference type was not given.
    if args.reftype is None:
        protein = SeqFindr.util.is_protein(query) != -1
        if protein:
            sys.stderr.write('%s is protein' % (query))
    else:
        protein = args.reftype == 'prot'
        if protein:
            sys.stderr.write('%s is protein\n' % (query))

    # Options shared by every program variant.
    shared = dict(query=query, db=database, outfmt=5,
                  max_target_seqs=1, out=outfile)

    if protein:
        sys.stderr.write('Using tblastn\n')
        run_command = NcbitblastnCommandline(
            seg='no', num_threads=args.BLAST_THREADS,
            evalue=args.evalue, **shared)
    elif args.tblastx:
        sys.stderr.write('Using tblastx\n')
        run_command = NcbitblastxCommandline(
            seg='no', num_threads=args.BLAST_THREADS,
            evalue=args.evalue, **shared)
    else:
        sys.stderr.write('Using blastn\n')
        if args.short == False:
            run_command = NcbiblastnCommandline(
                dust='no', num_threads=args.BLAST_THREADS,
                evalue=args.evalue, **shared)
        else:
            sys.stderr.write('Optimising for short query sequences\n')
            run_command = NcbiblastnCommandline(
                dust='no', word_size=7,
                num_threads=args.BLAST_THREADS, evalue=1000, **shared)

    sys.stderr.write(str(run_command) + "\n")
    run_command()
    return os.path.join(os.getcwd(), outfile)
def make_blast_cmds(query_file, evalue, output, subject_file, threads=1,
                    algo=None, protein_subject=False, makedb=False,
                    reciprocal=False, logger=None):
    """Build forward (and optionally reciprocal) BLAST shell commands.

    This handles both protein and nucleotide references.

    The forward command runs *algo* (``blastn``, ``tblastn``,
    ``blastp``, ``blastx`` or ``tblastx``) with *query_file* against a
    database located at ``<output>/<subject stem>/<subject stem>``; with
    *makedb* that directory is created (it must not exist yet) and the
    database is built from *subject_file*.  The reciprocal command uses
    *subject_file* as query and *query_file* as subject, with blastp for
    a protein subject and blastx otherwise.  Every command ends with the
    thread count, ``-num_alignments 20`` and a 13-column tabular
    ``-outfmt``.

    Returns:
        ``(commands, forward_outputs, reciprocal_outputs)``; the
        reciprocal command and its output path are included only when
        *reciprocal* is true.
    """
    assert logger is not None, "must use logging"

    subject_stem = os.path.splitext(os.path.basename(subject_file))[0]
    db_dir = os.path.join(output, subject_stem)
    subjectdb = os.path.join(db_dir, subject_stem)
    db_label = "protdb" if protein_subject else "nucdb"

    if makedb:
        logger.debug("Creating BLAST database")
        os.makedirs(db_dir, exist_ok=False)
        setup_blast_db(input_file=subject_file, input_type="fasta",
                       dbtype="prot" if protein_subject else "nucl",
                       out=subjectdb, logger=logger)

    commands = []
    forward_outputs = []
    reciprocal_outputs = []

    qname = os.path.splitext(os.path.basename(query_file))[0]

    # Forward search: query against the subject database.
    forward_tab = os.path.join(output, qname + "_vs_" + db_label + ".tab")
    if algo == "blastn":
        forward_wrapper = NcbiblastnCommandline
    elif algo == "tblastn":
        forward_wrapper = NcbitblastnCommandline
    elif algo == "blastp":
        forward_wrapper = NcbiblastpCommandline
    elif algo == "blastx":
        forward_wrapper = NcbiblastxCommandline
    else:
        assert algo == "tblastx", "error parsing algrithm!"
        forward_wrapper = NcbitblastxCommandline
    forward_cline = forward_wrapper(query=query_file, db=subjectdb,
                                    evalue=evalue, out=forward_tab)

    # Switches appended as text after the rendered command line.
    extra_args = (
        " -num_threads {} -num_alignments 20 "
        "-outfmt '6 qaccver saccver pident length mismatch "
        "gapopen qstart qend sstart send evalue bitscore slen'"
    ).format(threads)
    commands.append(str(forward_cline) + extra_args)
    forward_outputs.append(forward_tab)

    # Reverse search: subject file against the query file.
    reverse_tab = os.path.join(output, db_label + "_vs_" + qname + ".tab")
    if protein_subject:
        reverse_wrapper = NcbiblastpCommandline
    else:
        reverse_wrapper = NcbiblastxCommandline
    reverse_cline = reverse_wrapper(query=subject_file, subject=query_file,
                                    evalue=evalue, out=reverse_tab)
    reverse_command = str(reverse_cline) + extra_args
    if reciprocal:
        commands.append(reverse_command)
        reciprocal_outputs.append(reverse_tab)

    return (commands, forward_outputs, reciprocal_outputs)