def qiime_blast_seqs(seqs,
                     blast_constructor=Blastall,
                     blast_program='blastn',
                     blast_db=None,
                     refseqs=None,
                     refseqs_fp=None,
                     blast_mat_root=None,
                     params=None,
                     WorkingDir=None,
                     seqs_per_blast_run=1000,
                     HALT_EXEC=False):
    """Blast a list of sequences in batches and merge the results.

    seqs: a list (or object with list-like interface) of (seq_id, seq)
     tuples (e.g., the output of MinimalFastaParser)
    blast_constructor: application controller class used to run BLAST
    blast_program: value passed to BLAST as the "-p" parameter
    blast_db: existing BLAST database to search; ignored when refseqs_fp
     or refseqs is provided, because a database is built from those
    refseqs: reference sequences passed to build_blast_db_from_fasta_file
     (only used when refseqs_fp is not provided)
    refseqs_fp: path passed to build_blast_db_from_fasta_path
    blast_mat_root: forwarded unchanged to blast_constructor
    params: optional dict of extra BLAST parameters; it is copied, so the
     caller's dict is never modified
    WorkingDir: forwarded to blast_constructor and used as the output
     directory for any database built here
    seqs_per_blast_run: number of sequences submitted per BLAST call
    HALT_EXEC: forwarded unchanged to blast_constructor

    Returns a BlastResult holding the merged results of all batches.

    Raises AssertionError if none of blast_db, refseqs_fp and refseqs is
    provided.
    """
    assert blast_db or refseqs_fp or refseqs, \
        'Must provide either a blast_db or a fasta ' +\
        'filepath containing sequences to build one.'

    # A database built here is temporary: remember its files so they can
    # be removed once all batches have been blasted.
    if refseqs_fp:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            refseqs_fp, output_dir=WorkingDir)
    elif refseqs:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_file(
            refseqs, output_dir=WorkingDir)
    else:
        db_files_to_remove = []

    # Work on a private copy: writing "-d"/"-p" into the argument itself
    # would modify the caller's dict (or a default shared across calls).
    params = {} if params is None else dict(params)
    params["-d"] = blast_db
    params["-p"] = blast_program

    blast_app = blast_constructor(
        params=params,
        blast_mat_root=blast_mat_root,
        InputHandler='_input_as_seq_id_seq_pairs',
        WorkingDir=WorkingDir,
        SuppressStderr=True,
        HALT_EXEC=HALT_EXEC)

    # NOTE(review): if a BLAST run raises, the files of a database built
    # above are left on disk -- confirm whether that is intended (e.g. to
    # allow re-running the command by hand) before adding try/finally.
    current_seqs = []
    blast_results = BlastResult([])
    for seq in seqs:
        current_seqs.append(seq)
        # current_seqs is reset after every run, so this fires each time
        # a full batch of seqs_per_blast_run sequences has accumulated.
        if len(current_seqs) % seqs_per_blast_run == 0:
            batch_result = BlastResult(blast_app(current_seqs)['StdOut'])
            if blast_results:
                blast_results.update(batch_result)
            else:
                blast_results = batch_result
            current_seqs = []

    # clean-up run: blast the remaining sequences
    blast_results.update(
        BlastResult(blast_app(current_seqs)['StdOut']))

    remove_files(db_files_to_remove)

    return blast_results
def blastn(seqs, blast_db="nt", e_value="1e-20", max_hits=200,
           working_dir="/tmp", blast_mat_root=None, extra_params={}):
    """Run blastn on seqs via blast_seqs and wrap the output.

    seqs: query sequences, handed unchanged to blast_seqs
    blast_db: BLAST database to search
    e_value: expectation cutoff, sent as the "-e" parameter
    max_hits: sent as both "-b" (max seqs to show) and "-v" (max one
     line descriptions)
    working_dir: passed to blast_seqs as WorkingDir
    blast_mat_root: passed to blast_seqs unchanged
    extra_params: dict of additional parameters; applied last, so its
     entries override the defaults assembled here (it is only read,
     never modified)

    Returns a BlastResult built from the lines of the run's 'StdOut', or
    None when 'StdOut' is falsy.
    """
    blast_params = {
        "-M": "BLOSUM62",   # matrix
        "-a": "1",          # max procs
        "-e": e_value,      # expectation
        "-b": max_hits,     # max seqs to show
        "-v": max_hits,     # max one line descriptions
        "-p": "blastn",     # program
    }
    blast_params.update(extra_params)

    blast_res = blast_seqs(
        seqs,
        Blastall,
        blast_mat_root=blast_mat_root,
        blast_db=blast_db,
        params=blast_params,
        add_seq_names=False,
        WorkingDir=working_dir)

    # No output means there is nothing to parse.
    if not blast_res['StdOut']:
        return None

    return BlastResult(list(blast_res['StdOut']))
def setUp(self):
    """Build the BlastResult fixture and write the temporary FASTA files.

    Sets self.blast_lines and self.blast_result, obtains three temporary
    file paths from get_tmp_filename_as_str, writes the test sequences to
    them, and records the paths in self._paths_to_clean_up.
    """
    self.blast_lines = BLAST_LINES
    self.blast_result = BlastResult(self.blast_lines)

    self.subjectdb_fp = get_tmp_filename_as_str(
        prefix='ExcludeByBlastTests_', suffix='.fasta')
    self.query_fp = get_tmp_filename_as_str(
        prefix='ExcludeByBlastTests_', suffix='.fasta')
    self.query2_fp = get_tmp_filename_as_str(
        prefix='ExcludeByBlastTests_', suffix='.fasta')

    # The first query file deliberately gets the same sequences as the
    # subject database; only the second query uses a different set.
    fixtures = (
        (self.subjectdb_fp, TEST_BLAST_DB_LINES),
        (self.query_fp, TEST_BLAST_DB_LINES),
        (self.query2_fp, TEST_BLAST_DB2_LINES),
    )
    for fasta_fp, fasta_lines in fixtures:
        # Use a context manager so each handle is flushed and closed
        # before the tests read the file.
        with open(fasta_fp, "w") as fasta_file:
            fasta_file.writelines(fasta_lines)

    self._paths_to_clean_up = [self.subjectdb_fp, self.query_fp,
                               self.query2_fp]
def setUp(self):
    """Build the BlastResult fixture and write the temporary FASTA files.

    Sets self.blast_lines and self.blast_result, creates three temporary
    files with mkstemp, writes the test sequences to them, and records
    the paths in self._paths_to_clean_up.
    """
    self.blast_lines = BLAST_LINES
    self.blast_result = BlastResult(self.blast_lines)

    # mkstemp returns an open OS-level descriptor along with the path;
    # close it right away because the files are reopened by path below.
    fd, self.subjectdb_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                    suffix='.fasta')
    close(fd)
    fd, self.query_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                suffix='.fasta')
    close(fd)
    fd, self.query2_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                 suffix='.fasta')
    close(fd)

    # The first query file deliberately gets the same sequences as the
    # subject database; only the second query uses a different set.
    fixtures = (
        (self.subjectdb_fp, TEST_BLAST_DB_LINES),
        (self.query_fp, TEST_BLAST_DB_LINES),
        (self.query2_fp, TEST_BLAST_DB2_LINES),
    )
    for fasta_fp, fasta_lines in fixtures:
        # Use a context manager so each handle is flushed and closed
        # before the tests read the file.
        with open(fasta_fp, "w") as fasta_file:
            fasta_file.writelines(fasta_lines)

    self._paths_to_clean_up = [self.subjectdb_fp, self.query_fp,
                               self.query2_fp]