def qiime_blast_seqs(seqs,
                     blast_constructor=Blastall,
                     blast_program='blastn',
                     blast_db=None,
                     refseqs=None,
                     refseqs_fp=None,
                     blast_mat_root=None,
                     params=None,
                     WorkingDir=None,
                     seqs_per_blast_run=1000,
                     HALT_EXEC=False):
    """Blast list of sequences.

    seqs: a list (or object with list-like interface) of (seq_id, seq)
     tuples (e.g., the output of MinimalFastaParser)
    blast_constructor: callable used to create the blast application
     controller; it is called with params, blast_mat_root, InputHandler,
     WorkingDir, SuppressStderr and HALT_EXEC keyword arguments
    blast_program: value passed to blast as the '-p' parameter
    blast_db: value passed to blast as the '-d' parameter; ignored when
     refseqs_fp or refseqs is provided, since a db is built from those
    refseqs: reference sequences passed to build_blast_db_from_fasta_file
     (only used when refseqs_fp is not provided)
    refseqs_fp: fasta filepath passed to build_blast_db_from_fasta_path
    blast_mat_root: forwarded unchanged to blast_constructor
    params: optional dict of additional blast parameters. The dict is
     copied before '-d' and '-p' are set, so the caller's dict (and the
     default) are never modified
    WorkingDir: forwarded to blast_constructor and used as output_dir
     when a blast db is built
    seqs_per_blast_run: number of sequences passed to each blast call
    HALT_EXEC: forwarded unchanged to blast_constructor

    Returns a BlastResult holding the combined results of all runs.

    Files reported by the db-building helpers are removed before
    returning, including when one of the blast runs raises.
    """
    assert blast_db or refseqs_fp or refseqs, \
        'Must provide either a blast_db or a fasta ' +\
        'filepath containing sequences to build one.'

    if refseqs_fp:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            refseqs_fp, output_dir=WorkingDir)
    elif refseqs:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_file(
            refseqs, output_dir=WorkingDir)
    else:
        db_files_to_remove = []

    try:
        # Work on a private copy: writing '-d'/'-p' into a shared default
        # dict (or the caller's dict) would leak settings between calls.
        params = dict(params) if params is not None else {}
        params["-d"] = blast_db
        params["-p"] = blast_program

        blast_app = blast_constructor(
            params=params,
            blast_mat_root=blast_mat_root,
            InputHandler='_input_as_seq_id_seq_pairs',
            WorkingDir=WorkingDir,
            SuppressStderr=True,
            HALT_EXEC=HALT_EXEC)

        current_seqs = []
        blast_results = BlastResult([])
        for seq in seqs:
            current_seqs.append(seq)
            if len(current_seqs) % seqs_per_blast_run == 0:
                batch_result = BlastResult(blast_app(current_seqs)['StdOut'])
                if blast_results:
                    blast_results.update(batch_result)
                else:
                    blast_results = batch_result
                current_seqs = []

        # clean-up run: blast the remaining sequences
        blast_results.update(
            BlastResult(blast_app(current_seqs)['StdOut']))
    finally:
        # Always discard the temporary db files, even if blast failed.
        remove_files(db_files_to_remove)

    return blast_results
def setUp(self):
    """Build a temporary blast db and reference fasta file for the tests.

    Sets self.refseqs1 and self.inseqs1 (the module-level refseqs1 and
    inseqs1 strings split into lines), self.blast_db (db built from
    self.refseqs1 under /tmp/), self.refseqs1_fp (temporary fasta file
    holding refseqs1) and self.files_to_remove (every file created here).
    """
    self.refseqs1 = refseqs1.split('\n')
    self.inseqs1 = inseqs1.split('\n')

    self.blast_db, db_files_to_remove = build_blast_db_from_fasta_file(
        self.refseqs1, output_dir='/tmp/')
    # Record the db files right away so they are tracked even if writing
    # the fasta file below fails.
    self.files_to_remove = list(db_files_to_remove)

    self.refseqs1_fp = get_tmp_filename(
        tmp_dir='/tmp/', prefix="BLAST_temp_db_", suffix=".fasta")
    # Context manager guarantees the handle is closed if write() raises.
    with open(self.refseqs1_fp, 'w') as fasta_f:
        fasta_f.write(refseqs1)

    self.files_to_remove.append(self.refseqs1_fp)
def setUp(self):
    """Create the blast db and fasta fixture files used by the tests.

    Populates:
     self.refseqs1, self.inseqs1: lines of the module-level refseqs1 and
      inseqs1 strings
     self.blast_db: db built from self.refseqs1 with output_dir '/tmp/'
     self.refseqs1_fp: path of a temporary fasta file containing refseqs1
     self.files_to_remove: the db files plus self.refseqs1_fp
    """
    self.refseqs1 = refseqs1.split('\n')
    self.inseqs1 = inseqs1.split('\n')

    self.blast_db, db_files_to_remove = build_blast_db_from_fasta_file(
        self.refseqs1, output_dir='/tmp/')
    # Track the db files immediately; the fasta path is added once the
    # file has been written.
    self.files_to_remove = list(db_files_to_remove)

    self.refseqs1_fp = get_tmp_filename(
        tmp_dir='/tmp/', prefix="BLAST_temp_db_", suffix=".fasta")
    # 'with' closes the file even when the write fails part-way.
    with open(self.refseqs1_fp, 'w') as fasta_f:
        fasta_f.write(refseqs1)

    self.files_to_remove.append(self.refseqs1_fp)
def test_build_blast_db_from_fasta_file(self):
    """build_blast_db_from_fasta_file works with open files as input
    """
    # Open the input inside a context manager so the handle is closed
    # once the db has been built instead of being leaked.
    with open(self.in_aln1_fp) as fasta_f:
        blast_db, db_files = build_blast_db_from_fasta_file(
            fasta_f, output_dir="/tmp/")

    self.assertTrue(blast_db.startswith("/tmp/BLAST_temp_db"))
    self.assertTrue(blast_db.endswith(".fasta"))

    db_extensions = [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]
    expected_db_files = set(
        [blast_db] + [blast_db + ext for ext in db_extensions])
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    self.assertEqual(
        len(blastn(self.test_seq, blast_db=blast_db, e_value=0.0)), 1)

    # Make sure all db_files exist
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Remove all db_files
    remove_files(db_files)

    # Make sure nothing weird happened in the remove
    for fp in db_files:
        self.assertFalse(exists(fp))
def test_build_blast_db_from_fasta_file(self):
    """build_blast_db_from_fasta_file works with open files as input
    """
    # The input handle is only needed while the db is being built; the
    # 'with' block closes it afterwards rather than leaving it open.
    with open(self.in_aln1_fp) as in_aln_f:
        blast_db, db_files = build_blast_db_from_fasta_file(
            in_aln_f, output_dir='/tmp/')

    self.assertTrue(blast_db.startswith('/tmp/BLAST_temp_db'))
    self.assertTrue(blast_db.endswith('.fasta'))

    suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
    expected_db_files = set(
        [blast_db] + [blast_db + suffix for suffix in suffixes])
    self.assertEqual(set(db_files), expected_db_files)

    # result returned when blasting against new db
    hits = blastn(self.test_seq, blast_db=blast_db, e_value=0.0)
    self.assertEqual(len(hits), 1)

    # Make sure all db_files exist
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Remove all db_files
    remove_files(db_files)

    # Make sure nothing weird happened in the remove
    for fp in db_files:
        self.assertFalse(exists(fp))