def find_homologs(query_file, subject_genome, e_value, max_hits,
                  working_dir, blast_mat_root, wordsize, percent_aligned,
                  extra_params=None, require_hit=False, DEBUG=True):
    """BLAST query_file against subject_genome and filter the query ids.

    query_file -- path to a .nuc file or other FASTA file holding the
        query sequences; its lines are read and handed to blast_genome.
    subject_genome -- path to a KEGG .nuc file or other FASTA formatted
        file; used as the BLAST database.
    e_value -- e-value threshold for blasts.
    max_hits, working_dir, blast_mat_root, wordsize, extra_params --
        passed along to blast_genome. extra_params defaults to a fresh
        empty dict on every call.
    percent_aligned -- minimum percent alignment, between 0.0 and 1.0;
        used to build the alignment filter.
    require_hit -- accepted for interface compatibility; not used by
        this function.
    DEBUG -- if True, display debugging output.

    Returns a tuple (raw_output_data, filtered_ids, removed_ids), where
    raw_output_data is whatever blast_genome returned and the two id
    collections are the results of query_ids_from_blast_result.
    """
    # A literal {} default would be one dict shared by every call; build
    # a new one per call so downstream code cannot leak state between calls.
    if extra_params is None:
        extra_params = {}

    # "U" requests universal-newline handling (Python 2). The with-block
    # guarantees the handle is closed even if reading raises.
    with open(query_file, "U") as query_f:
        seqs = query_f.readlines()

    if DEBUG:
        print("BLASTING %s vs. %s" % (query_file, subject_genome))

    blast_db = subject_genome
    raw_output_data = blast_genome(seqs, blast_db, e_value, max_hits,
                                   wordsize, working_dir, blast_mat_root,
                                   extra_params, DEBUG=DEBUG)

    if DEBUG:
        print("Length of raw BLAST results: %s" % len(raw_output_data))

    curr_blast_result = BlastResult(raw_output_data)

    align_filter = make_percent_align_filter(percent_aligned)
    # should a mismatch filter be added?
    filtered_ids, removed_ids = query_ids_from_blast_result(
        curr_blast_result, align_filter, DEBUG=DEBUG)

    return raw_output_data, filtered_ids, removed_ids
def setUp(self):
    """Build the BlastResult fixture and the temporary FASTA files.

    Creates three temporary .fasta files (subject database, query and
    second query), fills them with the module-level test lines, and
    records their paths in self._paths_to_clean_up.
    """
    self.blast_lines = BLAST_LINES
    self.blast_result = BlastResult(self.blast_lines)

    # mkstemp hands back an already-open OS-level descriptor; close it
    # immediately because each file is reopened by path below.
    fd, self.subjectdb_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                    suffix='.fasta')
    close(fd)
    fd, self.query_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                suffix='.fasta')
    close(fd)
    fd, self.query2_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                 suffix='.fasta')
    close(fd)

    # query_fp receives the same lines as the subject database;
    # query2_fp receives the second set of lines.
    fixture_contents = (
        (self.subjectdb_fp, TEST_BLAST_DB_LINES),
        (self.query_fp, TEST_BLAST_DB_LINES),
        (self.query2_fp, TEST_BLAST_DB2_LINES),
    )
    for fasta_fp, fasta_lines in fixture_contents:
        # Close each handle explicitly so the data is flushed to disk
        # before any test reads the file back.
        with open(fasta_fp, "w") as fasta_f:
            fasta_f.writelines(fasta_lines)

    self._paths_to_clean_up = [
        self.subjectdb_fp,
        self.query_fp,
        self.query2_fp,
    ]