Ejemplo n.º 1
0
def find_homologs(query_file,
                  subject_genome,
                  e_value,
                  max_hits,
                  working_dir,
                  blast_mat_root,
                  wordsize,
                  percent_aligned,
                  extra_params=None,
                  require_hit=False,
                  DEBUG=True):
    """BLAST query_file against subject_genome and filter by percent aligned.

    query_file -- path to a .nuc file or other FASTA file whose lines are
    read and used as the BLAST query.

    subject_genome -- path to a KEGG .nuc file or other FASTA formatted
    file; used as the BLAST database.

    e_value -- e-value threshold for blasts

    percent_aligned -- minimum percent alignment, between 0.0 and 1.0;
    used to build the alignment filter.

    max_hits, wordsize, working_dir, blast_mat_root, extra_params -- these
    are passed along to blast_genome. extra_params defaults to a new empty
    dict on every call.

    require_hit -- accepted for interface compatibility; not used by this
    function.

    DEBUG -- if True, display debugging output

    Returns a tuple (raw_output_data, filtered_ids, removed_ids), where
    raw_output_data is whatever blast_genome returned and the two id
    collections come from query_ids_from_blast_result.
    """
    # Create the dict per call: a literal {} default would be one shared
    # object reused (and possibly mutated downstream) across all calls.
    if extra_params is None:
        extra_params = {}

    # "U" is the Python 2 universal-newlines mode the original relied on
    # (NOTE: this mode was removed in Python 3.11). The context manager
    # guarantees the handle is closed once the lines are read.
    with open(query_file, "U") as query_handle:
        seqs = query_handle.readlines()

    if DEBUG:
        print("BLASTING %s vs. %s" % (query_file, subject_genome))

    # The subject genome file doubles as the BLAST database.
    blast_db = subject_genome

    raw_output_data = blast_genome(seqs,
                                   blast_db,
                                   e_value,
                                   max_hits,
                                   wordsize,
                                   working_dir,
                                   blast_mat_root,
                                   extra_params,
                                   DEBUG=DEBUG)

    if DEBUG:
        print("Length of raw BLAST results: %s" % len(raw_output_data))

    curr_blast_result = BlastResult(raw_output_data)

    align_filter = make_percent_align_filter(percent_aligned)
    # should a mismatch filter be added?

    filtered_ids, removed_ids = query_ids_from_blast_result(curr_blast_result,
                                                            align_filter,
                                                            DEBUG=DEBUG)

    return raw_output_data, filtered_ids, removed_ids
Ejemplo n.º 2
0
    def setUp(self):
        """Create the BlastResult fixture and three temporary FASTA files.

        Sets self.blast_lines / self.blast_result from BLAST_LINES, creates
        temporary files for the subject database and two queries, fills
        them with the module-level test sequences, and records their paths
        in self._paths_to_clean_up (presumably removed in tearDown --
        confirm against the rest of the class).
        """
        self.blast_lines = BLAST_LINES
        self.blast_result = BlastResult(self.blast_lines)

        # mkstemp returns an open OS-level descriptor; close it right away
        # because the files are reopened by path below.
        fd, self.subjectdb_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                        suffix='.fasta')
        close(fd)
        fd, self.query_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                    suffix='.fasta')
        close(fd)
        fd, self.query2_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                     suffix='.fasta')
        close(fd)

        # Write through context managers so every file is flushed and
        # closed before a test reads it, independent of garbage collection.
        with open(self.subjectdb_fp, "w") as subject_out:
            subject_out.writelines(TEST_BLAST_DB_LINES)
        # The first query reuses the database sequences.
        with open(self.query_fp, "w") as query_out:
            query_out.writelines(TEST_BLAST_DB_LINES)
        with open(self.query2_fp, "w") as query2_out:
            query2_out.writelines(TEST_BLAST_DB2_LINES)

        self._paths_to_clean_up = [
            self.subjectdb_fp, self.query_fp, self.query2_fp
        ]