コード例 #1
0
ファイル: pfam.py プロジェクト: pythseq/anvio
    def process(self):
        hmm_file = os.path.join(self.pfam_data_dir, 'Pfam-A.hmm.gz')

        # initialize contigs database
        class Args:
            pass

        args = Args()
        args.contigs_db = self.contigs_db_path
        contigs_db = dbops.ContigsSuperclass(args)
        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # export AA sequences for genes
        target_files_dict = {
            'AA:GENE': os.path.join(tmp_directory_path, 'AA_gene_sequences.fa')
        }
        contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(
            output_file_path=target_files_dict['AA:GENE'],
            simple_headers=True,
            rna_alphabet=False,
            report_aa_sequences=True)

        # run hmmscan
        hmmer = HMMer(target_files_dict, num_threads_to_use=self.num_threads)
        hmm_hits_file = hmmer.run_hmmscan('Pfam', 'AA', 'GENE', None, None,
                                          len(self.function_catalog), hmm_file,
                                          None, '--cut_ga')

        # parse hmmscan output
        parser = parser_modules['search']['hmmscan'](hmm_hits_file,
                                                     alphabet='AA',
                                                     context='GENE')
        search_results_dict = parser.get_search_results()

        # add functions to database
        functions_dict = {}
        counter = 0
        for hmm_hit in search_results_dict.values():
            functions_dict[counter] = {
                'gene_callers_id': hmm_hit['gene_callers_id'],
                'source': 'Pfam',
                'accession': hmm_hit['gene_hmm_id'],
                'function':
                self.get_function_from_catalog(hmm_hit['gene_hmm_id']),
                'e_value': hmm_hit['e_value'],
            }

            counter += 1

        gene_function_calls_table = TableForGeneFunctions(
            self.contigs_db_path, self.run, self.progress)
        gene_function_calls_table.create(functions_dict)
コード例 #2
0
ファイル: pfam.py プロジェクト: AstrobioMike/anvio
    def process(self):
        hmm_file = os.path.join(self.pfam_data_dir, 'Pfam-A.hmm.gz')

        # initialize contigs database
        class Args: pass
        args = Args()
        args.contigs_db = self.contigs_db_path
        contigs_db = dbops.ContigsSuperclass(args)
        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # export AA sequences for genes
        target_files_dict = {'AA:GENE': os.path.join(tmp_directory_path, 'AA_gene_sequences.fa')}
        contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(output_file_path=target_files_dict['AA:GENE'],
                                                                   simple_headers=True,
                                                                   rna_alphabet=False,
                                                                   report_aa_sequences=True)

        # run hmmscan
        hmmer = HMMer(target_files_dict, num_threads_to_use=self.num_threads)
        hmm_hits_file = hmmer.run_hmmscan('Pfam', 'AA', 'GENE', None, None, len(self.function_catalog), hmm_file, None, '--cut_ga')

        # parse hmmscan output
        parser = parser_modules['search']['hmmscan'](hmm_hits_file, alphabet='AA', context='GENE')
        search_results_dict = parser.get_search_results()

        # add functions to database
        functions_dict = {}
        counter = 0
        for hmm_hit in search_results_dict.values():
            functions_dict[counter] = {
                'gene_callers_id': hmm_hit['gene_callers_id'],
                'source': 'Pfam',
                'accession': hmm_hit['gene_hmm_id'],
                'function': self.get_function_from_catalog(hmm_hit['gene_hmm_id']),
                'e_value': hmm_hit['e_value'],
            }

            counter += 1

        gene_function_calls_table = TableForGeneFunctions(self.contigs_db_path, self.run, self.progress)
        gene_function_calls_table.create(functions_dict)

        if anvio.DEBUG:
            run.warning("The temp directories, '%s' and '%s' are kept. Please don't forget to clean those up\
                         later" % (tmp_directory_path, ', '.join(hmmer.tmp_dirs)), header="Debug")
        else:
            run.info_single('Cleaning up the temp directory (you can use `--debug` if you would\
                             like to keep it for testing purposes)', nl_before=1, nl_after=1)
            shutil.rmtree(tmp_directory_path)
            hmmer.clean_tmp_dirs()
コード例 #3
0
    def process(self):
        hmm_file = os.path.join(self.pfam_data_dir, 'Pfam-A.hmm.gz')

        # initialize contigs database
        class Args:
            pass

        args = Args()
        args.contigs_db = self.contigs_db_path
        contigs_db = dbops.ContigsSuperclass(args)
        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # get an instance of gene functions table
        gene_function_calls_table = TableForGeneFunctions(
            self.contigs_db_path, self.run, self.progress)

        # export AA sequences for genes
        target_files_dict = {
            'AA:GENE': os.path.join(tmp_directory_path, 'AA_gene_sequences.fa')
        }
        contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(
            output_file_path=target_files_dict['AA:GENE'],
            simple_headers=True,
            rna_alphabet=False,
            report_aa_sequences=True)

        # run hmmscan
        hmmer = HMMer(target_files_dict, num_threads_to_use=self.num_threads)
        hmm_hits_file = hmmer.run_hmmscan('Pfam', 'AA', 'GENE', None, None,
                                          len(self.function_catalog), hmm_file,
                                          None, '--cut_ga')

        if not hmm_hits_file:
            run.info_single(
                "The HMM search returned no hits :/ So there is nothing to add to the contigs database. But "
                "now anvi'o will add PFAMs as a functional source with no hits, clean the temporary directories "
                "and gracefully quit.",
                nl_before=1,
                nl_after=1)
            shutil.rmtree(tmp_directory_path)
            hmmer.clean_tmp_dirs()
            gene_function_calls_table.add_empty_sources_to_functional_sources(
                {'Pfam'})
            return

        # parse hmmscan output
        parser = parser_modules['search']['hmmscan'](hmm_hits_file,
                                                     alphabet='AA',
                                                     context='GENE')
        search_results_dict = parser.get_search_results()

        # add functions to database
        functions_dict = {}
        counter = 0
        for hmm_hit in search_results_dict.values():
            functions_dict[counter] = {
                'gene_callers_id':
                hmm_hit['gene_callers_id'],
                'source':
                'Pfam',
                'accession':
                hmm_hit['gene_hmm_id'],
                'function':
                self.get_function_from_catalog(
                    hmm_hit['gene_hmm_id'], ok_if_missing_from_catalog=True),
                'e_value':
                hmm_hit['e_value'],
            }

            counter += 1

        if functions_dict:
            gene_function_calls_table.create(functions_dict)
        else:
            self.run.warning(
                "Pfam class has no hits to process. Returning empty handed, but still adding Pfam as "
                "a functional source.")
            gene_function_calls_table.add_empty_sources_to_functional_sources(
                {'Pfam'})

        if anvio.DEBUG:
            run.warning(
                "The temp directories, '%s' and '%s' are kept. Please don't forget to clean those up "
                "later" % (tmp_directory_path, ', '.join(hmmer.tmp_dirs)),
                header="Debug")
        else:
            run.info_single(
                'Cleaning up the temp directory (you can use `--debug` if you would '
                'like to keep it for testing purposes)',
                nl_before=1,
                nl_after=1)
            shutil.rmtree(tmp_directory_path)
            hmmer.clean_tmp_dirs()
コード例 #4
0
    def populate_search_tables(self, sources={}):
        # if we end up generating a temporary file for amino acid sequences:
        if not len(sources):
            import anvio.data.hmm
            sources = anvio.data.hmm.sources

        if not sources:
            return

        target_files_dict = {}

        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # here we will go through targets and populate target_files_dict based on what we find among them.
        targets = set([s['target'] for s in list(sources.values())])
        for target in targets:
            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(target)

            if not self.genes_are_called and context != "CONTIG":
                raise ConfigError("You are in trouble. The gene calling was skipped for this contigs database, yet anvi'o asked to run an\
                                   HMM profile that wishes to operate on %s context using the %s alphabet. It is not OK. You still could run\
                                   HMM profiles that does not require gene calls to be present (such as the HMM profile that identifies Ribosomal\
                                   RNAs in contigs, but for that you would have to explicitly ask for it by using the additional parameter\
                                   '--installed-hmm-profile Ribosomal_RNAs')." % (context, alphabet))

            self.run.info('Target found', '%s:%s' % (alphabet, context))

            class Args: pass
            args = Args()
            args.contigs_db = self.db_path
            contigs_db = ContigsSuperclass(args, r=terminal.Run(verbose=False))

            if context == 'GENE':
                target_files_dict['%s:GENE' % alphabet] = os.path.join(tmp_directory_path, '%s_gene_sequences.fa' % alphabet)
                contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(output_file_path=target_files_dict['%s:GENE' % alphabet],
                                                                           simple_headers=True,
                                                                           rna_alphabet=True if alphabet=='RNA' else False,
                                                                           report_aa_sequences=True if alphabet=='AA' else False)
            elif context == 'CONTIG':
                if alphabet == 'AA':
                    raise ConfigError("You are somewhere you shouldn't be. You came here because you thought it would be OK\
                                       to ask for AA sequences in the CONTIG context. The answer to that is 'no, thanks'. If\
                                       you think this is dumb, please let us know.")
                else:
                    target_files_dict['%s:CONTIG' % alphabet] = os.path.join(tmp_directory_path, '%s_contig_sequences.fa' % alphabet)
                    utils.export_sequences_from_contigs_db(self.db_path,
                                                           target_files_dict['%s:CONTIG' % alphabet],
                                                           rna_alphabet=True if alphabet=='RNA' else False)

        commander = HMMer(target_files_dict, num_threads_to_use=self.num_threads_to_use)

        for source in sources:
            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(sources[source]['target'])

            kind_of_search = sources[source]['kind']
            domain = sources[source]['domain']
            all_genes_searched_against = sources[source]['genes']
            hmm_model = sources[source]['model']
            reference = sources[source]['ref']
            noise_cutoff_terms = sources[source]['noise_cutoff_terms']

            hmm_scan_hits_txt = commander.run_hmmscan(source,
                                                      alphabet,
                                                      context,
                                                      kind_of_search,
                                                      domain,
                                                      len(all_genes_searched_against),
                                                      hmm_model,
                                                      reference,
                                                      noise_cutoff_terms)

            if not hmm_scan_hits_txt:
                search_results_dict = {}
            else:
                parser = parser_modules['search']['hmmscan'](hmm_scan_hits_txt, alphabet=alphabet, context=context)
                search_results_dict = parser.get_search_results()

            if not len(search_results_dict):
                run.info_single("The HMM source '%s' returned 0 hits. SAD (but it's stil OK)." % source, nl_before=1)


            if context == 'CONTIG':
                # we are in trouble here. because our search results dictionary contains no gene calls, but contig
                # names that contain our hits. on the other hand, the rest of the code outside of this if statement
                # expects a `search_results_dict` with gene callers id in it. so there are two things we need to do
                # to do. one is to come up with some new gene calls and add them to the contigs database. so things
                # will go smoothly downstream. two, we will need to update our `search_results_dict` so it looks
                # like a a dictionary the rest of the code expects with `gene_callers_id` fields. both of these
                # steps are going to be taken care of in the following function. magic.

                if source != "Ribosomal_RNAs":
                    self.run.warning("You just called an HMM profile that runs on contigs and not genes. Because this HMM\
                                      operation is not directly working with gene calls anvi'o already knows about, the resulting\
                                      hits will need to be added as 'new gene calls' into the contigs database. So far so good.\
                                      But because we are in the contigs realm rater than genes realm, it is likely that\
                                      resulting hits will not correspond to open reading frames that are supposed to be\
                                      translated (such as ribosomal RNAs), because otherwise you would be working with genes\
                                      instad of defining CONTIGS as your context in that HMM profile you just used unless you\
                                      not sure what you are doing. Hence, anvi'o will not report amino acid sequences for the\
                                      new gene calls it will recover through these HMMs. Please take a moment and you be the\
                                      judge of whether this will influence your pangenomic analyses or other things you thought\
                                      you would be doing with the result of this HMM search downstream. If you do not feel like\
                                      being the judge of anything today you can move on yet remember to remember this if things\
                                      look somewhat weird later on.",
                                     header="Psst. Your fancy HMM profile '%s' speaking" % source,
                                     lc="green")

                num_hits_before = len(search_results_dict)
                search_results_dict = utils.get_pruned_HMM_hits_dict(search_results_dict)
                num_hits_after = len(search_results_dict)

                if num_hits_before != num_hits_after:
                    self.run.info('Pruned', '%d out of %d hits were removed due to redundancy' % (num_hits_before - num_hits_after, num_hits_before))

                search_results_dict = self.add_new_gene_calls_to_contigs_db_and_update_serach_results_dict(kind_of_search,
                                                                                                           search_results_dict,
                                                                                                           skip_amino_acid_sequences=True)

            self.append(source, reference, kind_of_search, domain, all_genes_searched_against, search_results_dict)

        # FIXME: I have no clue why importing the anvio module is necessary at this point,
        #        but without this, mini test fails becasue "`anvio.DEBUG` is being used
        #        before initialization". nonsense.
        import anvio
        if not anvio.DEBUG:
            commander.clean_tmp_dirs()
            for v in list(target_files_dict.values()):
                os.remove(v)
コード例 #5
0
ファイル: hmmhits.py プロジェクト: qclayssen/anvio
    def populate_search_tables(self, sources={}):
        # if we end up generating a temporary file for amino acid sequences:
        if not len(sources):
            import anvio.data.hmm
            sources = anvio.data.hmm.sources

        if not sources:
            return

        target_files_dict = {}

        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # here we will go through targets and populate target_files_dict based on what we find among them.
        targets = set([s['target'] for s in list(sources.values())])
        for target in targets:

            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(
                target)

            self.run.info('Target found', '%s:%s' % (alphabet, context))

            class Args:
                pass

            args = Args()
            args.contigs_db = self.db_path
            contigs_db = ContigsSuperclass(args)

            if context == 'GENE':
                target_files_dict['%s:GENE' % alphabet] = os.path.join(
                    tmp_directory_path, '%s_gene_sequences.fa' % alphabet)
                contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(
                    output_file_path=target_files_dict['%s:GENE' % alphabet],
                    simple_headers=True,
                    rna_alphabet=True if alphabet == 'RNA' else False,
                    report_aa_sequences=True if alphabet == 'AA' else False)
            elif context == 'CONTIG':
                if alphabet == 'AA':
                    raise ConfigError(
                        "You are somewhere you shouldn't be. You came here because you thought it would be OK\
                                       to ask for AA sequences in the CONTIG context. The answer to that is 'no, thanks'. If\
                                       you think this is dumb, please let us know."
                    )
                else:
                    target_files_dict['%s:CONTIG' % alphabet] = os.path.join(
                        tmp_directory_path,
                        '%s_contig_sequences.fa' % alphabet)
                    utils.export_sequences_from_contigs_db(
                        self.db_path,
                        target_files_dict['%s:CONTIG' % alphabet],
                        rna_alphabet=True if alphabet == 'RNA' else False)

        commander = HMMer(target_files_dict,
                          num_threads_to_use=self.num_threads_to_use)

        for source in sources:
            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(
                sources[source]['target'])

            kind_of_search = sources[source]['kind']
            domain = sources[source]['domain']
            all_genes_searched_against = sources[source]['genes']
            hmm_model = sources[source]['model']
            reference = sources[source]['ref']
            noise_cutoff_terms = sources[source]['noise_cutoff_terms']

            hmm_scan_hits_txt = commander.run_hmmscan(
                source, alphabet, context, kind_of_search, domain,
                all_genes_searched_against, hmm_model, reference,
                noise_cutoff_terms)

            if not hmm_scan_hits_txt:
                search_results_dict = {}
            else:
                parser = parser_modules['search']['hmmscan'](hmm_scan_hits_txt,
                                                             alphabet=alphabet,
                                                             context=context)
                search_results_dict = parser.get_search_results()

            if not len(search_results_dict):
                run.info_single(
                    "The HMM source '%s' returned 0 hits. SAD (but it's stil OK)."
                    % source,
                    nl_before=1)

            if context == 'CONTIG':
                # we are in trouble here. because our search results dictionary contains no gene calls, but contig
                # names that contain our hits. on the other hand, the rest of the code outside of this if statement
                # expects a `search_results_dict` with gene callers id in it. so there are two things we need to do
                # to do. one is to come up with some new gene calls and add them to the contigs database. so things
                # will go smoothly downstream. two, we will need to update our `search_results_dict` so it looks
                # like a a dictionary the rest of the code expects with `gene_callers_id` fields. both of these
                # steps are going to be taken care of in the following function. magic.

                self.run.warning(
                    "Alright! You just called an HMM profile that runs on contigs. Because it is not\
                                 working with anvi'o gene calls directly, the resulting hits will need to be added\
                                 as 'new gene calls' into the contigs database. This is a new feature, and if it\
                                 starts screwing things up for you please let us know. Other than that you're pretty\
                                 much golden. Carry on.",
                    header="Psst. Your fancy HMM profile '%s' speaking" %
                    source,
                    lc="green")

                num_hits_before = len(search_results_dict)
                search_results_dict = utils.get_pruned_HMM_hits_dict(
                    search_results_dict)
                num_hits_after = len(search_results_dict)

                if num_hits_before != num_hits_after:
                    self.run.info(
                        'Pruned',
                        '%d out of %d hits were removed due to redundancy' %
                        (num_hits_before - num_hits_after, num_hits_before))

                search_results_dict = self.add_new_gene_calls_to_contigs_db_and_update_serach_results_dict(
                    kind_of_search, search_results_dict)

            self.append(source, reference, kind_of_search, domain,
                        all_genes_searched_against, search_results_dict)

        # FIXME: I have no clue why importing the anvio module is necessary at this point,
        #        but without this, mini test fails becasue "`anvio.DEBUG` is being used
        #        before initialization". nonsense.
        import anvio
        if not anvio.DEBUG:
            commander.clean_tmp_dirs()
            for v in list(target_files_dict.values()):
                os.remove(v)
コード例 #6
0
    def process(self):
        hmm_file = os.path.join(self.pfam_data_dir, 'Pfam-A.hmm.gz')

        # initialize contigs database
        class Args:
            pass

        args = Args()
        args.contigs_db = self.contigs_db_path
        contigs_db = dbops.ContigsSuperclass(args)
        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # export AA sequences for genes
        target_files_dict = {
            'AA:GENE': os.path.join(tmp_directory_path, 'AA_gene_sequences.fa')
        }
        contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(
            output_file_path=target_files_dict['AA:GENE'],
            simple_headers=True,
            rna_alphabet=False,
            report_aa_sequences=True)

        # run hmmscan
        hmmer = HMMer(target_files_dict, num_threads_to_use=self.num_threads)
        hmm_hits_file = hmmer.run_hmmscan('Pfam', 'AA', 'GENE', None, None,
                                          len(self.function_catalog), hmm_file,
                                          None, '--cut_ga')

        # parse hmmscan output
        parser = parser_modules['search']['hmmscan'](hmm_hits_file,
                                                     alphabet='AA',
                                                     context='GENE')
        search_results_dict = parser.get_search_results()

        # add functions to database
        functions_dict = {}
        counter = 0
        for hmm_hit in search_results_dict.values():
            functions_dict[counter] = {
                'gene_callers_id': hmm_hit['gene_callers_id'],
                'source': 'Pfam',
                'accession': hmm_hit['gene_hmm_id'],
                'function':
                self.get_function_from_catalog(hmm_hit['gene_hmm_id']),
                'e_value': hmm_hit['e_value'],
            }

            counter += 1

        gene_function_calls_table = TableForGeneFunctions(
            self.contigs_db_path, self.run, self.progress)
        gene_function_calls_table.create(functions_dict)

        if anvio.DEBUG:
            run.warning(
                "The temp directories, '%s' and '%s' are kept. Please don't forget to clean those up\
                         later" %
                (tmp_directory_path, ', '.join(hmmer.tmp_dirs)),
                header="Debug")
        else:
            run.info_single(
                'Cleaning up the temp directory (you can use `--debug` if you would\
                             like to keep it for testing purposes)',
                nl_before=1,
                nl_after=1)
            shutil.rmtree(tmp_directory_path)
            hmmer.clean_tmp_dirs()
コード例 #7
0
ファイル: hmmhits.py プロジェクト: meren/anvio
    def populate_search_tables(self, sources={}):
        # if we end up generating a temporary file for amino acid sequences:
        if not len(sources):
            import anvio.data.hmm
            sources = anvio.data.hmm.sources

        if not sources:
            return

        target_files_dict = {}

        tmp_directory_path = filesnpaths.get_temp_directory_path()

        # here we will go through targets and populate target_files_dict based on what we find among them.
        targets = set([s['target'] for s in list(sources.values())])
        for target in targets:

            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(target)

            self.run.info('Target found', '%s:%s' % (alphabet, context))

            class Args: pass
            args = Args()
            args.contigs_db = self.db_path
            contigs_db = ContigsSuperclass(args, r=terminal.Run(verbose=False))

            if context == 'GENE':
                target_files_dict['%s:GENE' % alphabet] = os.path.join(tmp_directory_path, '%s_gene_sequences.fa' % alphabet)
                contigs_db.gen_FASTA_file_of_sequences_for_gene_caller_ids(output_file_path=target_files_dict['%s:GENE' % alphabet],
                                                                           simple_headers=True,
                                                                           rna_alphabet=True if alphabet=='RNA' else False,
                                                                           report_aa_sequences=True if alphabet=='AA' else False)
            elif context == 'CONTIG':
                if alphabet == 'AA':
                    raise ConfigError("You are somewhere you shouldn't be. You came here because you thought it would be OK\
                                       to ask for AA sequences in the CONTIG context. The answer to that is 'no, thanks'. If\
                                       you think this is dumb, please let us know.")
                else:
                    target_files_dict['%s:CONTIG' % alphabet] = os.path.join(tmp_directory_path, '%s_contig_sequences.fa' % alphabet)
                    utils.export_sequences_from_contigs_db(self.db_path,
                                                           target_files_dict['%s:CONTIG' % alphabet],
                                                           rna_alphabet=True if alphabet=='RNA' else False)

        commander = HMMer(target_files_dict, num_threads_to_use=self.num_threads_to_use)

        for source in sources:
            alphabet, context = utils.anvio_hmm_target_term_to_alphabet_and_context(sources[source]['target'])

            kind_of_search = sources[source]['kind']
            domain = sources[source]['domain']
            all_genes_searched_against = sources[source]['genes']
            hmm_model = sources[source]['model']
            reference = sources[source]['ref']
            noise_cutoff_terms = sources[source]['noise_cutoff_terms']

            hmm_scan_hits_txt = commander.run_hmmscan(source,
                                                      alphabet,
                                                      context,
                                                      kind_of_search,
                                                      domain,
                                                      len(all_genes_searched_against),
                                                      hmm_model,
                                                      reference,
                                                      noise_cutoff_terms)

            if not hmm_scan_hits_txt:
                search_results_dict = {}
            else:
                parser = parser_modules['search']['hmmscan'](hmm_scan_hits_txt, alphabet=alphabet, context=context)
                search_results_dict = parser.get_search_results()

            if not len(search_results_dict):
                run.info_single("The HMM source '%s' returned 0 hits. SAD (but it's stil OK)." % source, nl_before=1)


            if context == 'CONTIG':
                # we are in trouble here. because our search results dictionary contains no gene calls, but contig
                # names that contain our hits. on the other hand, the rest of the code outside of this if statement
                # expects a `search_results_dict` with gene callers id in it. so there are two things we need to do
                # to do. one is to come up with some new gene calls and add them to the contigs database. so things
                # will go smoothly downstream. two, we will need to update our `search_results_dict` so it looks
                # like a a dictionary the rest of the code expects with `gene_callers_id` fields. both of these
                # steps are going to be taken care of in the following function. magic.

                if source != "Ribosomal_RNAs":
                    self.run.warning("You just called an HMM profile that runs on contigs and not genes. Because this HMM\
                                      operation is not directly working with gene calls anvi'o already knows about, the resulting\
                                      hits will need to be added as 'new gene calls' into the contigs database. So far so good.\
                                      But blecause we are in the contigs realm rater than genes realm, it is likely that\
                                      resulting hits will not correspond to open reading frames that are supposed to be\
                                      translated (such as ribosomal RNAs), because otherwise you would be working with genes\
                                      instad of defining CONTIGS as your context in that HMM profile you just used unless you\
                                      not sure what you are doing. Hence, anvi'o will not report amino acid sequences for the\
                                      new gene calls it will recover through these HMMs. Please take a moment and you be the\
                                      judge of whether this will influence your pangenomic analyses or other things you thought\
                                      you would be doing with the result of this HMM search downstream. If you do not feel like\
                                      being the judge of anything today you can move on yet remember to remember this if things\
                                      look somewhat weird later on.",
                                     header="Psst. Your fancy HMM profile '%s' speaking" % source,
                                     lc="green")

                num_hits_before = len(search_results_dict)
                search_results_dict = utils.get_pruned_HMM_hits_dict(search_results_dict)
                num_hits_after = len(search_results_dict)

                if num_hits_before != num_hits_after:
                    self.run.info('Pruned', '%d out of %d hits were removed due to redundancy' % (num_hits_before - num_hits_after, num_hits_before))

                search_results_dict = self.add_new_gene_calls_to_contigs_db_and_update_serach_results_dict(kind_of_search,
                                                                                                           search_results_dict,
                                                                                                           skip_amino_acid_sequences=True)

            self.append(source, reference, kind_of_search, domain, all_genes_searched_against, search_results_dict)

        # FIXME: I have no clue why importing the anvio module is necessary at this point,
        #        but without this, mini test fails becasue "`anvio.DEBUG` is being used
        #        before initialization". nonsense.
        import anvio
        if not anvio.DEBUG:
            commander.clean_tmp_dirs()
            for v in list(target_files_dict.values()):
                os.remove(v)