Beispiel #1
0
    def test_find_latest(self):
        """ Check that malformed version directories are rejected and that
            the highest valid version directory is the one reported
        """
        # imported locally so this test does not rely on a module-level import
        import re

        with tempfile.TemporaryDirectory(prefix="aS.pfamdbtest") as temp_db_layout:
            pfam_dir = os.path.join(temp_db_layout, "pfam")

            # with only these directories present, the lookup is expected to fail
            for bad_name in ("30.7invalid", "invalid30.7", "irrelevant"):
                os.makedirs(os.path.join(pfam_dir, bad_name))

            # the path is escaped so that any regex metacharacters it contains
            # (the "." of the prefix, backslashes in Windows paths) match literally
            expected_message = ("No matching PFAM database in location "
                                + re.escape(temp_db_layout))
            with self.assertRaisesRegex(Exception, expected_message):
                pfamdb.find_latest_database_version(temp_db_layout)

            # "30.7" is created last to check that the highest version is
            # reported, not the most recently created directory
            steps = (("31.0", "31.0"), ("31.2", "31.2"), ("30.7", "31.2"))
            for new_version, expected_latest in steps:
                os.makedirs(os.path.join(pfam_dir, new_version))
                assert pfamdb.find_latest_database_version(temp_db_layout) == expected_latest
Beispiel #2
0
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Search the CDS features of every region in the record against PFAM

        Arguments:
            record: the record whose regions supply the CDS features
            results: results from an earlier run, if any
            options: the config, supplying the database directory and the
                     requested PFAM database version

        Returns:
            the given results if they were built with the requested database
            version, otherwise the result of a new hmmer run
    """
    database_version = options.clusterhmmer_pfamdb_version
    if database_version == "latest":
        database_version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        existing_version = pfamdb.get_db_version_from_path(results.database)
        if database_version == existing_version:
            # nothing has changed, the earlier results are still valid
            return results
        logging.debug("Replacing clusterhmmer results from %s with %s",
                      existing_version, database_version)

    logging.info('Running cluster PFAM search')

    cds_features = [cds for region in record.get_regions()
                    for cds in region.cds_children]
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, cds_features, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")
Beispiel #3
0
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Search all CDS features of the record against PFAM

        Arguments:
            record: the record supplying the CDS features
            results: results from an earlier run, if any
            options: the config, supplying the database directory and the
                     requested PFAM database version

        Returns:
            the given results if they were built with the requested database
            version, otherwise the result of a new hmmer run
    """
    database_version = options.fullhmmer_pfamdb_version
    if database_version == "latest":
        database_version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        existing_version = pfamdb.get_db_version_from_path(results.database)
        if database_version == existing_version:
            # nothing has changed, the earlier results are still valid
            return results
        logging.debug("Replacing fullhmmer results from %s with %s",
                      existing_version, database_version)

    logging.info('Running whole-genome PFAM search')

    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, record.get_cds_features(), MAX_EVALUE,
                           MIN_SCORE, hmm_path, "fullhmmer")
Beispiel #4
0
    def setUp(self):
        self._old_max_evalue = cluster_hmmer.MAX_EVALUE
        self._old_min_score = cluster_hmmer.MIN_SCORE
        cluster_hmmer.MAX_EVALUE = 0.02
        cluster_hmmer.MIN_SCORE = 1.
        self.config = build_config([],
                                   isolated=True,
                                   modules=antismash.get_all_modules())
        self.latest_pfam = pfamdb.find_latest_database_version(
            self.config.database_dir)
        self.tracer = TraceTracker()
        self.file_list = [
            'Pfam-A.hmm', 'Pfam-A.hmm.h3f', 'Pfam-A.hmm.h3i', 'Pfam-A.hmm.h3m',
            'Pfam-A.hmm.h3p'
        ]
        mock('antismash.common.path.locate_file',
             returns_iter=self.file_list,
             tracker=self.tracer)
        mock('antismash.common.subprocessing.run_hmmscan', returns=[])

        self.expected_trace = """Called antismash.common.path.locate_file(
    '{0}/pfam/{1}/Pfam-A.hmm')
Called antismash.common.path.locate_file(
    '{0}/pfam/{1}/Pfam-A.hmm.h3f')
Called antismash.common.path.locate_file(
    '{0}/pfam/{1}/Pfam-A.hmm.h3i')
Called antismash.common.path.locate_file(
    '{0}/pfam/{1}/Pfam-A.hmm.h3m')
Called antismash.common.path.locate_file(
    '{0}/pfam/{1}/Pfam-A.hmm.h3p')""".format(self.config.database_dir,
                                             self.latest_pfam)
Beispiel #5
0
def check_options(options: ConfigType) -> List[str]:
    """ Check the PFAM database selected by the options

        Arguments:
            options: the config, supplying the database directory and the
                     requested PFAM database version

        Returns:
            whatever pfamdb.check_db reports for the selected database directory
    """
    requested = options.fullhmmer_pfamdb_version
    pfam_root = os.path.join(options.database_dir, "pfam")
    # "latest" is a placeholder that has to be resolved to a real version
    version = (pfamdb.find_latest_database_version(options.database_dir)
               if requested == "latest" else requested)
    return pfamdb.check_db(os.path.join(pfam_root, version))
Beispiel #6
0
 def setUp(self):
     """ Override the module thresholds, build an isolated config and
         look up the latest PFAM database version
     """
     # keep the module thresholds so they can be put back after the test
     self._old_max_evalue, self._old_min_score = (full_hmmer.MAX_EVALUE,
                                                  full_hmmer.MIN_SCORE)
     full_hmmer.MAX_EVALUE = 0.02
     full_hmmer.MIN_SCORE = 1.0

     all_modules = antismash.get_all_modules()
     self.config = build_config([], isolated=True, modules=all_modules)
     db_dir = self.config.database_dir
     self.latest_pfam = pfamdb.find_latest_database_version(db_dir)
Beispiel #7
0
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Search all CDS features of the record against PFAM

        Arguments:
            record: the record supplying the CDS features
            results: results from an earlier run, returned as-is when truthy
            options: the config, supplying the database directory and the
                     requested PFAM database version

        Returns:
            the given results if present, otherwise the result of a new
            hmmer run
    """
    if results:
        return results

    logging.info('Running whole-genome PFAM search')

    database_version = options.fullhmmer_pfamdb_version
    if database_version == "latest":
        database_version = pfamdb.find_latest_database_version(options.database_dir)
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')

    return hmmer.run_hmmer(record, record.get_cds_features(), MAX_EVALUE,
                           MIN_SCORE, hmm_path, "fullhmmer")
Beispiel #8
0
def check_prereqs() -> List[str]:
    """ Check that hmmscan can be located and that the latest PFAM database
        passes pfamdb.check_db

        Returns:
            a list of failure messages, empty if no problem was found
    """
    problems = [
        "Failed to locate executable: %r" % binary_name
        for binary_name in ['hmmscan']
        if not path.locate_executable(binary_name)
    ]

    database_dir = get_config().database_dir
    try:
        latest = pfamdb.find_latest_database_version(database_dir)
    except ValueError as err:
        # without any usable version there is no database to check
        problems.append(str(err))
    else:
        problems.extend(pfamdb.check_db(os.path.join(database_dir, "pfam", latest)))
    return problems
Beispiel #9
0
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Search the CDS features of every cluster in the record against PFAM

        Arguments:
            record: the record whose clusters supply the CDS features
            results: results from an earlier run, returned as-is when truthy
            options: the config, supplying the database directory and the
                     requested PFAM database version

        Returns:
            the given results if present, otherwise the result of a new
            hmmer run
    """
    if results:
        return results

    logging.info('Running cluster PFAM search')

    database_version = options.clusterhmmer_pfamdb_version
    if database_version == "latest":
        database_version = pfamdb.find_latest_database_version(options.database_dir)
    hmm_path = os.path.join(options.database_dir, 'pfam', database_version, 'Pfam-A.hmm')

    cds_features = [cds for cluster in record.get_clusters()
                    for cds in cluster.cds_children]
    return hmmer.run_hmmer(record, cds_features, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")