Esempio n. 1
0
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Run the whole-genome PFAM search over all CDS features of a record.

        Previous results are returned unchanged when they were built with the
        same PFAM database version that is requested now; otherwise the
        search is run again.

        Arguments:
            record: the record whose CDS features will be searched
            results: results of an earlier run, if there are any
            options: the run configuration, read for 'database_dir' and
                     'fullhmmer_pfamdb_version'

        Returns:
            the previous results if reusable, otherwise the results of a
            new hmmer run
    """
    # resolve the symbolic "latest" into a concrete version
    wanted_version = options.fullhmmer_pfamdb_version
    if wanted_version == "latest":
        wanted_version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        used_version = pfamdb.get_db_version_from_path(results.database)
        # an identical database version means the old results still apply
        if used_version == wanted_version:
            return results
        logging.debug("Replacing fullhmmer results from %s with %s",
                      used_version, wanted_version)

    logging.info('Running whole-genome PFAM search')

    hmm_path = os.path.join(options.database_dir, 'pfam', wanted_version, 'Pfam-A.hmm')
    cds_features = record.get_cds_features()
    return hmmer.run_hmmer(record, cds_features, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "fullhmmer")
Esempio n. 2
0
def run_on_record(record: Record, results: Optional[hmmer.HmmerResults],
                  options: ConfigType) -> hmmer.HmmerResults:
    """ Run the PFAM search over the CDS features contained in the regions
        of a record.

        Previous results are returned unchanged when they were built with the
        same PFAM database version that is requested now; otherwise the
        search is run again.

        Arguments:
            record: the record whose regions supply the CDS features
            results: results of an earlier run, if there are any
            options: the run configuration, read for 'database_dir' and
                     'clusterhmmer_pfamdb_version'

        Returns:
            the previous results if reusable, otherwise the results of a
            new hmmer run
    """
    # resolve the symbolic "latest" into a concrete version
    wanted_version = options.clusterhmmer_pfamdb_version
    if wanted_version == "latest":
        wanted_version = pfamdb.find_latest_database_version(options.database_dir)

    if results:
        used_version = pfamdb.get_db_version_from_path(results.database)
        # an identical database version means the old results still apply
        if used_version == wanted_version:
            return results
        logging.debug("Replacing clusterhmmer results from %s with %s",
                      used_version, wanted_version)

    logging.info('Running cluster PFAM search')

    # flatten the CDS children of every region into a single list
    region_cdses = [cds
                    for region in record.get_regions()
                    for cds in region.cds_children]
    hmm_path = os.path.join(options.database_dir, 'pfam', wanted_version, 'Pfam-A.hmm')
    return hmmer.run_hmmer(record, region_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")
Esempio n. 3
0
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Run the whole-genome PFAM search over all CDS features of a record.

        Any truthy previous results are returned as they are, without
        checking which database version produced them.

        Arguments:
            record: the record whose CDS features will be searched
            results: results of an earlier run, if there are any
            options: the run configuration, read for 'database_dir' and
                     'fullhmmer_pfamdb_version'

        Returns:
            the previous results if given, otherwise the results of a
            new hmmer run
    """
    if results:
        return results

    logging.info('Running whole-genome PFAM search')

    # resolve the symbolic "latest" into a concrete version
    version = options.fullhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)
    hmm_path = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')

    cds_features = record.get_cds_features()
    return hmmer.run_hmmer(record, cds_features, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "fullhmmer")
Esempio n. 4
0
def run_rrefinder(record: Record, bitscore_cutoff: float, min_length: int, database: str) -> RREFinderResults:
    """ Run RREFinder on a record: gather the RRE candidates, search them
        with hmmer and keep the hits that pass filtering.

        Arguments:
            record: the record to analyse
            bitscore_cutoff: passed to hmmer as the minimum score and to the
                             hit filtering
            min_length: passed to the hit filtering
            database: the hmmer database to search against

        Returns:
            an RREFinderResults instance holding the filtered hits, which are
            empty if no candidates were gathered
    """
    protocluster_candidates, candidate_cdses = gather_rre_candidates(record)

    # nothing to search, so skip hmmer entirely and report empty hits
    if candidate_cdses == {}:
        return RREFinderResults(record.id, bitscore_cutoff, min_length, {}, {})

    search_results = run_hmmer(
        record,
        candidate_cdses.values(),
        max_evalue=1,
        min_score=bitscore_cutoff,
        database=database,
        tool='rrefinder',
        use_cut_tc=False,
        filter_overlapping=False,
    )
    raw_hits_by_cds = extract_rre_hits(search_results)
    kept_by_cds, kept_by_protocluster = filter_hits(
        raw_hits_by_cds, protocluster_candidates, min_length, bitscore_cutoff)
    return RREFinderResults(record.id, bitscore_cutoff, min_length,
                            kept_by_protocluster, kept_by_cds)
Esempio n. 5
0
def run_on_record(record, results, options) -> hmmer.HmmerResults:
    """ Run the PFAM search over the CDS features contained in the clusters
        of a record.

        Any truthy previous results are returned as they are, without
        checking which database version produced them.

        Arguments:
            record: the record whose clusters supply the CDS features
            results: results of an earlier run, if there are any
            options: the run configuration, read for 'database_dir' and
                     'clusterhmmer_pfamdb_version'

        Returns:
            the previous results if given, otherwise the results of a
            new hmmer run
    """
    if results:
        return results

    logging.info('Running cluster PFAM search')

    # resolve the symbolic "latest" into a concrete version
    version = options.clusterhmmer_pfamdb_version
    if version == "latest":
        version = pfamdb.find_latest_database_version(options.database_dir)
    hmm_path = os.path.join(options.database_dir, 'pfam', version, 'Pfam-A.hmm')

    # flatten the CDS children of every cluster into a single list
    cluster_cdses = [cds
                     for cluster in record.get_clusters()
                     for cds in cluster.cds_children]
    return hmmer.run_hmmer(record, cluster_cdses, MAX_EVALUE, MIN_SCORE,
                           hmm_path, "clusterhmmer")
Esempio n. 6
0
def run_on_record(record: Record, results: Optional[TIGRFamResults],
                  options: ConfigType) -> TIGRFamResults:
    """ Run the TIGRFam search over the CDS features contained in the regions
        of a record.

        Any truthy previous results are returned as they are and no search
        is run.

        Arguments:
            record: the record whose regions supply the CDS features
            results: results of an earlier run, if there are any
            options: the run configuration, read for 'database_dir'

        Returns:
            the previous results if given, otherwise a TIGRFamResults
            instance built from the results of a new hmmer run
    """
    if results:
        return results

    # only announce the search once it is certain that one will be run,
    # otherwise the log is misleading when previous results are reused
    logging.info('Running TIGRFam search')

    # flatten the CDS children of every region into a single list
    features = [cds
                for region in record.get_regions()
                for cds in region.cds_children]
    tigr_db = os.path.join(options.database_dir, "tigrfam", "TIGRFam.hmm")
    hmmer_results = hmmer.run_hmmer(record, features, MAX_EVALUE, MIN_SCORE,
                                    tigr_db, "tigrfam",
                                    filter_overlapping=False)
    return TIGRFamResults.from_hmmer_results(hmmer_results)
Esempio n. 7
0
def run_rrefinder(record: Record, bitscore_cutoff: float, min_length: int,
                  database: str) -> RREFinderResults:
    """ Run RREFinder on a record: gather the RRE candidates, search them
        with hmmer and keep the hits that pass filtering.

        When no candidates are gathered, hmmer is not run at all and results
        with empty hits are returned.

        Arguments:
            record: the record to analyse
            bitscore_cutoff: passed to hmmer as the minimum score and to the
                             hit filtering
            min_length: passed to the hit filtering
            database: the hmmer database to search against

        Returns:
            an RREFinderResults instance holding the filtered hits
    """
    # Gather all RRE candidates
    candidates_per_protocluster, cds_info = gather_rre_candidates(record)

    # Without candidates there is nothing to search, so avoid invoking hmmer
    # on an empty set of features
    if not cds_info:
        return RREFinderResults(record.id, bitscore_cutoff, min_length, {}, {})

    # Run hmmer over the candidates and gather the hits
    hmm_results = run_hmmer(record,
                            cds_info.values(),
                            max_evalue=1,
                            min_score=bitscore_cutoff,
                            database=database,
                            tool='rrefinder',
                            use_cut_tc=False)
    # Extract the RRE hits
    hit_info = extract_rre_hits(hmm_results)
    # Filter the hits
    filtered_hit_info, filtered_hits_per_protocluster = filter_hits(
        hit_info, candidates_per_protocluster, min_length, bitscore_cutoff)
    return RREFinderResults(record.id, bitscore_cutoff, min_length,
                            filtered_hits_per_protocluster,
                            filtered_hit_info)