Exemple #1
0
def build_hits(record: Record, hmmscan_results: List, min_score: float,
               max_evalue: float, database: str) -> List[Dict[str, Any]]:
    """ Converts hmmscan results into JSON-friendly dictionaries, one for
        every HSP that passes both the score and evalue cutoffs

        Arguments:
            record: the Record being scanned, used to map query ids to CDS features
            hmmscan_results: the results of Bio.SearchIO.parse
            min_score: HSPs with a bitscore at or below this value are discarded
            max_evalue: HSPs with an evalue at or above this value are discarded
            database: the name of the database used to find the hits,
                      passed on to the PFAM identifier lookup

        Returns:
            a list of dictionaries, each describing a single accepted hit
    """
    logging.debug("Generating feature objects for PFAM hits")

    cds_by_name = record.get_cds_name_mapping()
    json_hits = []

    for query_result in hmmscan_results:
        for hsp in query_result.hsps:
            # both cutoffs are exclusive, a value equal to the cutoff is rejected
            fails_cutoffs = hsp.bitscore <= min_score or hsp.evalue >= max_evalue
            if fails_cutoffs:
                continue

            cds = cds_by_name[hsp.query_id]
            prot_start = hsp.query_start
            prot_end = hsp.query_end
            sub_location = cds.get_sub_location_from_protein_coordinates(prot_start, prot_end)

            # NOTE(review): the translation slice extends one residue past
            # prot_end, while the location above is built from prot_end
            # itself - confirm which coordinate convention is intended
            json_hits.append({
                "location": str(sub_location),
                "label": query_result.id,
                "locus_tag": cds.get_name(),
                "domain": hsp.hit_id,
                "evalue": hsp.evalue,
                "score": hsp.bitscore,
                "translation": cds.translation[prot_start:prot_end + 1],
                "identifier": pfamdb.get_pfam_id_from_name(hsp.hit_id, database),
                "description": hsp.hit_description,
                "protein_start": prot_start,
                "protein_end": prot_end,
            })
    return json_hits
Exemple #2
0
def build_hits(record, hmmscan_results, min_score: float, max_evalue: float,
               database: str) -> List[Dict[str, Any]]:
    """ Builds dictionaries describing PFAM hits from the given hmmscan results

        HSPs are skipped when they fail either cutoff or when their query id
        is not present in the record's CDS name mapping.

        Arguments:
            record: the record being scanned, it must provide
                    get_cds_name_mapping() and a seq attribute
            hmmscan_results: an iterable of results, each with an id and hsps
            min_score: a minimum allowable bitscore for hits (exclusive)
            max_evalue: a maximum allowable evalue for hits (exclusive)
            database: the name of the database used to find the hits

        Returns:
            a list of dictionaries, one for each accepted hit
    """
    logging.debug("Generating feature objects for PFAM hits")

    hits = []
    feature_by_id = record.get_cds_name_mapping()

    for result in hmmscan_results:
        for hsp in result.hsps:
            if hsp.bitscore <= min_score or hsp.evalue >= max_evalue:
                continue

            # a hit can only be placed if its query is a known CDS of this
            # record, anything else is skipped instead of failing the lookup
            if hsp.query_id not in feature_by_id:
                continue

            feature = feature_by_id[hsp.query_id]
            start, end = calculate_start_and_end(feature, hsp)
            strand = feature.location.strand

            # temporary domain covering just the hit, used only to extract
            # and translate the matching part of the record's sequence
            dummy_feature = PFAMDomain(FeatureLocation(start, end, strand),
                                       description="")
            extracted = dummy_feature.extract(record.seq)
            translation = str(extracted.translate(table=feature.transl_table))

            hit = {
                "start": start,
                "end": end,
                "strand": strand,
                "label": result.id,
                "locus_tag": feature.locus_tag,
                "domain": hsp.hit_id,
                "evalue": hsp.evalue,
                "score": hsp.bitscore,
                "translation": translation,
                "db_xref": [pfamdb.get_pfam_id_from_name(hsp.hit_id, database)],
                "description": hsp.hit_description,
            }
            hits.append(hit)
    return hits