Esempio n. 1
0
def generate_motif_features(feature: CDSFeature,
                            motifs: List[HMMResult]) -> List[CDSMotif]:
    """ Builds one CDSMotif feature for each HMMResult found within a CDS

        Arguments:
            feature: the CDSFeature the motifs were found in
            motifs: a list of HMMResults, one for each motif hit

        Returns:
            a list of CDSMotif features, in the same order as the given motifs
    """
    # identifiers prefer the locus tag, falling back to the name of the CDS
    cds_name = feature.get_name()
    locus_tag = feature.locus_tag or cds_name

    results = []
    # the counter ends up in user-facing identifiers, so it starts from 1
    for counter, hit in enumerate(motifs, 1):
        start, end = hit.query_start, hit.query_end
        dna_location = feature.get_sub_location_from_protein_coordinates(start, end)
        protein_location = FeatureLocation(start, end)

        built = CDSMotif(dna_location, feature.get_name(), protein_location,
                         tool="nrps_pks_domains")
        built.label = hit.hit_id
        built.domain_id = 'nrpspksmotif_{}_{:04d}'.format(locus_tag, counter)
        built.evalue = hit.evalue
        built.score = hit.bitscore
        built.detection = "hmmscan"
        built.database = "abmotifs"
        built.locus_tag = locus_tag

        # the same protein coordinates used for the locations select the residues
        built.translation = feature.translation[start:end]

        results.append(built)
    return results
def generate_domain_features(gene: CDSFeature, domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Generates AntismashDomain features for each provided HMMResult

        Each domain is numbered per hit id within the gene, starting from 1,
        and that number is used in both the label and the domain id.

        Arguments:
            gene: the CDSFeature the domains were found in
            domains: a list of HMMResults found in the CDSFeature

        Returns:
            a dictionary mapping the HMMResult used to the matching AntismashDomain
    """
    new_features = {}  # type: Dict[HMMResult, AntismashDomain]
    domain_counts = defaultdict(int)  # type: Dict[str, int]
    for domain in domains:
        loc = gene.get_sub_location_from_protein_coordinates(domain.query_start, domain.query_end)

        # set up new feature
        new_feature = AntismashDomain(loc, tool="nrps_pks_domains")
        new_feature.domain = domain.hit_id
        new_feature.locus_tag = gene.locus_tag or gene.get_name()
        new_feature.detection = "hmmscan"
        new_feature.database = "nrpspksdomains.hmm"
        new_feature.evalue = domain.evalue
        new_feature.score = domain.bitscore

        # the slice end is exclusive so that the translation covers exactly the
        # residues described by the location, which is built from the same
        # start/end pair; an extra "+ 1" would append one residue past the hit
        # NOTE(review): assumes HMMResult coordinates are 0-based and half-open
        new_feature.translation = gene.translation[domain.query_start:domain.query_end]

        domain_counts[domain.hit_id] += 1  # 1-indexed, so increment before use
        domain_name = "{}_{}.{}".format(gene.get_name(), domain.hit_id, domain_counts[domain.hit_id])

        new_feature.domain_id = "nrpspksdomains_" + domain_name
        new_feature.label = domain_name

        new_features[domain] = new_feature
    return new_features
Esempio n. 3
0
 def __init__(self, feature: CDSFeature) -> None:
     """ Sets up the instance from the given CDS feature

         Arguments:
             feature: the CDSFeature supplying the translation and name
     """
     # the names given to the parent initialiser match the attributes set below
     attribute_names = ["id", "sequence", "domains", "modules"]
     super().__init__(attribute_names)
     self.sequence = feature.translation
     self.id = feature.get_name()
     # both collections start empty
     self.domains = []  # type: List[JSONDomain]
     self.modules = []  # type: List[JSONModule]
Esempio n. 4
0
 def __init__(self, feature: CDSFeature) -> None:
     """ Sets up the instance from the given CDS feature

         Arguments:
             feature: the CDSFeature supplying the translation and name
     """
     # the names given to the parent initialiser match the attributes set below
     attribute_names = ['id', 'sequence', 'domains']
     super().__init__(attribute_names)
     self.sequence = feature.translation
     self.id = feature.get_name()
     # the collection starts empty
     self.domains = []  # type: List[JSONDomain]