def generate_domain_features(gene: CDSFeature, domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Builds one AntismashDomain feature for every HMMResult hit within a CDS

        Each feature receives the label "<gene name>_<hit id>.<n>" and the
        domain id "nrpspksdomains_<label>", where n counts (starting at 1) the
        hits with the same hit id seen so far, in the order they were supplied.

        Arguments:
            gene: the CDSFeature containing the hits
            domains: the HMMResults found within the CDSFeature

        Returns:
            a dictionary mapping each HMMResult to the AntismashDomain built from it
    """
    seen_per_hit_id = defaultdict(int)  # type: Dict[str, int]
    features_by_hit = {}

    for hit in domains:
        start = hit.query_start
        end = hit.query_end
        hit_id = hit.hit_id

        # convert the protein coordinates of the hit into a location within the gene
        location = gene.get_sub_location_from_protein_coordinates(start, end)

        feature = AntismashDomain(location, tool="nrps_pks_domains")
        feature.domain = hit_id
        # fall back to the gene's name when no locus tag is set
        feature.locus_tag = gene.locus_tag or gene.get_name()
        feature.detection = "hmmscan"
        feature.database = "nrpspksdomains.hmm"
        feature.evalue = hit.evalue
        feature.score = hit.bitscore

        # NOTE(review): this slice runs to query_end + 1 while the location above
        # is built from query_end itself - confirm whether query_end is inclusive
        feature.translation = gene.translation[start:end + 1]

        # numbering is 1-based, so bump the counter before building the name
        occurrence = seen_per_hit_id[hit_id] + 1
        seen_per_hit_id[hit_id] = occurrence
        name = "{0}_{1}.{2}".format(gene.get_name(), hit_id, occurrence)

        feature.domain_id = "nrpspksdomains_" + name
        feature.label = name

        features_by_hit[hit] = feature

    return features_by_hit
# Example no. 2
# 0
    def generate_domains(self):
        """ Builds AntismashDomain fixtures from the PKS_KS.input FASTA file

            One "PKS_KS" domain is created for each translation in the file,
            followed by a single "PKS_KR" domain that has no translation set
            and is sized from the last PKS_KS domain's translation.

            Returns:
                a list of AntismashDomain instances, with the PKS_KR domain last
        """
        fasta_path = path.get_full_path(__file__, 'data', 'PKS_KS.input')
        sequences = fasta.read_fasta(fasta_path)

        # NOTE(review): this offset is never advanced, so every domain built
        # here starts at the same coordinate - confirm that is intended
        offset = 0

        def new_domain(sized_by):
            """ Creates a domain whose location is sized from the given translation """
            span = FeatureLocation(offset + 10, offset + len(sized_by) * 3 + 16)
            return AntismashDomain(span, tool="test")

        built = []
        for translation in sequences.values():
            ks_domain = new_domain(translation)
            ks_domain.translation = translation
            built.append(ks_domain)
            ks_domain.domain = "PKS_KS"

        # the trailing KR domain reuses the size of the final KS translation
        kr_domain = new_domain(built[-1].translation)
        built.append(kr_domain)
        kr_domain.domain = "PKS_KR"
        return built