def generate_motif_features(feature: CDSFeature, motifs: List[HMMResult]) -> List[CDSMotif]:
    """ Builds one CDSMotif feature for each HMMResult found within a CDS

        Arguments:
            feature: the CDSFeature the motifs were found in
            motifs: the HMMResults to convert, numbered in the order given

        Returns:
            a list of CDSMotif features, one per HMMResult, in input order
    """
    cds_name = feature.get_name()
    # the locus tag is preferred for naming, the CDS name is only the fallback
    locus_tag = feature.locus_tag or cds_name

    converted = []
    # the counter ends up in user facing identifiers, so it starts at 1
    for number, hit in enumerate(motifs, 1):
        start = hit.query_start
        end = hit.query_end
        dna_location = feature.get_sub_location_from_protein_coordinates(start, end)
        protein_location = FeatureLocation(start, end)

        cds_motif = CDSMotif(dna_location, cds_name, protein_location,
                             tool="nrps_pks_domains")
        cds_motif.label = hit.hit_id
        cds_motif.domain_id = 'nrpspksmotif_{}_{:04d}'.format(locus_tag, number)
        cds_motif.evalue = hit.evalue
        cds_motif.score = hit.bitscore
        cds_motif.detection = "hmmscan"
        cds_motif.database = "abmotifs"
        cds_motif.locus_tag = locus_tag
        # the portion of the CDS translation covered by the hit
        cds_motif.translation = feature.translation[start:end]

        converted.append(cds_motif)
    return converted
def generate_domain_features(gene: CDSFeature, domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Generates AntismashDomain features for each provided HMMResult

        Domains are numbered separately for each hit id, starting from 1 and
        following the order of the provided list. That number is part of both
        the domain id and the label of the generated feature.

        Arguments:
            gene: the CDSFeature the domains were found in
            domains: a list of HMMResults found in the CDSFeature

        Returns:
            a dictionary mapping the HMMResult used to the matching AntismashDomain
    """
    new_features = {}
    domain_counts = defaultdict(int)  # type: Dict[str, int]
    for domain in domains:
        loc = gene.get_sub_location_from_protein_coordinates(domain.query_start, domain.query_end)

        # set up new feature
        new_feature = AntismashDomain(loc, tool="nrps_pks_domains")
        new_feature.domain = domain.hit_id
        new_feature.locus_tag = gene.locus_tag or gene.get_name()
        new_feature.detection = "hmmscan"
        new_feature.database = "nrpspksdomains.hmm"
        new_feature.evalue = domain.evalue
        new_feature.score = domain.bitscore

        # slice with exactly the coordinates used to build the location above,
        # so the translation does not carry a residue the location lacks
        # NOTE(review): assumes query_end is an exclusive end, as the slice in
        # generate_motif_features also does - confirm against HMMResult
        new_feature.translation = gene.translation[domain.query_start:domain.query_end]

        domain_counts[domain.hit_id] += 1  # 1-indexed, so increment before use
        domain_name = "{}_{}.{}".format(gene.get_name(), domain.hit_id, domain_counts[domain.hit_id])

        new_feature.domain_id = "nrpspksdomains_" + domain_name
        new_feature.label = domain_name

        new_features[domain] = new_feature
    return new_features
def __init__(self, feature: CDSFeature) -> None:
    """ Sets up the container from a CDS, with no domains or modules yet

        Arguments:
            feature: the CDSFeature providing the translation and the name
    """
    # NOTE(review): presumably the attribute names the parent class
    # exposes - confirm against the parent's constructor
    attribute_names = ["id", "sequence", "domains", "modules"]
    super().__init__(attribute_names)

    self.sequence = feature.translation
    self.id = feature.get_name()
    # both start out empty and are not populated here
    self.domains = []  # type: List[JSONDomain]
    self.modules = []  # type: List[JSONModule]
def __init__(self, feature: CDSFeature) -> None:
    """ Sets up the container from a CDS, with no domains yet

        Arguments:
            feature: the CDSFeature providing the translation and the name
    """
    # NOTE(review): presumably the attribute names the parent class
    # exposes - confirm against the parent's constructor
    attribute_names = ['id', 'sequence', 'domains']
    super().__init__(attribute_names)

    self.sequence = feature.translation
    self.id = feature.get_name()
    # starts out empty and is not populated here
    self.domains = []  # type: List[JSONDomain]