def generate_domain_features(gene: CDSFeature, domains: List[HMMResult]) -> Dict[HMMResult, AntismashDomain]:
    """ Builds an AntismashDomain feature for every HMMResult supplied.

        Each feature is labelled "<gene name>_<hit id>.<n>", where n counts,
        starting from 1, the hits with that id seen so far in this call.

        Arguments:
            gene: the CDSFeature that the domains were found in
            domains: a list of the HMMResults found in that CDSFeature

        Returns:
            a dictionary mapping each HMMResult to the AntismashDomain built from it
    """
    features_by_hit = {}
    occurrences = defaultdict(int)  # type: Dict[str, int]
    for hit in domains:
        feature = AntismashDomain(
            gene.get_sub_location_from_protein_coordinates(hit.query_start, hit.query_end),
            tool="nrps_pks_domains",
        )

        # details copied from the hit and its parent gene
        feature.domain = hit.hit_id
        feature.locus_tag = gene.locus_tag or gene.get_name()
        feature.detection = "hmmscan"
        feature.database = "nrpspksdomains.hmm"
        feature.evalue = hit.evalue
        feature.score = hit.bitscore
        feature.translation = gene.translation[hit.query_start:hit.query_end + 1]

        # numbering is 1-indexed, so the counter is bumped before it is read
        occurrences[hit.hit_id] += 1
        numbered_name = "{}_{}.{}".format(gene.get_name(), hit.hit_id, occurrences[hit.hit_id])
        feature.domain_id = "nrpspksdomains_" + numbered_name
        feature.label = numbered_name

        features_by_hit[hit] = feature
    return features_by_hit
def generate_domains(self):
    """ Builds the list of domains used as test input.

        One "PKS_KS" AntismashDomain is created for each translation read
        from the 'PKS_KS.input' data file, then a single "PKS_KR" domain is
        appended. The final domain's location is sized from the last PKS_KS
        translation, but no translation is assigned to it here.

        Returns:
            a list of AntismashDomains, with the PKS_KR domain last
    """
    offset = 0  # never advanced, so every location is built from the same offset

    def location_for(protein):
        # start is offset + 10, end is offset + 3 * len(protein) + 16
        return FeatureLocation(offset + 10, offset + len(protein) * 3 + 16)

    source = path.get_full_path(__file__, 'data', 'PKS_KS.input')
    built = []
    for protein in fasta.read_fasta(source).values():
        ks_domain = AntismashDomain(location_for(protein), tool="test")
        ks_domain.translation = protein
        ks_domain.domain = "PKS_KS"
        built.append(ks_domain)

    # a single trailing domain of a different type
    kr_domain = AntismashDomain(location_for(built[-1].translation), tool="test")
    kr_domain.domain = "PKS_KR"
    built.append(kr_domain)
    return built