def _run_and_add_to_record(record: Record, module: Any, options: ConfigType,
                           module_results: Dict[str, Union[ModuleResults, Dict[str, Any]]],
                           timings: Dict[str, float]) -> None:
    """ Runs a single detection module over the record and, if the module
        has non-empty results afterwards, adds those results to the record.

        Arguments:
            record: the Record to run the module over
            module: the module to run
            options: antiSMASH config
            module_results: a dictionary mapping module name to that module's results
            timings: a dictionary of timings, passed through to run_module

        Returns:
            None
    """
    run_module(record, cast(AntismashModule, module), options, module_results, timings)
    outcome = module_results.get(module.__name__)
    if not outcome:
        return
    assert isinstance(outcome, ModuleResults)
    logging.debug("Adding detection results from %s to record", module.__name__)
    outcome.add_to_record(record)


def run_detection(record: Record, options: ConfigType,
                  module_results: Dict[str, Union[ModuleResults, Dict[str, Any]]]) -> Dict[str, float]:
    """ Detect different secondary metabolite clusters, PFAMs, and domains.

        Detection happens in three stages: whole-record detection, then
        protocluster/subregion prediction followed by the construction of
        candidate clusters and regions, then detection that is restricted to
        genes within clusters. If no regions exist after the second stage,
        the record is marked as skipped and the third stage is not run.

        Arguments:
            record: the Record to run detection over
            options: antiSMASH config
            module_results: a dictionary mapping a module's name to results from
                            a previous run on this module, as a ModuleResults subclass
                            or in JSON form

        Returns:
            the time taken by each detection module as a dictionary
    """
    timings = {}  # type: Dict[str, float]

    # stage one: full genome detections
    for module in (full_hmmer,):
        _run_and_add_to_record(record, module, options, module_results, timings)

    # stage two: cluster predictions
    logging.info("Detecting secondary metabolite clusters")
    for module in (hmm_detection, cassis, clusterfinder_probabilistic):
        run_module(record, cast(AntismashModule, module), options, module_results, timings)
        predictions = module_results.get(module.__name__)
        if not predictions:
            continue
        assert isinstance(predictions, DetectionResults)
        for protocluster in predictions.get_predicted_protoclusters():
            record.add_protocluster(protocluster)
        for subregion in predictions.get_predicted_subregions():
            record.add_subregion(subregion)

    logging.debug("%d protoclusters found", len(record.get_protoclusters()))
    logging.debug("%d subregions found", len(record.get_subregions()))

    record.create_candidate_clusters()
    record.create_regions()

    # with no regions there is nothing for the cluster-limited modules to do
    if not record.get_regions():
        logging.info("No regions detected, skipping record")
        record.skip = "No regions detected"
        return timings

    logging.info("%d region(s) detected in record", len(record.get_regions()))

    # stage three: any detection limited to genes in clusters
    for module in (nrps_pks_domains, cluster_hmmer, genefunctions):
        _run_and_add_to_record(record, module, options, module_results, timings)

    return timings
def test_genbank(self):
    # build a record holding two clusters, a subregion overlapping the
    # second cluster, and a single region covering all of them
    source_record = Record(Seq("A" * 100, generic_dna))
    clusters = [
        create_cluster(3, 20, "prodA"),
        create_cluster(25, 41, "prodB"),
    ]
    for cluster in clusters:
        source_record.add_cluster(cluster)

    subregion = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    source_record.add_subregion(subregion)

    supercluster = SuperCluster(SuperCluster.kinds.NEIGHBOURING, clusters)
    source_record.add_supercluster(supercluster)

    region = Region(superclusters=[supercluster], subregions=[subregion])
    source_record.add_region(region)

    # write the region out, then read everything back before the file is removed
    with NamedTemporaryFile(suffix=".gbk") as output:
        region.write_to_genbank(output.name)
        parsed = list(seqio.parse(output.name))

    assert len(parsed) == 1
    converted = Record.from_biopython(parsed[0], taxon="bacteria")
    assert len(converted.get_regions()) == 1

    # the reloaded region's coordinates are expected to be relative to the
    # start of the original region
    reloaded = converted.get_region(0)
    offset = region.location.start
    assert reloaded.location.start == 3 - offset
    assert reloaded.location.end == 71 - offset
    assert reloaded.products == region.products
    assert reloaded.probabilities == region.probabilities
def test_prepeptide_adjustment(self):
    source_record = Record(Seq("A"*400, generic_dna))
    subregion = DummySubRegion(start=100, end=300)
    source_record.add_subregion(subregion)
    region = Region(subregions=[subregion])
    source_record.add_region(region)

    prepeptide = DummyFeature(200, 230, 1, "CDS_motif")
    # ensure both FeatureLocation and CompoundLocations are handled appropriately
    leader_location = FeatureLocation(200, 210, 1)
    tail_location = CompoundLocation([
        FeatureLocation(220, 223, -1),
        FeatureLocation(227, 230, -1),
    ])
    prepeptide._qualifiers["leader_location"] = [str(leader_location)]
    prepeptide._qualifiers["tail_location"] = [str(tail_location)]
    source_record.add_feature(prepeptide)
    # and add a CDS_motif without either qualifier (e.g. NRPS/PKS motif) to ensure that doesn't break
    source_record.add_feature(DummyFeature(250, 280, 1, "CDS_motif"))

    # read the written file back in before the temporary file is removed
    with NamedTemporaryFile(suffix=".gbk") as output:
        region.write_to_genbank(output.name)
        converted = list(seqio.parse(output.name))[0]
    assert len(converted.features) == 4

    # gather the (leader, tail) qualifiers of every feature that has both
    adjusted = []
    for feature in converted.features:
        tail = feature.qualifiers.get("tail_location")
        leader = feature.qualifiers.get("leader_location")
        if tail and leader:
            adjusted.append((leader, tail))

    for leader, tail in adjusted:
        # the part locations should now be adjusted backwards 100 bases
        assert leader == ["[100:110](+)"]
        assert tail == ["join{[120:123](-), [127:130](-)}"]
    assert adjusted, "prepeptide feature missing in conversion"
def generate_results(record: Record, options: ConfigType) -> ClusterFinderResults:
    """ Find and construct probabilistic cluster areas.

        Every prediction from find_probabilistic_clusters() is converted into
        a SubRegion with the tool name "clusterfinder".

        Arguments:
            record: the Record to find clusters in, the new subregions are also
                    added to it when options.cf_create_clusters is set
            options: antiSMASH config, of which cf_create_clusters is read here

        Returns:
            a ClusterFinderResults instance containing the new subregions
    """
    areas = [
        SubRegion(prediction.location, tool="clusterfinder", probability=prediction.probability)
        for prediction in find_probabilistic_clusters(record, options)
    ]
    create_clusters = options.cf_create_clusters
    # NOTE(review): run_detection also adds whatever get_predicted_subregions()
    # returns to the record, confirm these areas aren't being added twice
    if create_clusters:
        for area in areas:
            record.add_subregion(area)
    return ClusterFinderResults(record.id, areas, create=create_clusters)
def add_to_record(self, record: Record) -> None:
    """ Stores the promoters held by these results in the record, then adds
        each of the held subregions to the record.

        Arguments:
            record: the Record to add the promoters and subregions to

        Returns:
            None
    """
    store_promoters(self.promoters, record)
    for subregion in self.subregions:
        record.add_subregion(subregion)
def add_to_record(self, record: Record) -> None:
    """ Adds every held area to the record as a subregion, unless these
        results were flagged as creating new clusters, in which case nothing
        is added.

        Arguments:
            record: the Record to add the areas to

        Returns:
            None
    """
    # when creating new clusters, get_predicted_subregions covered it already
    if not self.create_new_clusters:
        for subregion in self.areas:
            record.add_subregion(subregion)