def setUp(self):
    """Build one dummy cluster per product, each holding seven dummy genes.

    The genes are tagged 'a' through 'f' plus a final 'all' gene, and each
    receives the domain names stored under "nrpspksdomains_<locus_tag>" in
    the mapping returned by ``self.gen_domain_names()``.
    """
    self.genes = []
    self.clusters = []
    domain_names = self.gen_domain_names()
    # six single-letter tags followed by the catch-all gene
    locus_tags = list("abcdef") + ["all"]
    for product in ('not_atpks', 'transatpks'):
        cluster = helpers.DummyCluster(1, 2, products=[product])
        assert cluster.products == (product,)
        for locus_tag in locus_tags:
            gene = helpers.DummyCDS(1, 2, locus_tag=locus_tag)
            gene.product = product
            gene.nrps_pks = DummyNRPSQualfier()
            gene.nrps_pks.domain_names = domain_names["nrpspksdomains_" + locus_tag]
            gene.cluster = cluster
            cluster.add_cds(gene)
            self.genes.append(gene)
        self.clusters.append(cluster)
    self.predictions = [
        'redmxmal', 'ccmal', 'ohemal', 'ohmxmal', 'ohmmal',
        'ccmmal', 'emal', 'redmmal', 'mmal', 'ccmxmal',
        'mxmal', 'redemal', 'ohmal', 'mal', 'ccemal',
    ]
def test_orphaned_supercluster_number(self):
    """A supercluster never added to the record has no number to look up."""
    empty_record = Record("A" * 1000)
    member = helpers.DummyCluster(0, 1000)
    orphan = SuperCluster(SuperCluster.kinds.SINGLE, [member])
    expected_error = "SuperCluster not contained in record"
    with self.assertRaisesRegex(ValueError, expected_error):
        print(empty_record.get_supercluster_number(orphan))
def test_cluster_numbering(self):
    """Clusters added out of order are numbered from 1 in sorted order."""
    record = Record(Seq("A"*1000))
    # deliberately unsorted so numbering cannot depend on insertion order
    for start, end in [(50, 100), (10, 40), (700, 1000), (0, 9)]:
        cluster = helpers.DummyCluster(start, end)
        record.add_cluster(cluster)
    # sorted() accepts any iterable, so no intermediate list() is needed
    for number, cluster in enumerate(sorted(record.get_clusters()), start=1):
        assert cluster.get_cluster_number() == number
def test_classification_with_colon(self):
    """Classify a real gene whose SMCOG description itself contains a colon."""
    # The SMCOG id and description share one string, separated by ':',
    # so a description that contains ':' must still be handled properly.
    # The test gene is AQF52_5530 from CP013129.1
    protein = (
        "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN"
        "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL"
        "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV"
        "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT"
        "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS"
        "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM"
        "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI"
        "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA"
        "PVPVPDQRAEGGEEKARAGV")
    length = len(protein)

    gene = helpers.DummyCDS(0, length)
    gene.translation = protein

    results = smcogs.classify("test", [gene], get_config())
    best_hit = results.best_hits[gene.get_name()]
    assert best_hit.hit_id == "SMCOG1212:sodium:dicarboxylate symporter"

    record = helpers.DummyRecord(seq=protein)
    record.add_cds_feature(gene)
    record.add_cluster(helpers.DummyCluster(0, length))

    # mishandling the extra colons in the hit id would crash here
    results.add_to_record(record)

    smcog_functions = gene.gene_functions.get_by_tool("smcogs")
    assert len(smcog_functions) == 1
    expected_prefix = (
        "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
        " (Score: 416; E-value: 2.3e-126)")
    assert str(smcog_functions[0]).startswith(expected_prefix)
def test_orphaned_cluster_number(self):
    """Both number lookups raise for a cluster that was never added."""
    empty_record = Record(Seq("A" * 1000))
    orphan = helpers.DummyCluster(0, 1000)
    expected_error = "Cluster not contained in record"

    # lookup through the record
    with self.assertRaisesRegex(ValueError, expected_error):
        print(empty_record.get_cluster_number(orphan))

    # lookup through the cluster itself
    with self.assertRaisesRegex(ValueError, expected_error):
        print(orphan.get_cluster_number())
def test_cluster_numbering(self):
    """Cluster numbers start at 1 and follow the sorted order of the clusters.

    One dummy cluster is added to ``self.record`` per (start, end) pair in
    ``self.pairs``; number-based lookup must return the matching object.
    """
    expected = []
    for start, end in self.pairs:
        added = helpers.DummyCluster(start, end)
        self.record.add_cluster(added)
        expected.append(added)
    expected = sorted(expected)

    for number, cluster in enumerate(self.record.get_clusters(), start=1):
        assert cluster.get_cluster_number() == number
        assert self.record.get_cluster(number) is expected[number - 1]
def test_cds_cluster_linkage(self):
    """Each cluster added picks up exactly two overlapping CDS children."""
    cds_spans = [(50, 100), (10, 90), (0, 9), (150, 200)]
    cluster_spans = [(10, 120), (5, 110), (10, 160), (45, 200)]

    record = Record("A"*200)
    for span in cds_spans:
        record.add_cds_feature(helpers.DummyCDS(*span))

    for span in cluster_spans:
        # start from a clean slate so only one cluster exists at a time
        record.clear_clusters()
        cluster = helpers.DummyCluster(*span)
        record.add_cluster(cluster)
        children = cluster.cds_children
        assert len(children) == 2
        for child in children:
            assert child.overlaps_with(cluster)
def test_supercluster_numbering(self):
    """Supercluster numbers start at 1 and follow the sorted order.

    One single-kind supercluster is added to ``self.record`` per location in
    ``self.locations``; number-based lookup must return the matching object.
    """
    expected = []
    for location in self.locations:
        member = helpers.DummyCluster(location.start, location.end)
        added = SuperCluster(SuperCluster.kinds.SINGLE, [member])
        self.record.add_supercluster(added)
        expected.append(added)
    expected = sorted(expected)

    for number, supercluster in enumerate(self.record.get_superclusters(), start=1):
        assert supercluster.get_supercluster_number() == number
        assert self.record.get_supercluster(number) is expected[number - 1]
def test_classification_with_colon(self):
    """A stored hit whose description contains ':' is annotated correctly."""
    # The SMCOG id and description share one string, separated by ':',
    # so a description that contains ':' must still be handled properly.
    gene = helpers.DummyCDS(locus_tag="test")
    record = helpers.DummyRecord(features=[gene], seq="A" * 100)
    record.add_cluster(helpers.DummyCluster(0, 100))

    # inject the hit directly rather than running the classifier
    hit = HMMResult("SMCOG1212:sodium:dicarboxylate_symporter",
                    0, 100, 2.3e-126, 416)
    results = SMCOGResults(record.id)
    results.best_hits[gene.get_name()] = hit
    results.add_to_record(record)

    smcog_functions = gene.gene_functions.get_by_tool("smcogs")
    assert len(smcog_functions) == 1
    expected_prefix = (
        "transport (smcogs) SMCOG1212:sodium:dicarboxylate_symporter"
        " (Score: 416; E-value: 2.3e-126)")
    assert str(smcog_functions[0]).startswith(expected_prefix)
def test_cds_removal(self):
    """Removing a CDS from the record also drops it from its cluster."""
    record = Record(Seq("A" * 1000))
    cluster = helpers.DummyCluster(0, 1000)
    record.add_cluster(cluster)

    removed = helpers.DummyCDS(0, 100, locus_tag="A")
    kept = helpers.DummyCDS(200, 300, locus_tag="B")
    for cds in (removed, kept):
        record.add_cds_feature(cds)

    # both genes are registered with the record and the enclosing cluster
    assert len(record.get_cds_features()) == 2
    assert len(cluster.cds_children) == 2

    record.remove_cds_feature(removed)

    # only gene "B" is left, and record and cluster share the same object
    assert len(record.get_cds_features()) == 1
    assert len(cluster.cds_children) == 1
    assert record.get_cds_features()[0] is list(cluster.cds_children)[0]
    assert record.get_cds_features()[0].locus_tag == "B"