def test_record_linkage(self):
    """Check that a supercluster number is only available via its record.

    Requesting the number before the supercluster has been added to the
    record must raise a ValueError; after adding it, the number is
    expected to be 1.
    """
    members = self.record.get_clusters()
    candidate = SuperCluster(SuperCluster.kinds.INTERLEAVED, members)

    # not yet attached to the record, so the lookup has to fail
    with self.assertRaisesRegex(ValueError,
                                "SuperCluster not contained in record"):
        candidate.get_supercluster_number()

    # once attached, the record can supply the number
    self.record.add_supercluster(candidate)
    number = candidate.get_supercluster_number()
    assert number == 1
def test_unique_clusters(self):
    """Check that a cluster shared by two superclusters is reported once.

    Three clusters are split over two superclusters so that the middle
    cluster belongs to both; the region's unique clusters must then equal
    the original three clusters, without a duplicate entry.
    """
    members = []
    for index, product in enumerate("ABC"):
        members.append(create_cluster(index, 10, product=product))

    first = SuperCluster(SuperCluster.kinds.INTERLEAVED, members[:2])
    second = SuperCluster(SuperCluster.kinds.INTERLEAVED, members[1:])

    # sanity check: the middle cluster really is shared by both
    shared = members[1]
    assert shared in first.clusters
    assert shared in second.clusters

    region = Region(superclusters=[first, second])
    unique = region.get_unique_clusters()
    # if the cluster in both superclusters is repeated, there'll be an extra
    assert len(unique) == 3
    assert unique == members
def test_genbank(self):
    """Round-trip a region through a genbank file and compare the result.

    A record with two clusters, one subregion, one supercluster and one
    region is built; the region is written to a temporary genbank file,
    parsed back into a record, and the regenerated region is checked for
    location (relative to the original region's start), products and
    probabilities.
    """
    source_record = Record(Seq("A" * 100, generic_dna))

    members = [
        create_cluster(3, 20, "prodA"),
        create_cluster(25, 41, "prodB"),
    ]
    for member in members:
        source_record.add_cluster(member)

    sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    source_record.add_subregion(sub)

    candidate = SuperCluster(SuperCluster.kinds.NEIGHBOURING, members)
    source_record.add_supercluster(candidate)

    region = Region(superclusters=[candidate], subregions=[sub])
    source_record.add_region(region)

    # the file must be parsed while the temporary file still exists
    with NamedTemporaryFile(suffix=".gbk") as handle:
        region.write_to_genbank(handle.name)
        parsed = list(seqio.parse(handle.name))

    assert len(parsed) == 1
    rebuilt_record = Record.from_biopython(parsed[0], taxon="bacteria")
    assert len(rebuilt_record.get_regions()) == 1

    rebuilt = rebuilt_record.get_region(0)
    # coordinates in the written file are relative to the region's start
    offset = region.location.start
    assert rebuilt.location.start == 3 - offset
    assert rebuilt.location.end == 71 - offset
    assert rebuilt.products == region.products
    assert rebuilt.probabilities == region.probabilities
def test_products(self):
    """Check a region's product list and its combined product string.

    With a single default cluster the products are ``["a"]``. With two
    adjacent clusters producing "b" then "a", the product list is expected
    to be ``["b", "a"]`` while the product string is expected to be "a-b".
    """
    lone = SuperCluster(SuperCluster.kinds.SINGLE, [create_cluster(0, 10)])
    single_region = Region(superclusters=[lone])
    assert single_region.products == ["a"]
    assert single_region.get_product_string() == "a"

    # two back-to-back clusters of ten bases each, products "b" then "a"
    pair = [
        SuperCluster(SuperCluster.kinds.SINGLE,
                     [create_cluster(start, start + 10, product=product)])
        for start, product in zip((0, 10), "ba")
    ]
    double_region = Region(superclusters=pair)
    assert double_region.products == ["b", "a"]
    assert double_region.get_product_string() == "a-b"
def test_conversion(self):
    """Round-trip a supercluster through its biopython representation.

    The supercluster is converted to a biopython feature, whose qualifiers
    are checked, then regenerated as a temporary supercluster and finally
    converted back into a real supercluster linked to the record's
    clusters. A conversion against a record without those clusters must
    raise a ValueError.
    """
    kind = SuperClusterKind.INTERLEAVED
    original = SuperCluster(kind, self.record.get_clusters(),
                            smiles="dummy smiles", polymer="dummy polymer")
    self.record.add_supercluster(original)
    assert original.products == ["a"]
    assert len(original.clusters) == 1

    features = original.to_biopython()
    assert len(features) == 1
    feature = features[0]

    # every qualifier value is expected to be a list of strings
    expected_qualifiers = {
        "product": ["a"],
        "kind": [str(kind)],
        "candidate_cluster_number": [str(original.get_supercluster_number())],
        "SMILES": ["dummy smiles"],
        "polymer": ["dummy polymer"],
        "contig_edge": ["True"],
    }
    for name, value in expected_qualifiers.items():
        assert feature.qualifiers[name] == value

    regenerated = SuperCluster.from_biopython(feature)
    assert isinstance(regenerated, TemporarySuperCluster)
    assert regenerated.products == original.products
    assert regenerated.location == original.location
    assert regenerated.smiles_structure == original.smiles_structure
    assert regenerated.polymer == original.polymer
    # the temporary form refers to clusters by number, not by object
    cluster_numbers = [
        member.get_cluster_number() for member in self.record.get_clusters()
    ]
    assert regenerated.clusters == cluster_numbers
    assert regenerated.kind == original.kind

    real = regenerated.convert_to_real_feature(self.record)
    assert isinstance(real, SuperCluster)
    assert len(real.clusters) == len(self.record.get_clusters())
    # the real feature must hold the record's own cluster objects
    for linked, owned in zip(real.clusters, self.record.get_clusters()):
        assert linked is owned

    # attempt a conversion with a record missing the cluster
    self.record.clear_clusters()
    with self.assertRaisesRegex(ValueError,
                                "Not all referenced clusters are present"):
        regenerated.convert_to_real_feature(self.record)
def test_probabilities(self):
    """Check that a region collects the probabilities of its subregions.

    Without subregions, or with a subregion whose probability is None, the
    list is empty; each further subregion with a probability is expected
    to contribute its value in insertion order.
    """
    location = FeatureLocation(0, 10)
    supers = [
        SuperCluster(SuperCluster.kinds.SINGLE, [create_cluster(0, 10)]),
    ]
    assert Region(superclusters=supers).probabilities == []

    # a subregion without a probability contributes nothing
    subregions = [SubRegion(location, "testtool", probability=None)]
    built = Region(superclusters=supers, subregions=subregions)
    assert built.probabilities == []

    # each added subregion extends the expected list by its probability
    steps = (
        (0.1, [0.1]),
        (0.7, [0.1, 0.7]),
    )
    for probability, expected in steps:
        subregions.append(
            SubRegion(location, "testtool", probability=probability))
        built = Region(superclusters=supers, subregions=subregions)
        assert built.probabilities == expected
def setUp(self):
    """Build the fixture: one cluster, a supercluster and subregion over
    it, and a region containing both.
    """
    cluster = self.cluster = create_cluster(0, 10)
    supercluster = self.super = SuperCluster(SuperCluster.kinds.SINGLE,
                                             [cluster])
    # the subregion covers exactly the same location as the cluster
    subregion = self.sub = SubRegion(cluster.location, "testtool")
    self.region = Region(superclusters=[supercluster],
                         subregions=[subregion])
def test_smiles_and_polymer(self):
    """Check that SMILES structure and polymer are None when not supplied
    at construction.
    """
    members = self.record.get_clusters()
    candidate = SuperCluster(SuperCluster.kinds.INTERLEAVED, members)
    assert candidate.smiles_structure is None
    assert candidate.polymer is None
def test_rules(self):
    """Check that a supercluster's detection rules are those of its
    clusters, in the same order.
    """
    candidate = SuperCluster(SuperCluster.kinds.INTERLEAVED,
                             self.record.get_clusters())
    found = candidate.detection_rules
    expected = [
        member.detection_rule for member in self.record.get_clusters()
    ]
    assert found == expected
def test_no_clusters(self):
    """Check that constructing a supercluster with no clusters raises a
    ValueError.
    """
    pattern = "cannot exist without at least one"
    no_clusters = []
    with self.assertRaisesRegex(ValueError, pattern):
        SuperCluster(SuperCluster.kinds.INTERLEAVED, no_clusters)
def test_bad_kind(self):
    """Check that a kind that is not a SuperClusterKind raises a
    TypeError.
    """
    invalid_kind = "berf"
    pattern = "should be SuperClusterKind"
    with self.assertRaisesRegex(TypeError, pattern):
        SuperCluster(invalid_kind, self.record.get_clusters())