def test_probabilities(self):
    """Region.probabilities should list subregion probabilities in order, omitting None."""
    candidates = [DummyCandidateCluster([create_protocluster(0, 10)])]
    location = FeatureLocation(0, 10)

    # with no subregions there is nothing to report
    assert Region(candidate_clusters=candidates).probabilities == []

    # each step adds one more subregion to the same list and rebuilds the region
    steps = [
        (None, []),
        (0.1, [0.1]),
        (0.7, [0.1, 0.7]),
    ]
    subregions = []
    for probability, expected in steps:
        subregions.append(SubRegion(location, "testtool", probability=probability))
        region = Region(candidate_clusters=candidates, subregions=subregions)
        assert region.probabilities == expected
def test_products(self):
    """Check Region.products and the product string for one and for two candidates."""
    # a single protocluster built without an explicit product is expected to yield "a"
    single = Region(candidate_clusters=[DummyCandidateCluster([create_protocluster(0, 10)])])
    assert single.products == ["a"]
    assert single.get_product_string() == "a"

    # two adjacent 10-base protoclusters, products given in reverse alphabetical order
    candidates = [
        DummyCandidateCluster([create_protocluster(index * 10, (index + 1) * 10, product=product)])
        for index, product in enumerate("ba")
    ]
    combined = Region(candidate_clusters=candidates)
    # products are expected in the order given, the product string alphabetically
    assert combined.products == ["b", "a"]
    assert combined.get_product_string() == "a,b"
def test_probabilities(self):
    """Region.probabilities should only contain the non-None subregion probabilities."""
    location = FeatureLocation(0, 10)
    superclusters = [SuperCluster(SuperCluster.kinds.SINGLE, [create_cluster(0, 10)])]

    # a region without subregions has no probabilities
    assert Region(superclusters=superclusters).probabilities == []

    # the subregion list grows by one each round; the expected result is listed alongside
    probabilities = (None, 0.1, 0.7)
    expectations = ([], [0.1], [0.1, 0.7])
    subregions = []
    for probability, expected in zip(probabilities, expectations):
        subregions.append(SubRegion(location, "testtool", probability=probability))
        assert Region(superclusters=superclusters, subregions=subregions).probabilities == expected
def test_genbank(self):
    """Write a region to a genbank file, read it back and compare it with the original."""
    record = Record(Seq("A" * 100, generic_dna))

    # every child feature is added to the record before the region that uses it
    members = [create_cluster(3, 20, "prodA"), create_cluster(25, 41, "prodB")]
    for member in members:
        record.add_cluster(member)
    sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    record.add_subregion(sub)
    neighbours = SuperCluster(SuperCluster.kinds.NEIGHBOURING, members)
    record.add_supercluster(neighbours)
    region = Region(superclusters=[neighbours], subregions=[sub])
    record.add_region(region)

    # the temporary file is only needed for writing and immediately re-reading
    with NamedTemporaryFile(suffix=".gbk") as handle:
        region.write_to_genbank(handle.name)
        parsed = list(seqio.parse(handle.name))
    assert len(parsed) == 1

    rebuilt = Record.from_biopython(parsed[0], taxon="bacteria")
    assert len(rebuilt.get_regions()) == 1
    restored = rebuilt.get_region(0)
    # coordinates in the written file are expected relative to the original region's start
    assert restored.location.start == 3 - region.location.start
    assert restored.location.end == 71 - region.location.start
    assert restored.products == region.products
    assert restored.probabilities == region.probabilities
def test_prepeptide_adjustment(self):
    """Check that prepeptide part locations are shifted when a region is written to genbank."""
    record = Record(Seq("A"*400, generic_dna))
    sub = DummySubRegion(start=100, end=300)
    record.add_subregion(sub)
    region = Region(subregions=[sub])
    record.add_region(region)

    prepeptide = DummyFeature(200, 230, 1, "CDS_motif")
    # ensure both FeatureLocation and CompoundLocations are handled appropriately
    leader_location = FeatureLocation(200, 210, 1)
    tail_location = CompoundLocation([
        FeatureLocation(220, 223, -1),
        FeatureLocation(227, 230, -1),
    ])
    prepeptide._qualifiers["leader_location"] = [str(leader_location)]
    prepeptide._qualifiers["tail_location"] = [str(tail_location)]
    record.add_feature(prepeptide)
    # and add a CDS_motif without either qualifier (e.g. NRPS/PKS motif) to ensure that doesn't break
    record.add_feature(DummyFeature(250, 280, 1, "CDS_motif"))

    with NamedTemporaryFile(suffix=".gbk") as handle:
        region.write_to_genbank(handle.name)
        written = list(seqio.parse(handle.name))[0]
    assert len(written.features) == 4

    found = False
    for feature in written.features:
        tail = feature.qualifiers.get("tail_location")
        leader = feature.qualifiers.get("leader_location")
        # only the prepeptide carries both qualifiers
        if not (tail and leader):
            continue
        # the part locations should now be adjusted backwards 100 bases
        assert leader == ["[100:110](+)"]
        assert tail == ["join{[120:123](-), [127:130](-)}"]
        found = True
    assert found, "prepeptide feature missing in conversion"
def test_products(self):
    """Check Region.products and the product string for one and for two superclusters."""
    # a cluster built without an explicit product is expected to yield "a"
    lone = Region(superclusters=[SuperCluster(SuperCluster.kinds.SINGLE, [create_cluster(0, 10)])])
    assert lone.products == ["a"]
    assert lone.get_product_string() == "a"

    # two consecutive 10-base clusters with products "b" then "a"
    superclusters = [
        SuperCluster(SuperCluster.kinds.SINGLE,
                     [create_cluster(index * 10, (index + 1) * 10, product=product)])
        for index, product in enumerate("ba")
    ]
    paired = Region(superclusters=superclusters)
    # products keep the given order, the product string is expected alphabetically
    assert paired.products == ["b", "a"]
    assert paired.get_product_string() == "a-b"
def test_unique_clusters(self):
    """A protocluster shared by two candidates should appear once in the unique list."""
    first, shared, last = (create_protocluster(start, 10, product=product)
                           for start, product in enumerate("ABC"))
    protoclusters = [first, shared, last]

    interleaved = CandidateCluster.kinds.INTERLEAVED
    candidates = [
        CandidateCluster(interleaved, [first, shared]),
        CandidateCluster(interleaved, [shared, last]),
    ]
    # sanity check: the middle protocluster really is a member of both candidates
    assert shared in candidates[0].protoclusters
    assert shared in candidates[1].protoclusters

    unique = Region(candidate_clusters=candidates).get_unique_protoclusters()
    # if the protocluster in both candidates is repeated, there'll be an extra
    assert len(unique) == 3
    assert unique == protoclusters
def test_unique_clusters(self):
    """A cluster shared by two superclusters should appear once in the unique list."""
    clusters = []
    for start, product in enumerate("ABC"):
        clusters.append(create_cluster(start, 10, product=product))

    # both superclusters include the middle cluster
    overlapping = [
        SuperCluster(SuperCluster.kinds.INTERLEAVED, clusters[:2]),
        SuperCluster(SuperCluster.kinds.INTERLEAVED, clusters[1:]),
    ]
    assert all(clusters[1] in supercluster.clusters for supercluster in overlapping[:2])

    region = Region(superclusters=overlapping)
    found = region.get_unique_clusters()
    # if the cluster in both superclusters is repeated, there'll be an extra
    assert len(found) == 3
    assert found == clusters
def test_limited_add_cds_propagation(self):
    """Adding a CDS to the region should reach the cluster and supercluster but not the subregion."""
    cds = DummyCDS(0, 10)
    # replace the fixture's subregion with one at 20-30, away from the CDS at 0-10,
    # and rebuild the region around it (self.cluster / self.super presumably come from setUp)
    self.sub = SubRegion(FeatureLocation(20, 30), "testtool")
    self.region = Region(superclusters=[self.super], subregions=[self.sub])

    # ensure all empty to start with
    for area in (self.cluster, self.super, self.sub, self.region):
        assert not area.cds_children
    assert not cds.region

    self.region.add_cds(cds)

    expected = (cds, )
    assert self.cluster.cds_children == expected
    assert self.super.cds_children == expected
    # the subregion must not have picked up the CDS
    assert not self.sub.cds_children
    assert self.region.cds_children == expected
    assert cds.region is self.region
def test_sideloaded(self):
    """get_sideloaded_areas should return the sideloaded protocluster, then the sideloaded subregion."""
    # one regular and one sideloaded protocluster, joined in a single candidate
    regular_proto = create_protocluster(3, 20, "prodA")
    external_proto = SideloadedProtocluster(FeatureLocation(25, 41), FeatureLocation(25, 41),
                                            "external", "prodB")
    candidate = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING,
                                 [regular_proto, external_proto])

    # likewise one regular and one sideloaded subregion
    regular_sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    external_sub = SideloadedSubRegion(FeatureLocation(45, 61), "external")

    region = Region(candidate_clusters=[candidate], subregions=[regular_sub, external_sub])
    areas = region.get_sideloaded_areas()
    assert len(areas) == 2
    first, second = areas[0], areas[1]
    assert first is external_proto
    assert second is external_sub
def test_missing_children(self):
    """Region should raise a ValueError matching "at least one" when given no children."""
    # once with no arguments at all, once with explicitly empty child lists
    for kwargs in ({}, {"superclusters": [], "subregions": []}):
        with self.assertRaisesRegex(ValueError, "at least one"):
            Region(**kwargs)
def test_incorrect_args(self):
    """Region should raise AssertionError when children are passed under the wrong keyword."""
    swapped = (
        {"superclusters": [self.sub]},   # a subregion where superclusters are expected
        {"subregions": [self.super]},    # a supercluster where subregions are expected
    )
    for kwargs in swapped:
        with self.assertRaises(AssertionError):
            Region(**kwargs)
def setUp(self):
    """Build a cluster, a single-kind supercluster, a matching subregion and a region of both."""
    cluster = create_cluster(0, 10)
    supercluster = SuperCluster(SuperCluster.kinds.SINGLE, [cluster])
    # the subregion covers exactly the cluster's location
    subregion = SubRegion(cluster.location, "testtool")

    self.cluster = cluster
    self.super = supercluster
    self.sub = subregion
    self.region = Region(superclusters=[supercluster], subregions=[subregion])
def test_incorrect_args(self):
    """Region should raise AssertionError when children are passed under the wrong keyword."""
    # a subregion is not a valid candidate cluster
    self.assertRaises(AssertionError, Region, candidate_clusters=[self.sub])
    # and a candidate cluster is not a valid subregion
    self.assertRaises(AssertionError, Region, subregions=[self.candidate])
def setUp(self):
    """Build a dummy protocluster, its candidate cluster, a matching subregion and a region of both."""
    protocluster = DummyProtocluster()
    candidate = DummyCandidateCluster([protocluster])
    # the subregion covers exactly the protocluster's location
    subregion = SubRegion(protocluster.location, "testtool")

    self.protocluster, self.candidate, self.sub = protocluster, candidate, subregion
    self.region = Region(candidate_clusters=[candidate], subregions=[subregion])