def test_genbank(self):
    """Write a region to a GenBank file and check that it can be read back.

    A record is populated with two protoclusters, a subregion, a candidate
    cluster and a region built from them. The region is written to a
    temporary ``.gbk`` file, parsed again, and the single region of the
    regenerated record is compared with the original.
    """
    dummy_record = Record(Seq("A"*100, generic_dna))
    clusters = [
        create_protocluster(3, 20, "prodA"),
        create_protocluster(25, 41, "prodB"),
    ]
    for cluster in clusters:
        dummy_record.add_protocluster(cluster)
    subregion = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    dummy_record.add_subregion(subregion)
    candidate = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING, clusters)
    dummy_record.add_candidate_cluster(candidate)
    region = Region(candidate_clusters=[candidate], subregions=[subregion])
    dummy_record.add_region(region)

    with NamedTemporaryFile(suffix=".gbk") as output:
        region.write_to_genbank(output.name)
        # materialise the parser output while the temporary file still exists
        bio = list(seqio.parse(output.name))

    assert len(bio) == 1
    rec = Record.from_biopython(bio[0], taxon="bacteria")
    assert len(rec.get_regions()) == 1
    new = rec.get_region(0)
    # the expected coordinates are the original 3 and 71,
    # shifted by the start of the region that was written out
    assert new.location.start == 3 - region.location.start
    assert new.location.end == 71 - region.location.start
    assert new.products == region.products
    assert new.probabilities == region.probabilities
def test_rules(self):
    """A candidate cluster's detection rules match those of its protoclusters.

    The expected value is the list of ``detection_rule`` values taken from the
    record's protoclusters, in the order the record returns them.
    """
    candidate = CandidateCluster(CandidateCluster.kinds.INTERLEAVED,
                                 self.record.get_protoclusters())
    expected_rules = []
    for proto in self.record.get_protoclusters():
        expected_rules.append(proto.detection_rule)
    assert candidate.detection_rules == expected_rules
def test_comparison(self):
    """Candidate clusters order consistently via ``<`` and ``sorted``.

    Each ordered pair is checked three ways: against the other candidate,
    against the other candidate's location, and through ``sorted``.
    """
    kind = CandidateClusterKind.NEIGHBOURING
    candidate = CandidateCluster(kind, [create_cluster(5, 10, 20, 25, "a")])
    longer = CandidateCluster(kind, [create_cluster(5, 10, 40, 45, "a")])
    after = CandidateCluster(kind, [create_cluster(10, 20, 40, 45, "a")])

    # (smaller, larger) pairs, in the order they are expected to sort
    ordered_pairs = (
        (candidate, after),
        (longer, candidate),
        (longer, after),
    )
    for smaller, larger in ordered_pairs:
        assert smaller < larger
        assert smaller < larger.location
        assert sorted([larger, smaller]) == [smaller, larger]

    assert sorted([after, candidate, longer]) == [longer, candidate, after]
def test_conversion(self):
    """Round-trip a candidate cluster through biopython and back.

    The candidate cluster is converted to a biopython feature, regenerated as
    a temporary candidate cluster, and then converted to a real feature using
    the record. A final conversion, attempted after the record's protoclusters
    have been cleared, is expected to raise a ValueError.
    """
    kind = CandidateClusterKind.INTERLEAVED
    original = CandidateCluster(
        kind,
        self.record.get_protoclusters(),
        smiles="dummy smiles",
        polymer="dummy polymer",
    )
    self.record.add_candidate_cluster(original)
    assert original.products == ["a"]
    assert len(original.protoclusters) == 1

    features = original.to_biopython()
    assert len(features) == 1
    feature = features[0]
    expected_qualifiers = {
        "product": ["a"],
        "kind": [str(kind)],
        "candidate_cluster_number": [str(original.get_candidate_cluster_number())],
        "SMILES": ["dummy smiles"],
        "polymer": ["dummy polymer"],
        "contig_edge": ["True"],
    }
    for name, value in expected_qualifiers.items():
        assert feature.qualifiers[name] == value

    regenerated = CandidateCluster.from_biopython(feature)
    assert isinstance(regenerated, TemporaryCandidateCluster)
    assert regenerated.products == original.products
    assert regenerated.location == original.location
    assert regenerated.smiles_structure == original.smiles_structure
    assert regenerated.polymer == original.polymer
    # the temporary version is compared against protocluster numbers,
    # not against the protocluster objects themselves
    proto_numbers = []
    for proto in self.record.get_protoclusters():
        proto_numbers.append(proto.get_protocluster_number())
    assert regenerated.protoclusters == proto_numbers
    assert regenerated.kind == original.kind

    real = regenerated.convert_to_real_feature(self.record)
    assert isinstance(real, CandidateCluster)
    assert len(real.protoclusters) == len(self.record.get_protoclusters())
    # the real feature must hold the very same objects as the record
    pairs = zip(real.protoclusters, self.record.get_protoclusters())
    for reference, record_cluster in pairs:
        assert reference is record_cluster

    # attempt a conversion with a record missing the cluster
    self.record.clear_protoclusters()
    with self.assertRaisesRegex(ValueError, "Not all referenced clusters are present"):
        regenerated.convert_to_real_feature(self.record)
def test_unique_clusters(self):
    """A protocluster shared by two candidates is reported only once.

    Three protoclusters are split over two candidate clusters so that the
    middle one belongs to both; the region's unique protoclusters must then
    equal the original three.
    """
    protoclusters = []
    for index, product in enumerate("ABC"):
        protoclusters.append(create_protocluster(index, 10, product=product))

    kind = CandidateCluster.kinds.INTERLEAVED
    first = CandidateCluster(kind, protoclusters[:2])
    second = CandidateCluster(kind, protoclusters[1:])
    candidates = [first, second]

    # sanity check: the middle protocluster really is in both candidates
    shared = protoclusters[1]
    assert shared in first.protoclusters
    assert shared in second.protoclusters

    region = Region(candidate_clusters=candidates)
    unique_clusters = region.get_unique_protoclusters()
    # if the protocluster in both candidates is repeated, there'll be an extra
    assert len(unique_clusters) == 3
    assert unique_clusters == protoclusters
def test_core(self):
    """The core location of a candidate built from two clusters is 10 to 50."""
    first = create_cluster(5, 10, 20, 25, "a")
    second = create_cluster(30, 40, 50, 60, "b")
    candidate = CandidateCluster(
        CandidateClusterKind.NEIGHBOURING,
        [first, second],
        smiles="dummy",
        polymer="dummy",
    )
    expected_core = FeatureLocation(10, 50)
    assert candidate.core_location == expected_core
def test_conversion(self):
    """Round-trip a candidate cluster through biopython using a record.

    The candidate cluster is converted to a biopython feature and rebuilt with
    the record supplied. Rebuilding is then expected to raise a ValueError
    both when the record's protoclusters have been cleared and when no record
    is given.
    """
    kind = CandidateClusterKind.INTERLEAVED
    original = CandidateCluster(
        kind,
        self.record.get_protoclusters(),
        smiles="dummy smiles",
        polymer="dummy polymer",
    )
    self.record.add_candidate_cluster(original)
    assert original.products == ["a"]
    assert len(original.protoclusters) == 1

    features = original.to_biopython()
    assert len(features) == 1
    feature = features[0]
    expected_qualifiers = {
        "product": ["a"],
        "kind": [str(kind)],
        "candidate_cluster_number": [str(original.get_candidate_cluster_number())],
        "SMILES": ["dummy smiles"],
        "polymer": ["dummy polymer"],
        "contig_edge": ["True"],
    }
    for name, value in expected_qualifiers.items():
        assert feature.qualifiers[name] == value

    real = CandidateCluster.from_biopython(feature, record=self.record)
    assert isinstance(real, CandidateCluster)
    assert len(real.protoclusters) == len(self.record.get_protoclusters())
    # the rebuilt feature must hold the very same objects as the record
    pairs = zip(real.protoclusters, self.record.get_protoclusters())
    for reference, record_cluster in pairs:
        assert reference is record_cluster

    # attempt a conversion with a record missing the cluster
    self.record.clear_protoclusters()
    with self.assertRaisesRegex(ValueError, "record does not contain all expected protoclusters"):
        CandidateCluster.from_biopython(feature, record=self.record)

    # and with no record
    with self.assertRaisesRegex(ValueError, "record instance required"):
        CandidateCluster.from_biopython(feature)
def test_sideloaded(self):
    """A region reports exactly its sideloaded protocluster and subregion.

    The region holds one ordinary and one sideloaded protocluster, plus one
    ordinary and one sideloaded subregion. Only the two sideloaded areas are
    expected back, protocluster first.
    """
    normal_proto = create_protocluster(3, 20, "prodA")
    sideloaded_proto = SideloadedProtocluster(
        FeatureLocation(25, 41), FeatureLocation(25, 41), "external", "prodB",
    )
    candidate = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING,
                                 [normal_proto, sideloaded_proto])

    normal_sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    sideloaded_sub = SideloadedSubRegion(FeatureLocation(45, 61), "external")
    region = Region(candidate_clusters=[candidate],
                    subregions=[normal_sub, sideloaded_sub])

    found = region.get_sideloaded_areas()
    assert len(found) == 2
    assert found[0] is sideloaded_proto
    assert found[1] is sideloaded_sub
def test_smiles_and_polymer(self):
    """SMILES structure and polymer are None when not given at construction."""
    protoclusters = self.record.get_protoclusters()
    candidate = CandidateCluster(CandidateCluster.kinds.INTERLEAVED, protoclusters)
    assert candidate.smiles_structure is None
    assert candidate.polymer is None
def test_no_clusters(self):
    """Building a candidate cluster from no protoclusters raises ValueError."""
    no_protoclusters = []
    with self.assertRaisesRegex(ValueError, "cannot exist without at least one"):
        CandidateCluster(CandidateCluster.kinds.INTERLEAVED, no_protoclusters)
def test_bad_kind(self):
    """Passing a plain string as the kind raises TypeError."""
    invalid_kind = "berf"
    with self.assertRaisesRegex(TypeError, "should be CandidateClusterKind"):
        CandidateCluster(invalid_kind, self.record.get_protoclusters())