def test_cds_removal(self):
    """Removing a CDS from a record must also drop it from its cluster."""
    record = Record(Seq("A" * 1000))
    cluster = helpers.DummyCluster(0, 1000)
    record.add_cluster(cluster)

    # two CDS features, both added while the cluster is already in the record
    removed = helpers.DummyCDS(0, 100, locus_tag="A")
    kept = helpers.DummyCDS(200, 300, locus_tag="B")
    for cds in (removed, kept):
        record.add_cds_feature(cds)
    assert len(record.get_cds_features()) == 2
    assert len(cluster.cds_children) == 2

    record.remove_cds_feature(removed)

    # both the record and the cluster should now hold only the second CDS
    assert len(record.get_cds_features()) == 1
    assert len(cluster.cds_children) == 1
    survivor = record.get_cds_features()[0]
    assert survivor is list(cluster.cds_children)[0]
    assert survivor.locus_tag == "B"
def setUp(self):
    """Build a record with two mirrored CDS features inside one cluster."""
    # the forward CDS covers ATGTTATGA (0-9); the reverse CDS covers
    # TCATAACAT (12-21), which reverse-complements to the same ATGTTATGA
    rec = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
    for start, end, strand in ((0, 9, 1), (12, 21, -1)):
        rec.add_cds_feature(DummyCDS(start, end, strand=strand))

    cluster = Cluster(FeatureLocation(0, 21), 0, 0, [])
    rec.add_cluster(cluster)

    # sanity checks on the fixture: if these don't hold, the tests that
    # rely on it will fail
    assert len(cluster.cds_children) == 2
    for cds in rec.get_cds_features():
        assert cds.overlaps_with(cluster)
        assert cds.cluster == cluster, str(cds.location)
        assert cds.extract(rec.seq) == "ATGTTATGA", str(cds.location)

    self.record = rec
def setUp(self):
    """Build a record with two mirrored CDS features inside one region."""
    # the forward CDS covers ATGTTATGA (0-9); the reverse CDS covers
    # TCATAACAT (12-21), which reverse-complements to the same ATGTTATGA
    rec = Record(Seq("ATGTTATGAGGGTCATAACAT"))
    for start, end, strand in ((0, 9, 1), (12, 21, -1)):
        rec.add_cds_feature(DummyCDS(start, end, strand=strand))

    protocluster = DummyProtocluster(start=0, end=21)
    rec.add_protocluster(protocluster)
    rec.create_candidate_clusters()
    rec.create_regions()

    # sanity checks on the fixture: if these don't hold, the tests that
    # rely on it will fail
    assert len(protocluster.cds_children) == 2
    assert len(rec.get_regions()) == 1
    for cds in rec.get_cds_features():
        assert cds.is_contained_by(protocluster)
        assert cds.extract(rec.seq) == "ATGTTATGA", str(cds.location)

    self.record = rec
def get_anchor_gene_names(record: Record) -> List[str]:
    """ Collects the names of all CDS features in the record that are
        marked with a core gene function.

        The names come from CDS.get_name(), so using them as gene names
        requires that a CDS reports the same name as its parent gene.

        Arguments:
            record: the record to search

        Returns:
            a list of names, in the order the record yields its CDS features
    """
    return [
        cds.get_name()
        for cds in record.get_cds_features()
        if cds.gene_function == GeneFunction.CORE
    ]
class TestCluster(unittest.TestCase):
    """Tests for Cluster edge trimming and product handling."""

    def create_cluster(self, start, end):
        """Build a forward-strand cluster from start to end with product 'a'."""
        return Cluster(FeatureLocation(start, end, strand=1),
                       cutoff=1, extent=1, products=['a'])

    def create_cds(self, start, end, strand=1):
        """Build a CDS from start to end, using "<start>-<end>" as locus tag."""
        return CDSFeature(FeatureLocation(start, end, strand),
                          locus_tag="%d-%d" % (start, end))

    def setUp(self):
        """Create a 1000 base record containing a single cluster at 100-900."""
        self.record = Record(Seq("A" * 1000))
        self.start = 100
        self.end = 900
        self.cluster = self.create_cluster(self.start, self.end)
        self.record.add_cluster(self.cluster)
        # adding the cluster to the record must not alter its location
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end

    def test_trim_unattached(self):
        """Trimming a cluster outside any record should leave it untouched."""
        cluster = self.create_cluster(1, 2)
        cluster.trim_overlapping()
        # check the cluster that was actually trimmed; asserting only on the
        # fixture cluster would pass regardless of what trimming did
        assert cluster.location.start == 1
        assert cluster.location.end == 2
        # the fixture cluster must be unaffected as well
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end

    def test_trim_empty(self):
        """Trimming a cluster without any CDS features keeps its location."""
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end

    def test_trim_contained(self):
        """Fully contained CDS features never shrink the cluster, on either strand."""
        starts = [200, 300, 500]
        ends = [250, 350, 600]

        # forward strand
        for start, end in zip(starts, ends):
            feature = self.create_cds(start, end)
            self.record.add_cds_feature(feature)
            assert feature.cluster == self.cluster
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end

        # snapshot first: features are removed from the record while looping
        for cds in list(self.record.get_cds_features()):
            self.record.remove_cds_feature(cds)
        assert not self.cluster.cds_children

        # reverse strand
        for start, end in zip(starts, ends):
            feature = self.create_cds(start, end, strand=-1)
            self.record.add_cds_feature(feature)
            assert feature.cluster == self.cluster
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end

    def test_trim_leading_overlap(self):
        """A CDS straddling the cluster start moves the start to that CDS's end."""
        self.record.add_cds_feature(self.create_cds(self.start - 3, self.start + 3))
        self.record.add_cds_feature(self.create_cds(self.start + 20, self.end - 20))
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start + 3
        assert self.cluster.location.end == self.end

    def test_trim_leading_overlap_with_overlapping_contained(self):  # pylint: disable=invalid-name
        """The start only moves up to a contained CDS that overlaps the straddling one."""
        self.record.add_cds_feature(self.create_cds(self.start - 3, self.start + 3))
        self.record.add_cds_feature(self.create_cds(self.start + 1, self.start + 10))
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start + 1
        assert self.cluster.location.end == self.end

    def test_trim_trailing_overlap(self):
        """A CDS straddling the cluster end moves the end to that CDS's start."""
        self.record.add_cds_feature(self.create_cds(self.end - 3, self.end + 3))
        self.record.add_cds_feature(self.create_cds(self.start + 20, self.end - 20))
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end - 3

    def test_trim_trailing_overlap_with_overlapping_contained(self):  # pylint: disable=invalid-name
        """The end only moves back to a contained CDS that overlaps the straddling one."""
        self.record.add_cds_feature(self.create_cds(self.end - 3, self.end + 3))
        self.record.add_cds_feature(self.create_cds(self.end - 10, self.end - 1))
        self.cluster.trim_overlapping()
        assert self.cluster.location.start == self.start
        assert self.cluster.location.end == self.end - 1

    def test_products(self):
        """Products are exposed as a tuple and only accept valid additions."""
        assert self.cluster.products == ("a",)
        self.cluster.add_product("b")
        assert self.cluster.products == ("a", "b")
        # the exposed tuple has no append(), so it cannot be mutated directly
        with self.assertRaises(AttributeError):
            self.cluster.products.append("c")  # pylint: disable=no-member
        with self.assertRaises(AssertionError):
            self.cluster.add_product(None)
        with self.assertRaises(AssertionError):
            self.cluster.add_product(["C"])