def test_store_promoters(self):
    """Check that store_promoters adds one "promoter" feature per promoter."""
    bases = "cgtacgtacgt"
    promoters = [
        Promoter("gene1", 10, 20, seq=Seq(bases)),
        Promoter("gene2", 30, 40, seq=Seq(bases)),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(bases)),
    ]

    # one record receives the promoters, the other stays untouched as a baseline
    populated = create_fake_record()
    cassis.store_promoters(promoters, populated)
    baseline = create_fake_record()

    # the feature count must grow by exactly the number of stored promoters
    expected_count = baseline.get_feature_count() + len(promoters)
    assert expected_count == populated.get_feature_count()

    generics = populated.get_generics()
    for index, _ in enumerate(promoters):
        feature = generics[index]
        assert feature.type == "promoter"
        assert feature.get_qualifier("seq") == ("cgtacgtacgt", )

    # the bidirectional promoter (third promoter) is expected as the last feature
    bidirectional = generics[-1]
    assert bidirectional.get_qualifier("locus_tag") == ("gene3", "gene4")
    assert bidirectional.notes == ["bidirectional promoter"]
def test_regeneration(self):
    """Check that CassisResults survive a to_json/regenerate round trip."""
    record = create_fake_record()
    results = cassis.CassisResults(record.id)

    # a prediction is needed, since it generates a border with many extra qualifiers
    first_marker = ClusterMarker("gene1", Motif(3, 3, score=1))
    first_marker.promoter = "gene1"
    first_marker.abundance = 2
    last_marker = ClusterMarker("gene4", Motif(3, 3, score=1))
    last_marker.promoter = "gene3+gene4"
    # abundance was never set on this marker, so this checks its initial value
    assert last_marker.abundance == 1

    prediction = cassis.ClusterPrediction(first_marker, last_marker)
    results.subregions = cassis.create_subregions("gene1", [prediction], record)
    assert results.subregions

    bases = "cgtacgtacgt"
    results.promoters = [
        Promoter("gene1", 10, 20, seq=Seq(bases)),
        Promoter("gene2", 30, 40, seq=Seq(bases)),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(bases)),
    ]

    serialised = results.to_json()
    round_trip = cassis.regenerate_previous_results(serialised, record, None)
    assert isinstance(round_trip, cassis.CassisResults)

    # subregions must match pairwise in location and in their biopython qualifiers
    assert len(results.subregions) == len(round_trip.subregions)
    for before, after in zip(results.subregions, round_trip.subregions):
        assert before.location == after.location
        qualifiers_before = before.to_biopython()[0].qualifiers
        qualifiers_after = after.to_biopython()[0].qualifiers
        assert qualifiers_before == qualifiers_after

    assert round_trip.promoters == results.promoters
def test_get_promoter_sets(self):
    """Check the motifs generated around the promoter at index 5."""
    meme_dir = os.path.join(self.options.output_dir, "meme")
    anchor_promoter = 5
    bases = "acgtacgtacgtacgt"
    promoters = [
        Promoter("gene1", 1, 1, seq=Seq(bases)),
        Promoter("gene2", 2, 2, seq=Seq(bases)),
        CombinedPromoter("gene3", "gene4", 3, 4, seq=Seq(bases)),
        Promoter("gene5", 5, 5, seq=Seq(bases)),
        Promoter("gene6", 6, 6, seq=Seq(bases)),
        # promoter with index=5 --> anchor promoter
        Promoter("gene7", 7, 7, seq=Seq(bases)),
        Promoter("gene8", 8, 8, seq=Seq(bases)),
        Promoter("gene9", 9, 9, seq=Seq(bases)),
    ]

    # expected: every (plus, minus) with plus in 0..2 and minus in (3 - plus)..5
    expected_motifs = []
    for plus in range(3):
        for minus in range(3 - plus, 6):
            expected_motifs.append(Motif(plus, minus))

    generated = generate_motifs(meme_dir, anchor_promoter, promoters)
    self.assertEqual(generated, expected_motifs)
def test_check_cluster_predictions(self):
    """Check that check_cluster_predictions fills in promoter names and counts."""
    seq_record = create_fake_record()
    promoters = [
        Promoter("gene1", 1, 5),
        Promoter("gene2", 6, 10),
        CombinedPromoter("gene3", "gene4", 11, 15),
    ]
    # see captured logging
    ignored_genes = [Gene(FeatureLocation(1, 5), locus_tag="gene5")]

    clusters = [
        ClusterPrediction(
            ClusterMarker("gene1", Motif(3, 3, score=1)),
            ClusterMarker("gene4", Motif(3, 3, score=1)),
        ),
    ]

    # the expected prediction uses the same markers, plus the checked-in details
    checked = ClusterPrediction(
        ClusterMarker("gene1", Motif(3, 3, score=1)),
        ClusterMarker("gene4", Motif(3, 3, score=1)),
    )
    expected = [checked]
    expected[0].start.promoter = "gene1"
    expected[0].end.promoter = "gene3+gene4"
    expected[0].genes = 4
    expected[0].promoters = 3

    assert check_cluster_predictions(clusters, seq_record, promoters,
                                     ignored_genes) == expected
def test_get_anchor_promoter(self):
    """Check that an anchor gene inside a combined promoter resolves to index 2."""
    anchor = "gene3"
    promoters = [
        Promoter("gene1", 1, 1),
        Promoter("gene2", 2, 2),
        CombinedPromoter("gene3", "gene4", 3, 4),
        Promoter("gene5", 5, 5),
    ]
    # "gene3" is part of the combined promoter, the third entry of the list
    found_index = get_anchor_promoter_index(anchor, promoters)
    self.assertEqual(found_index, 2)
def test_serialisation(self):
    """Check the to_json/from_json round trip of both promoter classes."""
    # sequences are supplied both as a Seq object and as a plain string
    for seq in (Seq("ACGT"), "ACGT"):
        cases = [
            (Promoter, Promoter("gene1", 1, 5, seq=seq)),
            (CombinedPromoter, CombinedPromoter("gene1", "gene2", 2, 7, seq=seq)),
        ]
        for cls, promoter in cases:
            serialised = promoter.to_json()
            round_trip = cls.from_json(serialised)
            assert promoter.seq == round_trip.seq
            assert round_trip == promoter
def test_promoter_id(self):
    """Check the identifiers reported by single and combined promoters."""
    single_id = Promoter("gene1", 1, 5).get_id()
    assert single_id == "gene1"

    # a combined promoter joins both gene names with "+"
    combined_id = CombinedPromoter("gene1", "gene2", 1, 5).get_id()
    assert combined_id == "gene1+gene2"