def test_store_promoters(self):
    """Verify that ``cassis.store_promoters`` adds promoter features to a record.

    A record that had promoters stored is compared against a second fake
    record that did not: the feature count must differ by the number of
    promoters, the leading generic features must be promoter features
    carrying the sequence, and the last generic feature must describe the
    combined (bidirectional) promoter.
    """
    stored = [
        Promoter("gene1", 10, 20, seq=Seq("cgtacgtacgt")),
        Promoter("gene2", 30, 40, seq=Seq("cgtacgtacgt")),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq("cgtacgtacgt")),
    ]

    populated = create_fake_record()
    # add ("store") the promoters to the record
    cassis.store_promoters(stored, populated)
    # just the same fake record, without adding promoters
    untouched = create_fake_record()

    # test promoter features: one extra feature per stored promoter
    baseline = untouched.get_feature_count()
    assert baseline + len(stored) == populated.get_feature_count()

    for position, _ in enumerate(stored):
        generic = populated.get_generics()[position]
        assert generic.type == "promoter"
        assert generic.get_qualifier("seq") == ("cgtacgtacgt", )

    # especially test the bidirectional promoter feature
    # (third promoter, last feature)
    bidirectional = populated.get_generics()[-1]
    assert bidirectional.get_qualifier("locus_tag") == ("gene3", "gene4")
    assert bidirectional.notes == ["bidirectional promoter"]
def test_regeneration(self):
    """Round-trip a ``CassisResults`` through JSON and compare the outcome.

    The results are populated with one subregion (built from a cluster
    prediction) and three promoters, serialised with ``to_json`` and
    rebuilt with ``cassis.regenerate_previous_results``. Subregion
    locations, their biopython qualifiers and the promoters must all
    match the originals.
    """
    record = create_fake_record()
    original = cassis.CassisResults(record.id)

    # create a prediction, since it will generate a border with many
    # extra qualifiers
    first = ClusterMarker("gene1", Motif(3, 3, score=1))
    first.promoter = "gene1"
    first.abundance = 2
    last = ClusterMarker("gene4", Motif(3, 3, score=1))
    last.promoter = "gene3+gene4"
    # the end marker's abundance is left untouched and is expected to be 1
    assert last.abundance == 1

    prediction = cassis.ClusterPrediction(first, last)
    original.subregions = cassis.create_subregions("gene1", [prediction], record)
    assert original.subregions

    promoter_list = [
        Promoter("gene1", 10, 20, seq=Seq("cgtacgtacgt")),
        Promoter("gene2", 30, 40, seq=Seq("cgtacgtacgt")),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq("cgtacgtacgt")),
    ]
    original.promoters = promoter_list

    serialised = original.to_json()
    regenerated = cassis.regenerate_previous_results(serialised, record, None)
    assert isinstance(regenerated, cassis.CassisResults)

    assert len(original.subregions) == len(regenerated.subregions)
    for before, after in zip(original.subregions, regenerated.subregions):
        assert before.location == after.location
        assert before.to_biopython()[0].qualifiers == after.to_biopython()[0].qualifiers

    assert regenerated.promoters == original.promoters
def test_check_cluster_predictions(self):
    """Check the result of ``check_cluster_predictions`` for one prediction.

    A single gene1--gene4 prediction is passed in together with three
    promoters and one ignored gene. The returned list is expected to equal
    an identical prediction whose markers carry promoter ids and whose
    ``genes`` and ``promoters`` attributes are 4 and 3.
    """
    def fresh_prediction():
        # gene1 -- gene4, both markers built from the same kind of motif
        return ClusterPrediction(ClusterMarker("gene1", Motif(3, 3, score=1)),
                                 ClusterMarker("gene4", Motif(3, 3, score=1)))

    seq_record = create_fake_record()
    promoters = [
        Promoter("gene1", 1, 5),
        Promoter("gene2", 6, 10),
        CombinedPromoter("gene3", "gene4", 11, 15),
    ]
    # see captured logging
    ignored_genes = [Gene(FeatureLocation(1, 5), locus_tag="gene5")]

    clusters = [fresh_prediction()]

    wanted = fresh_prediction()
    wanted.start.promoter = "gene1"
    wanted.end.promoter = "gene3+gene4"
    wanted.genes = 4
    wanted.promoters = 3
    expected = [wanted]

    checked = check_cluster_predictions(clusters, seq_record, promoters, ignored_genes)
    assert checked == expected
def test_get_anchor_promoter(self):
    """Check that the anchor gene is found inside a combined promoter.

    "gene3" only occurs as part of the combined gene3/gene4 promoter,
    which is the third entry (index 2) of the promoter list.
    """
    candidates = [
        Promoter("gene1", 1, 1),
        Promoter("gene2", 2, 2),
        CombinedPromoter("gene3", "gene4", 3, 4),
        Promoter("gene5", 5, 5),
    ]
    located = get_anchor_promoter_index("gene3", candidates)
    self.assertEqual(located, 2)
def test_filter_fimo_results(self):
    """Check ``filter_fimo_results`` against a canned FIMO output file.

    A fake ``fimo.txt`` is copied into the "+00_-03" subdirectory of the
    FIMO output directory before the filter is run. The returned motifs
    must equal the expected motif with its hits, and the generated
    ``bs_per_promoter.csv`` must match the expected file.
    """
    fimo_dir = os.path.join(self.options.output_dir, "fimo")
    motifs = [Motif(0, 3)]
    # gene2 will be the anchor promoter
    anchor_promoter = 1

    # need certain amount of promoters, otherwise the proportion of
    # promoters with a motif (motif frequency) will be too high --> error
    promoters = [Promoter("gene%d" % i, i * 10, i * 10 + 4) for i in range(1, 16)]
    expected_motifs = [Motif(0, 3, hits={"gene1": 1, "gene2": 2})]

    # fake FIMO output file, corresponding to expected_motifs
    source = path.get_full_path(__file__, "data", "fake_short_fimo.txt")
    target = os.path.join(fimo_dir, "+00_-03")
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(target, exist_ok=True)
    copy(source, os.path.join(target, "fimo.txt"))  # overwrite fimo.txt if exists

    found_motifs = filter_fimo_results(motifs, fimo_dir, promoters, anchor_promoter)
    assert found_motifs == expected_motifs

    bs_per_promoter, expected_bs_per_promoter = read_generated_expected_file(
        os.path.join(target, "bs_per_promoter.csv"), "expected_bs_per_promoter.csv")
    self.assertEqual(bs_per_promoter, expected_bs_per_promoter)
def test_get_promoter_sets(self):
    """Check which motifs ``generate_motifs`` returns around an anchor.

    With eight promoters and the anchor at index 5, the expected result is
    one ``Motif(plus, minus)`` for every ``plus`` in 0..2 combined with
    every ``minus`` from ``3 - plus`` up to 5, in that order.
    """
    def sequence():
        # each promoter gets its own Seq instance with the same content
        return Seq("acgtacgtacgtacgt")

    meme_dir = os.path.join(self.options.output_dir, "meme")
    anchor_promoter = 5
    promoters = [
        Promoter("gene1", 1, 1, seq=sequence()),
        Promoter("gene2", 2, 2, seq=sequence()),
        CombinedPromoter("gene3", "gene4", 3, 4, seq=sequence()),
        Promoter("gene5", 5, 5, seq=sequence()),
        Promoter("gene6", 6, 6, seq=sequence()),
        # promoter with index=5 --> anchor promoter
        Promoter("gene7", 7, 7, seq=sequence()),
        Promoter("gene8", 8, 8, seq=sequence()),
        Promoter("gene9", 9, 9, seq=sequence()),
    ]

    expected_motifs = []
    for plus in range(3):
        for minus in range(3 - plus, 6):
            expected_motifs.append(Motif(plus, minus))

    generated = generate_motifs(meme_dir, anchor_promoter, promoters)
    self.assertEqual(generated, expected_motifs)
def test_serialisation(self):
    """Round-trip promoters through ``to_json``/``from_json``.

    Both ``Promoter`` and ``CombinedPromoter`` are exercised, once with a
    ``Seq`` object and once with a plain string as the sequence. The
    rebuilt object must compare equal and keep the same ``seq``.
    """
    for seq in (Seq("ACGT"), "ACGT"):
        single = Promoter("gene1", 1, 5, seq=seq)
        combined = CombinedPromoter("gene1", "gene2", 2, 7, seq=seq)
        cases = ((Promoter, single), (CombinedPromoter, combined))
        for cls, promoter in cases:
            serialised = promoter.to_json()
            restored = cls.from_json(serialised)
            assert promoter.seq == restored.seq
            assert restored == promoter
def test_get_islands(self):
    """Check the islands ``get_islands`` builds for two motifs.

    Six promoters are used with the second one (index 1) as the anchor.
    One island per motif is expected, each given by its first and last
    promoter plus the motif itself.
    """
    narrow_motif = Motif(0, 3, hits={"gene1": 1, "gene2": 2})
    wide_motif = Motif(0, 4, hits={"gene2": 3, "gene4": 2, "gene5": 1})
    motifs = [narrow_motif, wide_motif]
    # gene2 will be the anchor promoter
    anchor_promoter = 1
    promoters = [Promoter("gene%d" % i, i * 10, i * 10 + 4) for i in range(1, 7)]

    # resulting in 2 different islands (this example)
    # promoter (pos): 1 2 3 4 5 6
    # binding sites:  1 2 0 0 0 0
    # island:         |-|
    first_island = Island(promoters[0], promoters[1], motifs[0])
    # promoter (pos): 1 2 3 4 5 6
    # binding sites:  0 3 0 2 1 0
    # island:           |-----|
    second_island = Island(promoters[1], promoters[4], motifs[1])

    found = get_islands(anchor_promoter, motifs, promoters)
    assert found == [first_island, second_island]
def test_sort_by_abundance(self):
    """Check the order of predictions returned by ``create_predictions``.

    Three islands are supplied. The expected list contains four cluster
    predictions, ordered as the comments below describe: abundance as
    high as possible, then score as low as possible.
    """
    def marker(gene, motif, abundance):
        # build a cluster marker and give it the expected abundance
        built = ClusterMarker(gene, motif)
        built.abundance = abundance
        return built

    # island 1: [gene1 -- gene2]
    island_one = Island(Promoter("gene1", 1, 1), Promoter("gene2", 2, 2),
                        Motif(0, 3, score=3, hits={"gene1": 1, "gene2": 1}))
    # island 2: [gene2 -- gene5]
    island_two = Island(Promoter("gene2", 2, 2), Promoter("gene5", 5, 5),
                        Motif(3, 0, score=2,
                              hits={"gene2": 1, "gene3": 1, "gene4": 1, "gene5": 1}))
    # island 3: [gene1 -- gene5]
    island_three = Island(Promoter("gene1", 1, 1), Promoter("gene5", 5, 5),
                          Motif(3, 3, score=1,
                                hits={"gene1": 1, "gene2": 1, "gene3": 1,
                                      "gene4": 1, "gene5": 1}))
    islands = [island_one, island_two, island_three]

    # left border: 2x gene1, 1x gene2
    # right border: 2x gene5, 1x gene2
    expected_clusters = [
        # cluster 1: [gene1 -- gene5] --> abundance 2+2 (most abundant)
        ClusterPrediction(marker("gene1", Motif(3, 3, score=1), 2),
                          marker("gene5", Motif(3, 3, score=1), 2)),
        # cluster 3: [gene2 -- gene5] --> abundance 1+2, score 2+1 (better/lower)
        ClusterPrediction(marker("gene2", Motif(3, 0, score=2), 1),
                          marker("gene5", Motif(3, 3, score=1), 2)),
        # cluster 2: [gene1 -- gene2] --> abundance 2+1, score 1+3 (worse, higher)
        ClusterPrediction(marker("gene1", Motif(3, 3, score=1), 2),
                          marker("gene2", Motif(0, 3, score=3), 1)),
        # cluster 4: [gene2 -- gene2] --> abundance 1+1
        ClusterPrediction(marker("gene2", Motif(3, 0, score=2), 1),
                          marker("gene2", Motif(0, 3, score=3), 1)),
    ]

    # abundance: as high as possible
    # score: as low as possible
    predicted = create_predictions(islands)
    self.assertEqual(predicted, expected_clusters)
def test_promoter_id(self):
    """Check ``get_id`` for a plain and for a combined promoter.

    A plain promoter reports its gene name; a combined promoter reports
    both gene names joined with "+".
    """
    single = Promoter("gene1", 1, 5)
    assert single.get_id() == "gene1"

    combined = CombinedPromoter("gene1", "gene2", 1, 5)
    assert combined.get_id() == "gene1+gene2"
def setUp(self):
    """Create the promoter fixture stored on ``self.promoter``."""
    fixture = Promoter("gene1", 1, 1)
    self.promoter = fixture