예제 #1
0
    def test_store_promoters(self):
        """Check that cassis.store_promoters adds promoter features to a record.

        A second, untouched fake record provides the baseline feature count.
        """
        promoters = [
            Promoter("gene1", 10, 20, seq=Seq("cgtacgtacgt")),
            Promoter("gene2", 30, 40, seq=Seq("cgtacgtacgt")),
            CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq("cgtacgtacgt")),
        ]
        record_with_promoters = create_fake_record()
        # add ("store") promoters to the record
        cassis.store_promoters(promoters, record_with_promoters)

        # just the same, without adding promoters
        record_without_promoters = create_fake_record()

        # test promoter features: exactly one new feature per promoter
        baseline_count = record_without_promoters.get_feature_count()
        expected_count = baseline_count + len(promoters)
        assert expected_count == record_with_promoters.get_feature_count()

        # fetch the generic features once instead of once per index
        generics = record_with_promoters.get_generics()
        # a slice silently shortens, so make a missing feature an explicit failure
        assert len(generics) >= len(promoters)
        for feature in generics[:len(promoters)]:
            assert feature.type == "promoter"
            assert feature.get_qualifier("seq") == ("cgtacgtacgt", )

        # especially test bidirectional promoter feature (third promoter, last feature)
        last_promoter = generics[-1]
        assert last_promoter.get_qualifier("locus_tag") == ("gene3", "gene4")
        assert last_promoter.notes == ["bidirectional promoter"]
예제 #2
0
    def test_regeneration(self):
        """Round-trip CassisResults through JSON and compare with the original."""
        record = create_fake_record()
        results = cassis.CassisResults(record.id)

        # a prediction is required here, since it will generate a border with
        # many extra qualifiers
        first_marker = ClusterMarker("gene1", Motif(3, 3, score=1))
        first_marker.promoter = "gene1"
        first_marker.abundance = 2
        last_marker = ClusterMarker("gene4", Motif(3, 3, score=1))
        last_marker.promoter = "gene3+gene4"
        # abundance is deliberately not set on this marker
        assert last_marker.abundance == 1
        prediction = cassis.ClusterPrediction(first_marker, last_marker)
        results.subregions = cassis.create_subregions(
            "gene1", [prediction], record)
        assert results.subregions

        sequence = "cgtacgtacgt"
        results.promoters = [
            Promoter("gene1", 10, 20, seq=Seq(sequence)),
            Promoter("gene2", 30, 40, seq=Seq(sequence)),
            CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(sequence)),
        ]

        regenerated = cassis.regenerate_previous_results(
            results.to_json(), record, None)
        assert isinstance(regenerated, cassis.CassisResults)
        assert len(results.subregions) == len(regenerated.subregions)
        for before, after in zip(results.subregions, regenerated.subregions):
            assert before.location == after.location
            # compare the qualifiers of the first biopython feature of each
            qualifiers_before = before.to_biopython()[0].qualifiers
            qualifiers_after = after.to_biopython()[0].qualifiers
            assert qualifiers_before == qualifiers_after
        assert regenerated.promoters == results.promoters
예제 #3
0
    def test_check_cluster_predictions(self):
        """Compare check_cluster_predictions output with a hand-built prediction."""
        seq_record = create_fake_record()
        promoters = [
            Promoter("gene1", 1, 5),
            Promoter("gene2", 6, 10),
            CombinedPromoter("gene3", "gene4", 11, 15),
        ]
        # see captured logging
        ignored_genes = [Gene(FeatureLocation(1, 5), locus_tag="gene5")]
        clusters = [
            ClusterPrediction(
                ClusterMarker("gene1", Motif(3, 3, score=1)),
                ClusterMarker("gene4", Motif(3, 3, score=1)),
            ),
        ]

        # same markers as the input, plus the attributes the result must carry
        expected_prediction = ClusterPrediction(
            ClusterMarker("gene1", Motif(3, 3, score=1)),
            ClusterMarker("gene4", Motif(3, 3, score=1)),
        )
        expected_prediction.start.promoter = "gene1"
        expected_prediction.end.promoter = "gene3+gene4"
        expected_prediction.genes = 4
        expected_prediction.promoters = 3

        checked = check_cluster_predictions(clusters, seq_record, promoters,
                                            ignored_genes)
        assert checked == [expected_prediction]
예제 #4
0
 def test_get_anchor_promoter(self):
     """Look up the index of the promoter that contains the anchor gene."""
     promoters = [
         Promoter("gene1", 1, 1),
         Promoter("gene2", 2, 2),
         CombinedPromoter("gene3", "gene4", 3, 4),
         Promoter("gene5", 5, 5),
     ]
     # "gene3" belongs to the combined promoter, the third list entry
     found_index = get_anchor_promoter_index("gene3", promoters)
     self.assertEqual(found_index, 2)
예제 #5
0
    def test_filter_fimo_results(self):
        """Run filter_fimo_results on a fake FIMO output file.

        Checks both the returned motifs and the generated
        "bs_per_promoter.csv" against the expected data.
        """
        fimo_dir = os.path.join(self.options.output_dir, "fimo")
        motifs = [Motif(0, 3)]
        # gene2 will be the anchor promoter
        anchor_promoter = 1
        # need certain amount of promoters, otherwise the proportion of
        # promoters with a motif (motif frequency) will be too high --> error
        promoters = [
            Promoter("gene%d" % i, i * 10, i * 10 + 4) for i in range(1, 16)
        ]
        expected_motifs = [Motif(0, 3, hits={"gene1": 1, "gene2": 2})]

        # fake FIMO output file, corresponding to expected_motifs
        source = path.get_full_path(__file__, "data", "fake_short_fimo.txt")
        target = os.path.join(fimo_dir, "+00_-03")
        # create the directory if needed, without a separate existence check
        os.makedirs(target, exist_ok=True)
        # overwrite fimo.txt if exists
        copy(source, os.path.join(target, "fimo.txt"))

        found_motifs = filter_fimo_results(motifs, fimo_dir, promoters,
                                           anchor_promoter)
        assert found_motifs == expected_motifs
        bs_per_promoter, expected_bs_per_promoter = read_generated_expected_file(
            os.path.join(target, "bs_per_promoter.csv"),
            "expected_bs_per_promoter.csv")
        self.assertEqual(bs_per_promoter, expected_bs_per_promoter)
예제 #6
0
    def test_get_promoter_sets(self):
        """Compare generate_motifs output with the expected plus/minus motifs."""
        meme_dir = os.path.join(self.options.output_dir, "meme")
        sequence = "acgtacgtacgtacgt"
        promoters = [
            Promoter("gene1", 1, 1, seq=Seq(sequence)),
            Promoter("gene2", 2, 2, seq=Seq(sequence)),
            CombinedPromoter("gene3", "gene4", 3, 4, seq=Seq(sequence)),
            Promoter("gene5", 5, 5, seq=Seq(sequence)),
            Promoter("gene6", 6, 6, seq=Seq(sequence)),
            # promoter with index=5 --> anchor promoter
            Promoter("gene7", 7, 7, seq=Seq(sequence)),
            Promoter("gene8", 8, 8, seq=Seq(sequence)),
            Promoter("gene9", 9, 9, seq=Seq(sequence)),
        ]
        anchor_promoter = 5

        # every (plus, minus) pair with plus in 0..2 and minus in (3 - plus)..5
        expected_motifs = [
            Motif(upstream, downstream)
            for upstream in range(3)
            for downstream in range(3 - upstream, 6)
        ]
        generated = generate_motifs(meme_dir, anchor_promoter, promoters)
        self.assertEqual(generated, expected_motifs)
예제 #7
0
 def test_serialisation(self):
     """Round-trip both promoter classes through to_json/from_json.

     Each class is tried with a Seq and with a plain str sequence.
     """
     promoter_classes = (Promoter, CombinedPromoter)
     for seq in (Seq("ACGT"), "ACGT"):
         originals = [
             Promoter("gene1", 1, 5, seq=seq),
             CombinedPromoter("gene1", "gene2", 2, 7, seq=seq),
         ]
         for cls, original in zip(promoter_classes, originals):
             restored = cls.from_json(original.to_json())
             assert original.seq == restored.seq
             assert restored == original
예제 #8
0
 def test_get_islands(self):
     """Check the islands get_islands builds for two motifs around gene2."""
     first_motif = Motif(0, 3, hits={"gene1": 1, "gene2": 2})
     second_motif = Motif(0, 4, hits={"gene2": 3, "gene4": 2, "gene5": 1})
     # gene2 will be the anchor promoter
     anchor_promoter = 1
     promoters = [
         Promoter("gene%d" % number, number * 10, number * 10 + 4)
         for number in range(1, 7)
     ]

     # resulting in 2 different islands (this example)
     expected_islands = [
         # promoter (pos): 1 2 3 4 5 6
         # binding sites:  1 2 0 0 0 0
         # island:         |-|
         Island(promoters[0], promoters[1], first_motif),
         # promoter (pos): 1 2 3 4 5 6
         # binding sites:  0 3 0 2 1 0
         # island:           |---|
         Island(promoters[1], promoters[4], second_motif),
     ]

     found_islands = get_islands(anchor_promoter,
                                 [first_motif, second_motif], promoters)
     assert found_islands == expected_islands
예제 #9
0
    def test_sort_by_abundance(self):
        """Check the content and order of the predictions from create_predictions."""

        def marker(gene, plus, minus, score, abundance):
            # build a ClusterMarker and set its abundance in one step
            result = ClusterMarker(gene, Motif(plus, minus, score=score))
            result.abundance = abundance
            return result

        islands = [
            # island 1: [gene1 -- gene2]
            Island(Promoter("gene1", 1, 1), Promoter("gene2", 2, 2),
                   Motif(0, 3, score=3, hits={"gene1": 1, "gene2": 1})),
            # island 2: [gene2 -- gene5]
            Island(Promoter("gene2", 2, 2), Promoter("gene5", 5, 5),
                   Motif(3, 0, score=2,
                         hits={"gene2": 1, "gene3": 1, "gene4": 1, "gene5": 1})),
            # island 3: [gene1 -- gene5]
            Island(Promoter("gene1", 1, 1), Promoter("gene5", 5, 5),
                   Motif(3, 3, score=1,
                         hits={"gene1": 1, "gene2": 1, "gene3": 1, "gene4": 1,
                               "gene5": 1})),
        ]

        # left border: 2x gene1, 1x gene2
        # right border: 2x gene5, 1x gene2

        # abundance: as high as possible
        # score: as low as possible
        expected_clusters = [
            # cluster 1: [gene1 -- gene5] --> abundance 2+2 (most abundant)
            ClusterPrediction(marker("gene1", 3, 3, 1, 2),
                              marker("gene5", 3, 3, 1, 2)),
            # cluster 3: [gene2 -- gene5] --> abundance 1+2, score 2+1 (better/lower)
            ClusterPrediction(marker("gene2", 3, 0, 2, 1),
                              marker("gene5", 3, 3, 1, 2)),
            # cluster 2: [gene1 -- gene2] --> abundance 2+1, score 1+3 (worse, higher)
            ClusterPrediction(marker("gene1", 3, 3, 1, 2),
                              marker("gene2", 0, 3, 3, 1)),
            # cluster 4: [gene2 -- gene2] --> abundance 1+1
            ClusterPrediction(marker("gene2", 3, 0, 2, 1),
                              marker("gene2", 0, 3, 3, 1)),
        ]

        self.assertEqual(create_predictions(islands), expected_clusters)
예제 #10
0
 def test_promoter_id(self):
     """Check get_id for a single-gene and a combined promoter."""
     single = Promoter("gene1", 1, 5)
     assert single.get_id() == "gene1"
     # the combined id is expected to join both gene names with "+"
     combined = CombinedPromoter("gene1", "gene2", 1, 5)
     assert combined.get_id() == "gene1+gene2"
예제 #11
0
 def setUp(self):
     """Store a Promoter built from ("gene1", 1, 1) on the instance as a fixture."""
     # NOTE(review): the enclosing class is not visible here; presumably a
     # unittest.TestCase, in which case this runs before each test - confirm.
     self.promoter = Promoter("gene1", 1, 1)