Beispiel #1
0
    def test_store_clusters(self):
        # Builds two cluster predictions, converts them to cluster borders,
        # adds those to a record and checks each stored border's qualifiers.
        # (this test is similar to test_store_promoters)
        anchor = "gene3"

        # first prediction: gene1 -> gene4; only the left marker gets an
        # explicit abundance, the untouched right marker must report 1
        left = ClusterMarker("gene1", Motif(3, 3, score=1))
        left.promoter = "gene1"
        left.abundance = 2
        right = ClusterMarker("gene4", Motif(3, 3, score=1))
        right.promoter = "gene3+gene4"
        assert right.abundance == 1
        first_cluster = cassis.ClusterPrediction(left, right)
        first_cluster.promoters = 3
        first_cluster.genes = 4

        # second prediction: gene1 -> gene5; both markers keep their
        # initial abundance
        left = ClusterMarker("gene1", Motif(4, 4, score=1))
        left.promoter = "gene1"
        assert left.abundance == 1
        right = ClusterMarker("gene5", Motif(4, 4, score=1))
        right.promoter = "gene5"
        assert right.abundance == 1
        second_cluster = cassis.ClusterPrediction(left, right)
        second_cluster.promoters = 3
        second_cluster.genes = 4

        clusters = [first_cluster, second_cluster]

        # two records built the same way; only one of them receives borders
        record_with_clusters = create_fake_record()
        record_without_clusters = create_fake_record()

        borders = cassis.create_cluster_borders(anchor, clusters,
                                                record_with_clusters)
        # building the borders alone must not add features to the record
        count_with = record_with_clusters.get_feature_count()
        count_without = record_without_clusters.get_feature_count()
        assert count_with == count_without

        for new_border in borders:
            record_with_clusters.add_cluster_border(new_border)

        # TODO: test if store_clusters changed any non-cluster feature
        # (it should not!)

        # test cluster features: one extra feature per prediction
        expected_total = record_without_clusters.get_feature_count() + len(
            clusters)
        assert expected_total == record_with_clusters.get_feature_count()
        for index, prediction in enumerate(clusters):
            stored = record_with_clusters.get_cluster_borders()[index]
            self.assertEqual(stored.type, "cluster_border")
            self.assertEqual(stored.tool, "cassis")
            # each qualifier is compared against a one-element tuple
            expected_qualifiers = (
                ("anchor", anchor),
                ("genes", prediction.genes),
                ("promoters", prediction.promoters),
                ("gene_left", prediction.start.gene),
                ("gene_right", prediction.end.gene),
            )
            for key, value in expected_qualifiers:
                self.assertEqual(stored.get_qualifier(key), (value, ))
Beispiel #2
0
    def test_store_subregions(self):
        # Builds two cluster predictions, converts them to subregions, adds
        # those to a record and checks each stored subregion.
        # (this test is similar to test_store_promoters)
        anchor = "gene3"

        # first prediction: gene1 -> gene4; only the left marker gets an
        # explicit abundance, the untouched right marker must report 1
        left = ClusterMarker("gene1", Motif(3, 3, score=1))
        left.promoter = "gene1"
        left.abundance = 2
        right = ClusterMarker("gene4", Motif(3, 3, score=1))
        right.promoter = "gene3+gene4"
        assert right.abundance == 1
        first_cluster = cassis.ClusterPrediction(left, right)
        first_cluster.promoters = 3
        first_cluster.genes = 4

        # second prediction: gene1 -> gene5; both markers keep their
        # initial abundance
        left = ClusterMarker("gene1", Motif(4, 4, score=1))
        left.promoter = "gene1"
        assert left.abundance == 1
        right = ClusterMarker("gene5", Motif(4, 4, score=1))
        right.promoter = "gene5"
        assert right.abundance == 1
        second_cluster = cassis.ClusterPrediction(left, right)
        second_cluster.promoters = 3
        second_cluster.genes = 4

        # order reversed because subregions are ordered by length when starts are the same
        region_predictions = [second_cluster, first_cluster]

        # two records built the same way; only one of them receives subregions
        record_with_subregions = create_fake_record()
        record_without_subregions = create_fake_record()

        subregions = cassis.create_subregions(anchor, region_predictions,
                                              record_with_subregions)
        # building the subregions alone must not add features to the record
        count_with = record_with_subregions.get_feature_count()
        count_without = record_without_subregions.get_feature_count()
        assert count_with == count_without

        for new_region in subregions:
            record_with_subregions.add_subregion(new_region)

        # test subregion features: one extra feature per created subregion
        baseline = record_without_subregions.get_feature_count()
        expected_count = baseline + len(subregions)
        assert record_with_subregions.get_feature_count() == expected_count
        for index, prediction in enumerate(region_predictions):
            stored = record_with_subregions.get_subregions()[index]
            self.assertEqual(stored.type, "subregion")
            self.assertEqual(stored.tool, "cassis")
            # the anchor is checked as an attribute, not as a qualifier
            self.assertEqual(stored.anchor, anchor)
            # each qualifier is compared against a one-element tuple
            expected_qualifiers = (
                ("genes", prediction.genes),
                ("promoters", prediction.promoters),
                ("gene_left", prediction.start.gene),
                ("gene_right", prediction.end.gene),
            )
            for key, value in expected_qualifiers:
                self.assertEqual(stored.get_qualifier(key), (value, ))
Beispiel #3
0
    def test_regeneration(self):
        # Round-trips a CassisResults instance through to_json() and
        # regenerate_previous_results() and checks nothing was lost.
        record = create_fake_record()
        results = cassis.CassisResults(record.id)

        # create a prediction, since it will generate a border with many extra qualifiers
        left = ClusterMarker("gene1", Motif(3, 3, score=1))
        left.promoter = "gene1"
        left.abundance = 2
        right = ClusterMarker("gene4", Motif(3, 3, score=1))
        right.promoter = "gene3+gene4"
        assert right.abundance == 1
        prediction = cassis.ClusterPrediction(left, right)
        results.subregions = cassis.create_subregions("gene1", [prediction],
                                                      record)
        # an empty result would make the comparison loop below vacuous
        assert results.subregions

        # two plain promoters followed by one combined promoter
        single_one = Promoter("gene1", 10, 20, seq=Seq("cgtacgtacgt"))
        single_two = Promoter("gene2", 30, 40, seq=Seq("cgtacgtacgt"))
        combined = CombinedPromoter("gene3", "gene4", 50, 60,
                                    seq=Seq("cgtacgtacgt"))
        results.promoters = [single_one, single_two, combined]

        serialised = results.to_json()
        round_trip = cassis.regenerate_previous_results(
            serialised, record, None)
        assert isinstance(round_trip, cassis.CassisResults)

        # subregions must match pairwise in location and qualifiers
        assert len(results.subregions) == len(round_trip.subregions)
        pairs = zip(results.subregions, round_trip.subregions)
        for before, after in pairs:
            assert before.location == after.location
            qualifiers_before = before.to_biopython()[0].qualifiers
            qualifiers_after = after.to_biopython()[0].qualifiers
            assert qualifiers_before == qualifiers_after
        assert round_trip.promoters == results.promoters
Beispiel #4
0
    def test_cleanup_outdir(self):
        # Creates meme/fimo directories for two anchor genes and two motif
        # names, runs cleanup_outdir and checks which directories remain.
        anchor_genes = ["gene1", "gene4"]
        cluster = cassis.ClusterPrediction(
            ClusterMarker("gene1", Motif(3, 3, score=1)),
            ClusterMarker("gene4", Motif(3, 3, score=1)))
        cluster.start.promoter = "gene1"
        cluster.end.promoter = "gene3+gene4"
        cluster.genes = 4
        cluster.promoters = 3
        # only gene1 has a prediction; gene4 has no entry at all
        cluster_predictions = {"gene1": [cluster]}

        output_dir = self.options.output_dir
        tools = ("meme", "fimo")

        # create some empty test dirs (one per tool for each entry);
        # all but the first entry should be deleted during the test
        test_dirs = (
            # prediction! --> keep!
            ("gene1", "+03_-03"),
            # prediction for this gene, but not from this motif --> delete
            ("gene1", "+04_-04"),
            # no prediction for this gene --> delete
            ("gene4", "+03_-03"),
            # no prediction for this gene --> delete
            ("gene4", "+04_-04"),
        )
        for gene, motif_dir in test_dirs:
            for tool in tools:
                os.makedirs(os.path.join(output_dir, tool, gene, motif_dir))

        cassis.cleanup_outdir(anchor_genes, cluster_predictions, self.options)

        # assert kept directories
        # (assertIn/assertNotIn report the actual listing on failure)
        for tool in tools:
            self.assertIn("gene1", os.listdir(os.path.join(output_dir, tool)))
        for tool in tools:
            self.assertIn(
                "+03_-03",
                os.listdir(os.path.join(output_dir, tool, "gene1")))

        # assert deleted directories
        for tool in tools:
            self.assertNotIn("gene4",
                             os.listdir(os.path.join(output_dir, tool)))
        for tool in tools:
            self.assertNotIn(
                "+04_-04",
                os.listdir(os.path.join(output_dir, tool, "gene1")))