def test_store_clusters(self):
    """Cluster borders built from CASSIS predictions can be stored in a record.

    Two predictions are converted with ``cassis.create_cluster_borders``,
    added to a fake record, and read back to check their type, tool and
    qualifiers.
    """
    # this test is similar to test_store_promoters
    anchor_gene = "gene3"

    # first prediction: gene1 .. gene4
    left = ClusterMarker("gene1", Motif(3, 3, score=1))
    left.promoter = "gene1"
    left.abundance = 2
    right = ClusterMarker("gene4", Motif(3, 3, score=1))
    right.promoter = "gene3+gene4"
    # abundance was never set on this marker, so it must still be 1
    assert right.abundance == 1
    prediction_a = cassis.ClusterPrediction(left, right)
    prediction_a.promoters = 3
    prediction_a.genes = 4

    # second prediction: gene1 .. gene5
    left = ClusterMarker("gene1", Motif(4, 4, score=1))
    left.promoter = "gene1"
    assert left.abundance == 1
    right = ClusterMarker("gene5", Motif(4, 4, score=1))
    right.promoter = "gene5"
    assert right.abundance == 1
    prediction_b = cassis.ClusterPrediction(left, right)
    prediction_b.promoters = 3
    prediction_b.genes = 4

    predictions = [prediction_a, prediction_b]

    annotated = create_fake_record()
    # just the same, without adding clusters
    pristine = create_fake_record()

    new_borders = cassis.create_cluster_borders(anchor_gene, predictions,
                                                annotated)
    # creating the borders alone must not add anything to the record
    assert annotated.get_feature_count() == pristine.get_feature_count()

    for new_border in new_borders:
        annotated.add_cluster_border(new_border)

    # test if store_clusters changed any non-cluster feature (should not!)
    # TODO

    # test cluster features: exactly one new feature per prediction
    expected_total = pristine.get_feature_count() + len(predictions)
    assert expected_total == annotated.get_feature_count()

    for position, prediction in enumerate(predictions):
        stored = annotated.get_cluster_borders()[position]
        self.assertEqual(stored.type, "cluster_border")
        self.assertEqual(stored.tool, "cassis")
        # every qualifier is expected back as a one-element tuple
        expected_qualifiers = (
            ("anchor", anchor_gene),
            ("genes", prediction.genes),
            ("promoters", prediction.promoters),
            ("gene_left", prediction.start.gene),
            ("gene_right", prediction.end.gene),
        )
        for qualifier, value in expected_qualifiers:
            self.assertEqual(stored.get_qualifier(qualifier), (value, ))
def test_store_subregions(self):
    """Subregions built from CASSIS predictions can be stored in a record.

    Two predictions are converted with ``cassis.create_subregions``, added
    to a fake record, and read back to check their type, tool, anchor and
    qualifiers.
    """
    # this test is similar to test_store_promoters
    anchor_gene = "gene3"

    # first prediction: gene1 .. gene4
    left = ClusterMarker("gene1", Motif(3, 3, score=1))
    left.promoter = "gene1"
    left.abundance = 2
    right = ClusterMarker("gene4", Motif(3, 3, score=1))
    right.promoter = "gene3+gene4"
    # abundance was never set on this marker, so it must still be 1
    assert right.abundance == 1
    shorter = cassis.ClusterPrediction(left, right)
    shorter.promoters = 3
    shorter.genes = 4

    # second prediction: gene1 .. gene5
    left = ClusterMarker("gene1", Motif(4, 4, score=1))
    left.promoter = "gene1"
    assert left.abundance == 1
    right = ClusterMarker("gene5", Motif(4, 4, score=1))
    right.promoter = "gene5"
    assert right.abundance == 1
    longer = cassis.ClusterPrediction(left, right)
    longer.promoters = 3
    longer.genes = 4

    # order reversed because subregions are ordered by length when starts
    # are the same
    region_predictions = [longer, shorter]

    annotated = create_fake_record()
    # just the same, without adding subregions
    pristine = create_fake_record()

    subregions = cassis.create_subregions(anchor_gene, region_predictions,
                                          annotated)
    # creating the subregions alone must not add anything to the record
    assert annotated.get_feature_count() == pristine.get_feature_count()

    for new_region in subregions:
        annotated.add_subregion(new_region)

    # test subregion features: exactly one new feature per subregion
    expected_count = pristine.get_feature_count() + len(subregions)
    assert annotated.get_feature_count() == expected_count

    for position, prediction in enumerate(region_predictions):
        stored = annotated.get_subregions()[position]
        self.assertEqual(stored.type, "subregion")
        self.assertEqual(stored.tool, "cassis")
        self.assertEqual(stored.anchor, anchor_gene)
        # every qualifier is expected back as a one-element tuple
        expected_qualifiers = (
            ("genes", prediction.genes),
            ("promoters", prediction.promoters),
            ("gene_left", prediction.start.gene),
            ("gene_right", prediction.end.gene),
        )
        for qualifier, value in expected_qualifiers:
            self.assertEqual(stored.get_qualifier(qualifier), (value, ))
def test_regeneration(self):
    """CASSIS results survive a ``to_json`` / ``regenerate_previous_results`` round trip."""
    record = create_fake_record()
    original = cassis.CassisResults(record.id)

    # create a prediction, since it will generate a border with many extra
    # qualifiers
    left = ClusterMarker("gene1", Motif(3, 3, score=1))
    left.promoter = "gene1"
    left.abundance = 2
    right = ClusterMarker("gene4", Motif(3, 3, score=1))
    right.promoter = "gene3+gene4"
    # abundance was never set on this marker, so it must still be 1
    assert right.abundance == 1
    prediction = cassis.ClusterPrediction(left, right)

    original.subregions = cassis.create_subregions("gene1", [prediction],
                                                   record)
    # an empty subregion list would make the comparisons below vacuous
    assert original.subregions

    sequence_text = "cgtacgtacgt"
    original.promoters = [
        Promoter("gene1", 10, 20, seq=Seq(sequence_text)),
        Promoter("gene2", 30, 40, seq=Seq(sequence_text)),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(sequence_text)),
    ]

    serialised = original.to_json()
    restored = cassis.regenerate_previous_results(serialised, record, None)
    assert isinstance(restored, cassis.CassisResults)

    # subregions must match pairwise in location and biopython qualifiers
    assert len(original.subregions) == len(restored.subregions)
    for before, after in zip(original.subregions, restored.subregions):
        assert before.location == after.location
        qualifiers_before = before.to_biopython()[0].qualifiers
        qualifiers_after = after.to_biopython()[0].qualifiers
        assert qualifiers_before == qualifiers_after

    assert restored.promoters == original.promoters
def test_cleanup_outdir(self):
    """``cassis.cleanup_outdir`` keeps only the directories backing a prediction.

    Empty ``meme`` and ``fimo`` directories are created for two anchor genes
    and two motif directory names each. The only prediction supplied is for
    ``gene1``, so after cleanup ``gene1/+03_-03`` must remain while
    ``gene1/+04_-04`` and the whole ``gene4`` directory must be gone.
    """
    anchor_genes = ["gene1", "gene4"]
    cluster = cassis.ClusterPrediction(
        ClusterMarker("gene1", Motif(3, 3, score=1)),
        ClusterMarker("gene4", Motif(3, 3, score=1)))
    cluster.start.promoter = "gene1"
    cluster.end.promoter = "gene3+gene4"
    cluster.genes = 4
    cluster.promoters = 3
    cluster_predictions = {"gene1": [cluster]}

    output_dir = self.options.output_dir
    tools = ("meme", "fimo")

    # create some empty test dirs, most of which should be deleted during
    # the test
    test_dirs = (
        # prediction! --> keep!
        ("gene1", "+03_-03"),
        # prediction for this gene, but not from this motif --> delete
        ("gene1", "+04_-04"),
        # no prediction for this gene --> delete
        ("gene4", "+03_-03"),
        # no prediction for this gene --> delete
        ("gene4", "+04_-04"),
    )
    for gene, motif_dir in test_dirs:
        for tool in tools:
            os.makedirs(os.path.join(output_dir, tool, gene, motif_dir))

    cassis.cleanup_outdir(anchor_genes, cluster_predictions, self.options)

    # assert kept directories
    for tool in tools:
        self.assertIn("gene1", os.listdir(os.path.join(output_dir, tool)))
    for tool in tools:
        self.assertIn("+03_-03",
                      os.listdir(os.path.join(output_dir, tool, "gene1")))

    # assert deleted directories
    for tool in tools:
        self.assertNotIn("gene4",
                         os.listdir(os.path.join(output_dir, tool)))
    for tool in tools:
        self.assertNotIn("+04_-04",
                         os.listdir(os.path.join(output_dir, tool, "gene1")))