def test_regeneration(self):
    # Round-trip check: results serialised with to_json() and rebuilt with
    # regenerate_previous_results() must carry the same subregions
    # (location and qualifiers of the first biopython feature) and the
    # same promoters as the results they were built from.
    record = create_fake_record()
    original = cassis.CassisResults(record.id)

    # create a prediction, since it will generate a border with many
    # extra qualifiers
    left = ClusterMarker("gene1", Motif(3, 3, score=1))
    left.promoter = "gene1"
    left.abundance = 2

    right = ClusterMarker("gene4", Motif(3, 3, score=1))
    right.promoter = "gene3+gene4"
    # abundance was never assigned on this marker, so it must still be 1
    assert right.abundance == 1

    prediction = cassis.ClusterPrediction(left, right)
    original.subregions = cassis.create_subregions(
        "gene1", [prediction], record)
    assert original.subregions

    original.promoters = [
        Promoter("gene1", 10, 20, seq=Seq("cgtacgtacgt")),
        Promoter("gene2", 30, 40, seq=Seq("cgtacgtacgt")),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq("cgtacgtacgt")),
    ]

    serialised = original.to_json()
    restored = cassis.regenerate_previous_results(serialised, record, None)
    assert isinstance(restored, cassis.CassisResults)

    # compare subregions pairwise; the length check guards against zip()
    # silently dropping unmatched entries
    assert len(original.subregions) == len(restored.subregions)
    for before, after in zip(original.subregions, restored.subregions):
        assert before.location == after.location
        feature_before = before.to_biopython()[0]
        feature_after = after.to_biopython()[0]
        assert feature_before.qualifiers == feature_after.qualifiers

    assert restored.promoters == original.promoters
def test_store_subregions(self):
    # this test is similar to test_store_promoters
    #
    # Builds two cluster predictions, converts them to subregions, adds
    # them to a record and checks both the feature count and a selection
    # of the qualifiers on the stored subregions.
    anchor = "gene3"

    # first prediction: gene1 .. gene4
    first_start = ClusterMarker("gene1", Motif(3, 3, score=1))
    first_start.promoter = "gene1"
    first_start.abundance = 2
    first_end = ClusterMarker("gene4", Motif(3, 3, score=1))
    first_end.promoter = "gene3+gene4"
    assert first_end.abundance == 1
    first_cluster = cassis.ClusterPrediction(first_start, first_end)
    first_cluster.promoters = 3
    first_cluster.genes = 4

    # second prediction: gene1 .. gene5
    second_start = ClusterMarker("gene1", Motif(4, 4, score=1))
    second_start.promoter = "gene1"
    assert second_start.abundance == 1
    second_end = ClusterMarker("gene5", Motif(4, 4, score=1))
    second_end.promoter = "gene5"
    assert second_end.abundance == 1
    second_cluster = cassis.ClusterPrediction(second_start, second_end)
    second_cluster.promoters = 3
    second_cluster.genes = 4

    # order reversed because subregions are ordered by length when starts
    # are the same
    region_predictions = [second_cluster, first_cluster]

    record_with_subregions = create_fake_record()
    # just the same, without adding subregions
    record_without_subregions = create_fake_record()

    subregions = cassis.create_subregions(
        anchor, region_predictions, record_with_subregions)
    # at this point both records must still report the same feature count
    count_with = record_with_subregions.get_feature_count()
    count_without = record_without_subregions.get_feature_count()
    assert count_with == count_without

    for new_subregion in subregions:
        record_with_subregions.add_subregion(new_subregion)

    # test subregion features
    expected_count = (
        record_without_subregions.get_feature_count() + len(subregions)
    )
    assert record_with_subregions.get_feature_count() == expected_count

    for index, prediction in enumerate(region_predictions):
        stored = record_with_subregions.get_subregions()[index]

        self.assertEqual(stored.type, "subregion")
        self.assertEqual(stored.tool, "cassis")
        self.assertEqual(stored.anchor, anchor)

        # each qualifier is expected as a one-element tuple
        expected_qualifiers = (
            ("genes", prediction.genes),
            ("promoters", prediction.promoters),
            ("gene_left", prediction.start.gene),
            ("gene_right", prediction.end.gene),
        )
        for qualifier_name, expected_value in expected_qualifiers:
            self.assertEqual(
                stored.get_qualifier(qualifier_name), (expected_value, ))