def test_check_clustering(self):
    """
    check_clustering must report nothing for a clustering inside every
    configured limit, and one entry per violated limit otherwise.
    """
    evaluation_parameters = {
        "maximum_noise": 15,
        "maximum_clusters": 30,
        "minimum_clusters": 5,
        "minimum_cluster_size": 50
    }
    clustering_filter = ClusteringFilter(evaluation_parameters, MatrixHandlerMock(1000))

    # 25 clusters / 900 elements: no rejection reasons are expected.
    accepted_clustering = ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
    self.assertItemsEqual(clustering_filter.check_clustering(accepted_clustering), [])

    # 50 clusters / 800 elements: both the cluster-count and the noise
    # limits are expected to be reported (in any order).
    rejected_clustering = ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
    expected_reasons = [
        {
            'reason': 'TOO_MUCH_CLUSTERS',
            'data': {'current': 50, 'maximum': 30}
        },
        {
            'reason': 'TOO_MUCH_NOISE',
            'data': {'current': 20.0, 'maximum': 15}
        }
    ]
    self.assertItemsEqual(clustering_filter.check_clustering(rejected_clustering),
                          expected_reasons)
def test_check_num_clusters_in_range(self):
    """
    check_num_clusters_in_range must accept a cluster count between the
    configured minimum (5) and maximum (30) and report a reason when the
    count falls below or above that range.
    """
    clustering_filter = ClusteringFilter({
                                          "minimum_clusters": 5,
                                          "maximum_clusters": 30,
                                          },
                                         MatrixHandlerMock(1000))

    # (number of clusters, expected list of rejection reasons)
    cases = [
        (10, []),
        (2, [{'reason': 'TOO_FEW_CLUSTERS',
              'data': {'current': 2, 'minimum': 5}}]),
        (35, [{'reason': 'TOO_MUCH_CLUSTERS',
               'data': {'current': 35, 'maximum': 30}}]),
    ]

    for cluster_count, expected_reasons in cases:
        clustering = ClusteringMock(number_of_clusters = cluster_count,
                                    number_of_elements = 1000)
        self.assertItemsEqual(
            clustering_filter.check_num_clusters_in_range(clustering),
            expected_reasons)
def test_filter(self):
    """
    filter must split the candidate clusterings into a selected and a
    not-selected dictionary.

    Of the four candidates, "clustering 1" (50 clusters) and
    "clustering 4" (31 clusters) exceed the maximum of 30 clusters, and
    "clustering 2" and "clustering 3" are built from identical mock
    parameters. The test expects only "clustering 3" to be selected.
    """
    myFilter = ClusteringFilter({
                                 "maximum_noise": 15,
                                 "maximum_clusters": 30,
                                 "minimum_clusters": 5,
                                 "minimum_cluster_size": 50
                                 },
                                MatrixHandlerMock(1000))

    clustering_info = {
        "clustering 1": {
            "clustering": ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
        },
        "clustering 2": {
            "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
        },
        "clustering 3": {
            "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
        },
        "clustering 4": {
            "clustering": ClusteringMock(number_of_clusters = 31, number_of_elements = 900)
        }
    }

    selected, not_selected = myFilter.filter(clustering_info)

    # Separate equality assertions (instead of the deprecated
    # assert_(a and b)) so that a failure reports which count was wrong
    # and what its actual value was.
    self.assertEqual(len(selected), 1)
    self.assertEqual(len(not_selected), 3)

    self.assertItemsEqual(selected.keys(), ["clustering 3"])
    self.assertItemsEqual(not_selected.keys(),
                          ["clustering 1", "clustering 2", "clustering 4"])
def run(self, clustering_parameters, matrix_handler, data_handler, workspaceHandler):
    """
    Runs the four timed stages of the protocol: clustering exploration,
    filtering, evaluation and selection of the best clustering.

    Returns None when the filter leaves no selected clustering. Otherwise
    returns the tuple (best_clustering_id, selected_clusterings,
    not_selected_clusterings, all_scores).
    """
    # ---------------------------------
    # Stage 1: clustering exploration
    # ---------------------------------
    self.notify("Exploration Started", [])
    self.timer.start("Clustering Exploration")
    exploration_scheduler = scheduling_tools.build_scheduler(
                                        clustering_parameters["global"]["control"],
                                        self.observer)
    run_parameters_generator = AlgorithmRunParametersGenerator(clustering_parameters,
                                                               matrix_handler)
    explorer = ClusteringExplorer(clustering_parameters,
                                  matrix_handler,
                                  workspaceHandler,
                                  exploration_scheduler,
                                  run_parameters_generator,
                                  self.observer)
    clusterings = explorer.run()
    # The payload key is "number_of_clusters", but the value is the number
    # of clusterings the exploration produced.
    self.notify("Clusterings Created", {"number_of_clusters":len(clusterings)})
    self.timer.stop("Clustering Exploration")

    # ---------------------------------
    # Stage 2: filtering
    # ---------------------------------
    self.timer.start("Clustering Filtering")
    clustering_filter = ClusteringFilter(clustering_parameters["clustering"]["evaluation"],
                                         matrix_handler)
    selected_clusterings, not_selected_clusterings = clustering_filter.filter(clusterings)
    filter_summary = {
        "selected":len(selected_clusterings.keys()),
        "not_selected":len(not_selected_clusterings.keys())
    }
    self.notify("Filter", filter_summary)
    self.timer.stop("Clustering Filtering")

    # Without any surviving clustering there is nothing to evaluate.
    if selected_clusterings == {}:
        return None

    # ---------------------------------
    # Stage 3: evaluation (scoring)
    # ---------------------------------
    self.timer.start("Evaluation")
    evaluation_scheduler = scheduling_tools.build_scheduler(
                                        clustering_parameters["global"]["control"],
                                        self.observer)
    populator = AnalysisPopulator(matrix_handler, data_handler, clustering_parameters)
    analyzer = AnalysisRunner(evaluation_scheduler, selected_clusterings, populator)
    analyzer.evaluate()
    self.timer.stop("Evaluation")

    # ---------------------------------
    # Stage 4: best clustering selection
    # ---------------------------------
    self.timer.start("Selection")
    selector = BestClusteringSelector(clustering_parameters)
    best_clustering_id, all_scores = selector.choose_best(selected_clusterings)
    self.timer.stop("Selection")

    return (best_clustering_id,
            selected_clusterings,
            not_selected_clusterings,
            all_scores)
def test_filter_repeated(self):
    """
    filter_repeated must move a clustering that duplicates another one to
    the not-selected dictionary, recording the id of the clustering it is
    equal to as the rejection reason.
    """
    # "clustering 2" and "clustering 3" are built from identical mock
    # parameters; "clustering 1" differs from both.
    candidates = {
        "clustering 1": {
            "clustering": ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
        },
        "clustering 2": {
            "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
        },
        "clustering 3": {
            "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
        }
    }

    clustering_filter = ClusteringFilter({}, MatrixHandlerMock(1000))
    kept, discarded = clustering_filter.filter_repeated(candidates, {})

    self.assertItemsEqual(kept.keys(), ["clustering 1", "clustering 3"])
    self.assertItemsEqual(discarded.keys(), ["clustering 2"])

    expected_reason = {
        'reason': 'EQUAL_TO_OTHER_CLUSTERING',
        'data': {'id': 'clustering 3'}
    }
    self.assertDictEqual(discarded["clustering 2"]["reasons"][0], expected_reason)
def test_check_noise_level(self):
    """
    check_noise_level must accept clusterings whose noise is at or below
    the configured maximum (15) and report TOO_MUCH_NOISE above it.
    """
    clustering_filter = ClusteringFilter({
                                          "maximum_noise": 15,
                                          },
                                         MatrixHandlerMock(1000))

    too_much_noise = {
        'reason': 'TOO_MUCH_NOISE',
        'data': {'current': 20.0, 'maximum': 15}
    }

    # (number of clustered elements, expected list of rejection reasons)
    cases = [
        (900, []),                  # 10% noise
        (850, []),                  # 15% noise
        (800, [too_much_noise]),    # 20% noise
    ]

    for clustered_elements, expected_reasons in cases:
        clustering = ClusteringMock(number_of_clusters = 10,
                                    number_of_elements = clustered_elements)
        self.assertItemsEqual(clustering_filter.check_noise_level(clustering),
                              expected_reasons)
def run(self, clustering_parameters, matrixHandler, workspaceHandler, trajectoryHandler):
    """
    Executes the protocol as four timed steps: exploration of clusterings,
    filtering, scoring of the survivors and choice of the best one.

    Returns None if no clustering is selected by the filter; otherwise the
    tuple (best_clustering_id, selected_clusterings,
    not_selected_clusterings, all_scores).
    """
    # === Step 1: explore clusterings ===
    self.notify("Exploration Started", [])
    self.timer.start("Clustering Exploration")
    scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"],
        self.observer)
    parameters_generator = AlgorithmRunParametersGenerator(
        clustering_parameters,
        matrixHandler)
    clusterings = ClusteringExplorer(
        clustering_parameters,
        matrixHandler,
        workspaceHandler,
        scheduler,
        parameters_generator,
        self.observer).run()
    # Note: the "number_of_clusters" entry carries the count of clusterings
    # returned by the explorer.
    created_payload = {"number_of_clusters": len(clusterings)}
    self.notify("Clusterings Created", created_payload)
    self.timer.stop("Clustering Exploration")

    # === Step 2: filter clusterings ===
    self.timer.start("Clustering Filtering")
    evaluation_parameters = clustering_parameters["clustering"]["evaluation"]
    filter_result = ClusteringFilter(evaluation_parameters, matrixHandler).filter(clusterings)
    selected_clusterings, not_selected_clusterings = filter_result
    self.notify("Filter", {
        "selected": len(selected_clusterings.keys()),
        "not_selected": len(not_selected_clusterings.keys())
    })
    self.timer.stop("Clustering Filtering")

    # Nothing passed the filter, so the remaining steps are skipped.
    if selected_clusterings == {}:
        return None

    # === Step 3: score the selected clusterings ===
    self.timer.start("Evaluation")
    scoring_scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"],
        self.observer)
    analysis_populator = AnalysisPopulator(matrixHandler,
                                           trajectoryHandler,
                                           clustering_parameters)
    AnalysisRunner(scoring_scheduler,
                   selected_clusterings,
                   analysis_populator).evaluate()
    self.timer.stop("Evaluation")

    # === Step 4: choose the best clustering ===
    self.timer.start("Selection")
    choice = BestClusteringSelector(clustering_parameters).choose_best(selected_clusterings)
    best_clustering_id, all_scores = choice
    self.timer.stop("Selection")

    return best_clustering_id, selected_clusterings, not_selected_clusterings, all_scores