def test_normalize_one_evaluation(self):
    """
    Checks 'normalize_one_evaluation_type' for the 'myeval' evaluation.

    The raw 'myeval' values are first read back with
    'get_values_for_evaluation_type'. After normalizing, the values stored
    under 'Normalized_myeval' are expected to be 1.0 for the largest raw
    value (0.5), 0.0 for the smallest one (-0.5) and 0.7 for the one in
    between (0.2).
    """
    # clustering id -> (type, clustering, myeval, myothereval)
    fixture_rows = {
        "clustering1": ("algorithm2", "clustering2", 0.5, 1.0),
        "clustering2": ("algorithm1", "clustering1", -0.5, 2.0),
        "clustering3": ("algorithm1", "clustering3", 0.2, 0.0),
    }
    clustering_info = {}
    for clustering_id, row in fixture_rows.items():
        algorithm_type, clustering_ref, myeval, myothereval = row
        clustering_info[clustering_id] = {
            "type": algorithm_type,
            "clustering": clustering_ref,
            "parameters": {},
            "evaluation": {
                "myeval": myeval,
                "myothereval": myothereval,
            },
        }

    # Raw values must be retrievable before any normalization is done.
    raw_values = BestClusteringSelector.get_values_for_evaluation_type(
        "myeval", clustering_info)
    expected_raw = {'clustering1': 0.5, 'clustering3': 0.2, 'clustering2': -0.5}
    self.assertDictEqual(raw_values, expected_raw)

    # The return value is not used here; the normalized values are read
    # back from 'clustering_info' itself.
    BestClusteringSelector.normalize_one_evaluation_type(
        "myeval", clustering_info)

    normalized_values = BestClusteringSelector.get_values_for_evaluation_type(
        "Normalized_myeval", clustering_info)
    expected_normalized = {'clustering1': 1.0, 'clustering3': 0.7, 'clustering2': 0.0}
    self.assertDictEqual(normalized_values, expected_normalized)
def test_get_score_for_criteria(self):
    """
    Checks the score that 'get_score_for_criteria' gives to two clusterings
    for a criteria made of two weighted analyses.

    NOTE(review): the expected numbers (0.9 and 0.9333...) are consistent
    with a weighted mean where an analysis with action "<" contributes
    (1 - value) -- inferred from the figures only, confirm against the
    implementation.
    """
    criteria = {
        "analysis_1": {"action": ">", "weight": 1.0},
        "analysis_2": {"action": "<", "weight": 0.5},
    }

    # (clustering id, Normalized_analysis_1, Normalized_analysis_2)
    normalized_rows = (
        ('Clustering 1', 1, 0.3),
        ('Clustering 2', 0.7, 1.),
        ('Clustering 3', 0.6, 0.5),
        ('Clustering 4', 0.9, 0.0),
    )
    clustering_info = {}
    for clustering_id, first_value, second_value in normalized_rows:
        clustering_info[clustering_id] = {
            'evaluation': {
                'Normalized_analysis_1': first_value,
                'Normalized_analysis_2': second_value,
            }
        }

    score_of_first = BestClusteringSelector.get_score_for_criteria(
        "Clustering 1", clustering_info, criteria)
    self.assertEqual(score_of_first, 0.9)

    score_of_fourth = BestClusteringSelector.get_score_for_criteria(
        "Clustering 4", clustering_info, criteria)
    # Compared up to 12 decimal places.
    self.assertAlmostEqual(score_of_fourth, 0.933333333333, 12)
def test_normalize_one_evaluation(self):
    """
    Normalizes the 'myeval' evaluation of three clusterings and checks the
    values stored under 'Normalized_myeval'.

    Expected outcome: the largest raw value (0.5) becomes 1.0, the smallest
    (-0.5) becomes 0.0 and the remaining one (0.2) becomes 0.7.
    """
    def make_entry(algorithm_type, clustering_ref, myeval, myothereval):
        # Every call returns fresh dictionaries, so entries share no state.
        return {
            "type": algorithm_type,
            "clustering": clustering_ref,
            "parameters": {},
            "evaluation": {
                "myeval": myeval,
                "myothereval": myothereval
            }
        }

    clustering_info = {
        "clustering1": make_entry("algorithm2", "clustering2", 0.5, 1.0),
        "clustering2": make_entry("algorithm1", "clustering1", -0.5, 2.0),
        "clustering3": make_entry("algorithm1", "clustering3", 0.2, 0.0)
    }

    # Step 1: the raw values can be collected per clustering id.
    before = BestClusteringSelector.get_values_for_evaluation_type(
        "myeval", clustering_info)
    self.assertDictEqual(
        before,
        {'clustering1': 0.5, 'clustering3': 0.2, 'clustering2': -0.5})

    # Step 2: normalize; results are read back from 'clustering_info'.
    BestClusteringSelector.normalize_one_evaluation_type(
        "myeval", clustering_info)

    # Step 3: the normalized values are available under the prefixed name.
    after = BestClusteringSelector.get_values_for_evaluation_type(
        "Normalized_myeval", clustering_info)
    self.assertDictEqual(
        after,
        {'clustering1': 1.0, 'clustering3': 0.7, 'clustering2': 0.0})
def test_get_score_for_criteria(self):
    """
    Scores 'Clustering 1' and 'Clustering 4' against a criteria with two
    weighted analyses and compares the results with known values.

    NOTE(review): the expected values match a weighted mean in which the
    "<" analysis is counted as (1 - value); this is deduced from the numbers
    and should be confirmed against 'get_score_for_criteria'.
    """
    def normalized(first_value, second_value):
        # Builds the per-clustering record holding both normalized analyses.
        return {
            'evaluation': {
                'Normalized_analysis_1': first_value,
                'Normalized_analysis_2': second_value
            }
        }

    criteria = {
        "analysis_1": {"action": ">", "weight": 1.0},
        "analysis_2": {"action": "<", "weight": 0.5}
    }
    clustering_info = {
        'Clustering 1': normalized(1, 0.3),
        'Clustering 2': normalized(0.7, 1.),
        'Clustering 3': normalized(0.6, 0.5),
        'Clustering 4': normalized(0.9, 0.0)
    }

    get_score = BestClusteringSelector.get_score_for_criteria

    self.assertEqual(
        get_score("Clustering 1", clustering_info, criteria),
        0.9)
    # Third positional argument of assertAlmostEqual is the number of
    # decimal places used in the comparison.
    self.assertAlmostEqual(
        get_score("Clustering 4", clustering_info, criteria),
        0.933333333333,
        12)
def test_get_best_clustering(self):
    """
    Checks that 'get_best_clustering' picks the (clustering, criteria) pair
    with the highest score: here 'Clustering 4' under 'criteria 1' (1.4).

    The call returns three values; the third one is indexed as
    [criteria id][clustering id] to recover the winning score.
    """
    scores = {
        'criteria 1': {
            'Clustering 4': 1.4,
            'Clustering 2': 0.7,
            'Clustering 3': 0.85,
            'Clustering 1': 1.35
        },
        'criteria 2': {
            'Clustering 4': 0.56,
            'Clustering 2': 0.28,
            'Clustering 3': 0.34,
            'Clustering 1': 0.54
        }
    }

    # The returned score table gets its own name so that the input fixture
    # is not silently replaced.
    bclust, bcrit, returned_scores = BestClusteringSelector.get_best_clustering(scores)

    # Tuples are compared position by position. The previous
    # 'assertItemsEqual' ignored the order of the elements (so a swapped
    # clustering/criteria id could not be told apart by the assertion
    # itself) and it is not available in Python 3's unittest.
    self.assertEqual(
        (bclust, bcrit, returned_scores[bcrit][bclust]),
        ('Clustering 4', 'criteria 1', 1.4))
def test_get_best_clustering(self):
    """
    Feeds a score table for two criteria and four clusterings to
    'get_best_clustering' and checks that the best pair is
    'Clustering 4' / 'criteria 1', whose score is 1.4.
    """
    scores = {
        'criteria 1': {
            'Clustering 4': 1.4,
            'Clustering 2': 0.7,
            'Clustering 3': 0.85,
            'Clustering 1': 1.35
        },
        'criteria 2': {
            'Clustering 4': 0.56,
            'Clustering 2': 0.28,
            'Clustering 3': 0.34,
            'Clustering 1': 0.54
        }
    }

    result = BestClusteringSelector.get_best_clustering(scores)
    # Unpacked under separate names so the input table stays untouched.
    best_clustering, best_criteria, score_table = result
    best_score = score_table[best_criteria][best_clustering]

    # An ordered comparison is used on purpose: 'assertItemsEqual' only
    # checked that the same elements were present in any order, and it no
    # longer exists in Python 3 (where it was renamed to assertCountEqual).
    self.assertEqual(
        (best_clustering, best_criteria, best_score),
        ('Clustering 4', 'criteria 1', 1.4))
def test_get_scores_for_all_clusters_and_criterias(self):
    """
    Regression test for 'get_scores_for_all_clusters_and_criterias'.

    Two criteria are used whose weights differ only by a common factor
    (1.0/0.5 versus 0.4/0.2); the expected scores of both criteria are the
    same except for floating point noise in the last digits.
    """
    def weighted_pair(first_weight, second_weight):
        # One criteria: two analyses with opposite actions.
        return {
            "analysis_1": {"action": ">", "weight": first_weight},
            "analysis_2": {"action": "<", "weight": second_weight},
        }

    criteria = {
        "criteria 1": weighted_pair(1.0, 0.5),
        "criteria 2": weighted_pair(0.4, 0.2),
    }

    # (clustering id, Normalized_analysis_1, Normalized_analysis_2)
    normalized_rows = (
        ('Clustering 1', 1, 0.3),
        ('Clustering 2', 0.7, 1.),
        ('Clustering 3', 0.6, 0.5),
        ('Clustering 4', 0.9, 0.0),
    )
    clustering_info = {}
    for clustering_id, first_value, second_value in normalized_rows:
        clustering_info[clustering_id] = {
            'evaluation': {
                'Normalized_analysis_1': first_value,
                'Normalized_analysis_2': second_value,
            }
        }

    # regression, checked
    expected_scores = {
        'criteria 1': {
            'Clustering 4': 0.9333333333333332,
            'Clustering 2': 0.4666666666666666,
            'Clustering 3': 0.5666666666666667,
            'Clustering 1': 0.9
        },
        'criteria 2': {
            'Clustering 4': 0.9333333333333332,
            'Clustering 2': 0.46666666666666656,
            'Clustering 3': 0.5666666666666665,
            'Clustering 1': 0.8999999999999999
        }
    }
    computed_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
        criteria, clustering_info)
    self.assertDictEqual(expected_scores, computed_scores)
def run(self, clustering_parameters, matrixHandler, workspaceHandler, trajectoryHandler):
    """
    Runs the four stages of the best-clustering search: exploration,
    filtering, evaluation and selection. Each stage is timed with
    'self.timer' and progress is reported through 'self.notify'.

    @param clustering_parameters: Parameters of the run. This method reads
    its ["global"]["control"] and ["clustering"]["evaluation"] entries and
    forwards the whole object to the collaborators it creates.
    @param matrixHandler: Forwarded to the explorer, the parameters
    generator, the filter and the analysis populator.
    @param workspaceHandler: Forwarded to the explorer.
    @param trajectoryHandler: Forwarded to the analysis populator.

    @return: None if the filter leaves no clustering selected. Otherwise the
    tuple (best clustering id, selected clusterings, not selected
    clusterings, all scores).
    """
    control_parameters = clustering_parameters["global"]["control"]

    # --- Stage 1: clustering exploration ---
    self.notify("Exploration Started", [])
    self.timer.start("Clustering Exploration")
    exploration_scheduler = scheduling_tools.build_scheduler(control_parameters,
                                                             self.observer)
    parameters_generator = AlgorithmRunParametersGenerator(clustering_parameters,
                                                           matrixHandler)
    explorer = ClusteringExplorer(clustering_parameters,
                                  matrixHandler,
                                  workspaceHandler,
                                  exploration_scheduler,
                                  parameters_generator,
                                  self.observer)
    clusterings = explorer.run()
    self.notify("Clusterings Created", {"number_of_clusters": len(clusterings)})
    self.timer.stop("Clustering Exploration")

    # --- Stage 2: first filtering ---
    self.timer.start("Clustering Filtering")
    clustering_filter = ClusteringFilter(clustering_parameters["clustering"]["evaluation"],
                                         matrixHandler)
    selected_clusterings, not_selected_clusterings = clustering_filter.filter(clusterings)
    filter_summary = {
        "selected": len(selected_clusterings.keys()),
        "not_selected": len(not_selected_clusterings.keys())
    }
    self.notify("Filter", filter_summary)
    self.timer.stop("Clustering Filtering")

    # Nothing passed the filter: there is nothing to evaluate or select.
    if selected_clusterings == {}:
        return None

    # --- Stage 3: clustering scoring ---
    self.timer.start("Evaluation")
    evaluation_scheduler = scheduling_tools.build_scheduler(control_parameters,
                                                            self.observer)
    populator = AnalysisPopulator(matrixHandler,
                                  trajectoryHandler,
                                  clustering_parameters)
    analyzer = AnalysisRunner(evaluation_scheduler, selected_clusterings, populator)
    analyzer.evaluate()
    self.timer.stop("Evaluation")

    # --- Stage 4: choose the best clustering ---
    self.timer.start("Selection")
    selector = BestClusteringSelector(clustering_parameters)
    best_clustering_id, all_scores = selector.choose_best(selected_clusterings)
    self.timer.stop("Selection")

    return best_clustering_id, selected_clusterings, not_selected_clusterings, all_scores
def test_get_scores_for_all_clusters_and_criterias(self):
    """
    Computes the scores of four clusterings for two criteria and compares
    the whole result table with previously checked values.

    Both criteria use the same analyses and actions; only the weights change
    (1.0/0.5 and 0.4/0.2), and the expected tables differ just in the last
    floating point digits.
    """
    def evaluation(first_value, second_value):
        # Record of one clustering with its two normalized analyses.
        return {
            'evaluation': {
                'Normalized_analysis_1': first_value,
                'Normalized_analysis_2': second_value
            }
        }

    criteria = {}
    for criteria_id, first_weight, second_weight in (("criteria 1", 1.0, 0.5),
                                                     ("criteria 2", 0.4, 0.2)):
        criteria[criteria_id] = {
            "analysis_1": {"action": ">", "weight": first_weight},
            "analysis_2": {"action": "<", "weight": second_weight}
        }

    clustering_info = {
        'Clustering 1': evaluation(1, 0.3),
        'Clustering 2': evaluation(0.7, 1.),
        'Clustering 3': evaluation(0.6, 0.5),
        'Clustering 4': evaluation(0.9, 0.0)
    }

    # regression, checked
    expected = {
        'criteria 1': {
            'Clustering 4': 0.9333333333333332,
            'Clustering 2': 0.4666666666666666,
            'Clustering 3': 0.5666666666666667,
            'Clustering 1': 0.9
        },
        'criteria 2': {
            'Clustering 4': 0.9333333333333332,
            'Clustering 2': 0.46666666666666656,
            'Clustering 3': 0.5666666666666665,
            'Clustering 1': 0.8999999999999999
        }
    }
    obtained = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
        criteria, clustering_info)
    self.assertDictEqual(expected, obtained)
def run(self, clustering):
    """
    Refines a clustering by repartitioning each of its clusters with
    'repartition_with_kmedoids' for several values of k, and keeping, for
    every cluster, the best scored alternative (the untouched cluster is one
    of the candidates).

    NOTE(review): the previous docstring spoke of "k-means" and ended with
    the unfinished sentence "New clusters obtained from a cluster must have
    no noise and". This method itself performs no noise check -- confirm
    the intended constraint.

    @param clustering: The clustering to refine; only its 'clusters'
    attribute is read here.

    @return: A dictionary with keys "type" ("refined_clustering"),
    "clustering" (a Clustering built from all the chosen clusters) and
    "parameters" (the refinement parameters).
    """
    max_partitions = self.refinement_parameters["max_partitions"]
    tries_per_cluster = self.refinement_parameters["tries_per_cluster"]
    # Distance between consecutive k values; never smaller than 1.
    try_step = int(max(1, float(max_partitions) / tries_per_cluster))

    matrix = self.matrixHandler.distance_matrix
    new_clusters = []

    for cluster in clustering.clusters:
        base_id = cluster.id

        # The original cluster is registered as a candidate too, so that it
        # also competes for the best clustering prize.
        candidates = {}
        candidates[base_id] = {
            "type": "refined_base",
            "clustering": Clustering([cluster]),
            "parameters": {}
        }

        submatrix = get_submatrix(matrix, cluster.all_elements)

        # Try some K-Medoids partitions (k = max_partitions is not tried).
        # TODO: Generate parameters with parameter generator
        for k in range(2, max_partitions, try_step):
            # Kept under its own name so the 'clustering' argument is not
            # shadowed.
            partition = self.repartition_with_kmedoids(cluster, k, submatrix)
            candidate_id = "%s_%d" % (base_id, k)
            candidates[candidate_id] = {
                "type": "refined",
                "clustering": partition,
                "parameters": {"k": k}
            }

        # Evaluate every candidate and pick the best one.
        scheduler = scheduling_tools.build_scheduler(
            self.clustering_parameters["clustering"]["control"],
            self.observer)
        populator = AnalysisPopulator(self.matrixHandler,
                                      self.trajectoryHandler,
                                      self.clustering_parameters)
        AnalysisRunner(scheduler, candidates, populator).evaluate()

        selector = BestClusteringSelector(self.clustering_parameters)
        # The scores are returned along with the id but are not needed here.
        best_clustering_id, _unused_scores = selector.choose_best(candidates)

        winner = candidates[best_clustering_id]["clustering"]
        new_clusters.extend(winner.clusters)

    # All the chosen clusters together form the refined clustering.
    return {
        "type": "refined_clustering",
        "clustering": Clustering(new_clusters),
        "parameters": self.refinement_parameters
    }