def test_normalize_one_evaluation(self):
     """
     Read the raw 'myeval' values, run the normalization for 'myeval', and
     check the 'Normalized_myeval' values read back from clustering_info.
     """
     def make_entry(algorithm, clustering, myeval, myothereval):
         # One clustering_info record holding both evaluation values.
         return {
                 "type": algorithm,
                 "clustering": clustering,
                 "parameters": {},
                 "evaluation": {
                                "myeval": myeval,
                                "myothereval": myothereval
                 }
         }

     clustering_info = {
                        "clustering1": make_entry("algorithm2", "clustering2", 0.5, 1.0),
                        "clustering2": make_entry("algorithm1", "clustering1", -0.5, 2.0),
                        "clustering3": make_entry("algorithm1", "clustering3", 0.2, 0.0)
     }

     # Values as stored, before any normalization is requested.
     raw_values = BestClusteringSelector.get_values_for_evaluation_type("myeval", clustering_info)
     self.assertDictEqual(raw_values, {'clustering1': 0.5, 'clustering3': 0.2, 'clustering2': -0.5})

     # The return value of the normalization is not used; results are read
     # back from clustering_info under the 'Normalized_' prefixed name.
     BestClusteringSelector.normalize_one_evaluation_type("myeval", clustering_info)
     normalized_values = BestClusteringSelector.get_values_for_evaluation_type("Normalized_myeval", clustering_info)
     self.assertDictEqual(normalized_values, {'clustering1': 1.0, 'clustering3': 0.7, 'clustering2': 0.0})
 def test_get_score_for_criteria(self):
     """
     Check the score of individual clusterings for a single criteria.

     The criteria weights 'analysis_1' (action ">") with 1.0 and
     'analysis_2' (action "<") with 0.5. The expected scores are 0.9 for
     'Clustering 1' and 0.933333333333 for 'Clustering 4'.
     """
     criteria = {
                 "analysis_1": {
                                "action": ">",
                                "weight": 1.0
                 },
                 "analysis_2": {
                                "action": "<",
                                "weight": 0.5
                 }
     }

     def normalized(analysis_1, analysis_2):
         # Record holding only the normalized evaluation values.
         return {
                 'evaluation': {
                                'Normalized_analysis_1': analysis_1,
                                'Normalized_analysis_2': analysis_2
                 }
         }

     clustering_info = {
                        'Clustering 1': normalized(1, 0.3),
                        'Clustering 2': normalized(0.7, 1.),
                        'Clustering 3': normalized(0.6, 0.5),
                        'Clustering 4': normalized(0.9, 0.0)
     }

     def score_of(clustering_id):
         return BestClusteringSelector.get_score_for_criteria(clustering_id,
                                                              clustering_info,
                                                              criteria)

     # Scores are computed floating point values, so both are compared with
     # a 12 decimal places tolerance instead of exact equality.
     self.assertAlmostEqual(score_of("Clustering 1"), 0.9, 12)
     self.assertAlmostEqual(score_of("Clustering 4"), 0.933333333333, 12)
 def test_normalize_one_evaluation(self):
     """
     Verify the raw 'myeval' values and, after the normalization call, the
     'Normalized_myeval' values obtained from the same clustering_info.
     """
     def record(algorithm_type, clustering_name, myeval, myothereval):
         # Builds one clustering_info entry with an empty parameters dict.
         return {
             "type": algorithm_type,
             "clustering": clustering_name,
             "parameters": {},
             "evaluation": {
                 "myeval": myeval,
                 "myothereval": myothereval
             }
         }

     clustering_info = {
         "clustering1": record("algorithm2", "clustering2", 0.5, 1.0),
         "clustering2": record("algorithm1", "clustering1", -0.5, 2.0),
         "clustering3": record("algorithm1", "clustering3", 0.2, 0.0)
     }
     selector = BestClusteringSelector

     expected_raw = {
         'clustering1': 0.5,
         'clustering3': 0.2,
         'clustering2': -0.5
     }
     self.assertDictEqual(
         selector.get_values_for_evaluation_type("myeval", clustering_info),
         expected_raw)

     # The normalization result is read back from clustering_info; its
     # return value is not used.
     selector.normalize_one_evaluation_type("myeval", clustering_info)

     expected_normalized = {
         'clustering1': 1.0,
         'clustering3': 0.7,
         'clustering2': 0.0
     }
     self.assertDictEqual(
         selector.get_values_for_evaluation_type("Normalized_myeval",
                                                 clustering_info),
         expected_normalized)
    def test_get_score_for_criteria(self):
        """
        Check the score of individual clusterings for a single criteria.

        The criteria weights 'analysis_1' (action ">") with 1.0 and
        'analysis_2' (action "<") with 0.5. The expected scores are 0.9 for
        'Clustering 1' and 0.933333333333 for 'Clustering 4'.
        """
        criteria = {
            "analysis_1": {
                "action": ">",
                "weight": 1.0
            },
            "analysis_2": {
                "action": "<",
                "weight": 0.5
            }
        }

        def normalized(analysis_1, analysis_2):
            # Record holding only the normalized evaluation values.
            return {
                'evaluation': {
                    'Normalized_analysis_1': analysis_1,
                    'Normalized_analysis_2': analysis_2
                }
            }

        clustering_info = {
            'Clustering 1': normalized(1, 0.3),
            'Clustering 2': normalized(0.7, 1.),
            'Clustering 3': normalized(0.6, 0.5),
            'Clustering 4': normalized(0.9, 0.0)
        }

        def score_of(clustering_id):
            return BestClusteringSelector.get_score_for_criteria(
                clustering_id, clustering_info, criteria)

        # Scores are computed floating point values, so both are compared
        # with a 12 decimal places tolerance instead of exact equality.
        self.assertAlmostEqual(score_of("Clustering 1"), 0.9, 12)
        self.assertAlmostEqual(score_of("Clustering 4"), 0.933333333333, 12)
 def test_get_best_clustering(self):
     """
     Check that the best clustering and criteria are picked from a table of
     scores indexed first by criteria and then by clustering id.
     """
     scores = {
               'criteria 1': {
                              'Clustering 4': 1.4,
                              'Clustering 2': 0.7,
                              'Clustering 3': 0.85,
                              'Clustering 1': 1.35
               },
               'criteria 2': {
                              'Clustering 4': 0.56,
                              'Clustering 2': 0.28,
                              'Clustering 3': 0.34,
                              'Clustering 1': 0.54
               }
     }
     # The returned scores get their own name so the input table is not
     # rebound by the unpacking.
     best_clustering, best_criteria, returned_scores = BestClusteringSelector.get_best_clustering(scores)

     # assertEqual on tuples is order-sensitive and, unlike
     # assertItemsEqual, is available in both Python 2 and Python 3.
     self.assertEqual((best_clustering, best_criteria, returned_scores[best_criteria][best_clustering]),
                      ('Clustering 4', 'criteria 1', 1.4))
 def test_get_best_clustering(self):
     """
     Check that the best clustering and criteria are picked from a table of
     scores indexed first by criteria and then by clustering id.
     """
     scores = {
         'criteria 1': {
             'Clustering 4': 1.4,
             'Clustering 2': 0.7,
             'Clustering 3': 0.85,
             'Clustering 1': 1.35
         },
         'criteria 2': {
             'Clustering 4': 0.56,
             'Clustering 2': 0.28,
             'Clustering 3': 0.34,
             'Clustering 1': 0.54
         }
     }
     # The returned scores get their own name so the input table is not
     # rebound by the unpacking.
     best_clustering, best_criteria, returned_scores = \
         BestClusteringSelector.get_best_clustering(scores)
     best_score = returned_scores[best_criteria][best_clustering]

     # assertEqual on tuples is order-sensitive and, unlike
     # assertItemsEqual, is available in both Python 2 and Python 3.
     self.assertEqual((best_clustering, best_criteria, best_score),
                      ('Clustering 4', 'criteria 1', 1.4))
 def test_get_scores_for_all_clusters_and_criterias(self):
     """
     Regression check of the full score table (criteria -> clustering ->
     score) for two criteria that use the same actions with different
     weights.
     """
     def weighted(weight_1, weight_2):
         # One criteria: 'analysis_1' uses ">" and 'analysis_2' uses "<".
         return {
                 "analysis_1": {
                                "action": ">",
                                "weight": weight_1
                 },
                 "analysis_2": {
                                "action": "<",
                                "weight": weight_2
                 }
         }

     def normalized(analysis_1, analysis_2):
         # Record holding only the normalized evaluation values.
         return {
                 'evaluation': {
                                'Normalized_analysis_1': analysis_1,
                                'Normalized_analysis_2': analysis_2
                 }
         }

     criteria = {
                 "criteria 1": weighted(1.0, 0.5),
                 "criteria 2": weighted(0.4, 0.2)
     }

     clustering_info = {
                        'Clustering 1': normalized(1, 0.3),
                        'Clustering 2': normalized(0.7, 1.),
                        'Clustering 3': normalized(0.6, 0.5),
                        'Clustering 4': normalized(0.9, 0.0)
     }

     # regression, checked
     expected_scores = {
                        'criteria 1': {
                                       'Clustering 4': 0.9333333333333332,
                                       'Clustering 2': 0.4666666666666666,
                                       'Clustering 3': 0.5666666666666667,
                                       'Clustering 1': 0.9
                        },
                        'criteria 2': {
                                       'Clustering 4': 0.9333333333333332,
                                       'Clustering 2': 0.46666666666666656,
                                       'Clustering 3': 0.5666666666666665,
                                       'Clustering 1': 0.8999999999999999
                        }
     }
     computed_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(criteria, clustering_info)
     self.assertDictEqual(expected_scores, computed_scores)
Esempio n. 8
0
    def run(self, clustering_parameters, matrixHandler, workspaceHandler,
            trajectoryHandler):
        """
        Run the four stages of the protocol: clustering exploration,
        filtering, evaluation and best clustering selection.

        Each stage is timed through self.timer, and progress is reported
        through self.notify.

        Returns None when the filter leaves no selected clustering.
        Otherwise returns the tuple (best_clustering_id,
        selected_clusterings, not_selected_clusterings, all_scores).
        """
        # --- Stage 1: clustering exploration ---
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        exploration_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        run_parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters, matrixHandler)
        explorer = ClusteringExplorer(clustering_parameters, matrixHandler,
                                      workspaceHandler, exploration_scheduler,
                                      run_parameters_generator, self.observer)
        clusterings = explorer.run()

        created_info = {"number_of_clusters": len(clusterings)}
        self.notify("Clusterings Created", created_info)
        self.timer.stop("Clustering Exploration")

        # --- Stage 2: first filtering ---
        self.timer.start("Clustering Filtering")
        clustering_filter = ClusteringFilter(
            clustering_parameters["clustering"]["evaluation"], matrixHandler)
        selected, discarded = clustering_filter.filter(clusterings)

        filter_info = {
            "selected": len(selected.keys()),
            "not_selected": len(discarded.keys())
        }
        self.notify("Filter", filter_info)
        self.timer.stop("Clustering Filtering")

        # Nothing passed the filter, so there is nothing to score or select.
        if selected == {}:
            return None

        # --- Stage 3: clustering scoring ---
        self.timer.start("Evaluation")
        evaluation_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        populator = AnalysisPopulator(matrixHandler, trajectoryHandler,
                                      clustering_parameters)
        AnalysisRunner(evaluation_scheduler, selected, populator).evaluate()
        self.timer.stop("Evaluation")

        # --- Stage 4: choose the best clustering ---
        self.timer.start("Selection")
        selector = BestClusteringSelector(clustering_parameters)
        best_clustering_id, all_scores = selector.choose_best(selected)
        self.timer.stop("Selection")

        return best_clustering_id, selected, discarded, all_scores
    def test_get_scores_for_all_clusters_and_criterias(self):
        """
        Regression check of the full score table (criteria -> clustering ->
        score) for two criteria that use the same actions with different
        weights.
        """
        def weighted(weight_1, weight_2):
            # One criteria: 'analysis_1' uses ">" and 'analysis_2' uses "<".
            return {
                "analysis_1": {
                    "action": ">",
                    "weight": weight_1
                },
                "analysis_2": {
                    "action": "<",
                    "weight": weight_2
                }
            }

        def normalized(analysis_1, analysis_2):
            # Record holding only the normalized evaluation values.
            return {
                'evaluation': {
                    'Normalized_analysis_1': analysis_1,
                    'Normalized_analysis_2': analysis_2
                }
            }

        criteria = {
            "criteria 1": weighted(1.0, 0.5),
            "criteria 2": weighted(0.4, 0.2)
        }

        clustering_info = {
            'Clustering 1': normalized(1, 0.3),
            'Clustering 2': normalized(0.7, 1.),
            'Clustering 3': normalized(0.6, 0.5),
            'Clustering 4': normalized(0.9, 0.0)
        }

        # regression, checked
        expected_scores = {
            'criteria 1': {
                'Clustering 4': 0.9333333333333332,
                'Clustering 2': 0.4666666666666666,
                'Clustering 3': 0.5666666666666667,
                'Clustering 1': 0.9
            },
            'criteria 2': {
                'Clustering 4': 0.9333333333333332,
                'Clustering 2': 0.46666666666666656,
                'Clustering 3': 0.5666666666666665,
                'Clustering 1': 0.8999999999999999
            }
        }
        computed_scores = \
            BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
                criteria, clustering_info)
        self.assertDictEqual(expected_scores, computed_scores)
Esempio n. 10
0
    def run(self, clustering):
        """
        Refine a clustering by repartitioning each of its clusters.

        For every cluster in clustering.clusters a set of candidates is
        built: the cluster on its own, plus one repartition obtained with
        self.repartition_with_kmedoids for each k in
        range(2, max_partitions, try_step). The candidates are evaluated and
        the clusters of the one chosen as best are kept.

        NOTE(review): the previous docstring also started to state a
        requirement that refined clusters have no noise, but the sentence
        was cut off. No such check is visible in this method; confirm the
        intended rule.

        Returns a dictionary with the keys "type" ("refined_clustering"),
        "clustering" (a Clustering built from all the kept clusters) and
        "parameters" (self.refinement_parameters).
        """
        max_partitions = self.refinement_parameters["max_partitions"]
        partitions_per_try = (
            float(max_partitions) /
            self.refinement_parameters["tries_per_cluster"])
        # The step between tested k values is never smaller than 1.
        try_step = int(max(1, partitions_per_try))
        matrix = self.matrixHandler.distance_matrix

        refined_clusters = []
        for cluster in clustering.clusters:
            base_id = cluster.id

            # The unrefined cluster competes as a candidate too, so it is
            # kept whenever no repartition is chosen as best.
            candidates = {}
            candidates[base_id] = {
                "type": "refined_base",
                "clustering": Clustering([cluster]),
                "parameters": {}
            }

            submatrix = get_submatrix(matrix, cluster.all_elements)

            # One K-Medoids repartition per tested k.
            # TODO: Generate parameters with parameter generator
            for k in range(2, max_partitions, try_step):
                repartition = self.repartition_with_kmedoids(
                    cluster, k, submatrix)
                candidate_id = "%s_%d" % (base_id, k)
                candidates[candidate_id] = {
                    "type": "refined",
                    "clustering": repartition,
                    "parameters": {
                        "k": k
                    }
                }

            # Evaluate every candidate, then keep the clusters of the best.
            scheduler = scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer)
            populator = AnalysisPopulator(self.matrixHandler,
                                          self.trajectoryHandler,
                                          self.clustering_parameters)
            AnalysisRunner(scheduler, candidates, populator).evaluate()

            selector = BestClusteringSelector(self.clustering_parameters)
            # Only the id is needed; the scores are discarded.
            best_clustering_id, unused_scores = selector.choose_best(
                candidates)
            best_candidate = candidates[best_clustering_id]
            refined_clusters.extend(best_candidate["clustering"].clusters)

        # Wrap all the kept clusters into the refined clustering description.
        return {
            "type": "refined_clustering",
            "clustering": Clustering(refined_clusters),
            "parameters": self.refinement_parameters
        }