Esempio n. 1
0
 def test_check_clustering(self):
     """Check the list of rejection reasons produced by check_clustering.

     A clustering that stays inside every configured limit must yield an
     empty list; one that breaks two limits must yield both reasons.
     """
     filter_parameters = {
         "maximum_noise": 15,
         "maximum_clusters": 30,
         "minimum_clusters": 5,
         "minimum_cluster_size": 50
     }
     cluster_filter = ClusteringFilter(filter_parameters, MatrixHandlerMock(1000))

     # 25 clusters / 900 elements: no reason to reject is expected.
     acceptable = ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
     self.assertItemsEqual(cluster_filter.check_clustering(acceptable), [])

     # 50 clusters / 800 elements: above the 30-cluster maximum and, per the
     # expected payload below, at a noise value of 20.0 against a maximum of 15.
     rejected = ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
     too_many_clusters = {
         'reason': 'TOO_MUCH_CLUSTERS',
         'data': {'current': 50, 'maximum': 30}
     }
     too_much_noise = {
         'reason': 'TOO_MUCH_NOISE',
         'data': {'current': 20.0, 'maximum': 15}
     }
     self.assertItemsEqual(cluster_filter.check_clustering(rejected),
                           [too_many_clusters, too_much_noise])
Esempio n. 2
0
 def test_check_num_clusters_in_range(self):
     """Check check_num_clusters_in_range against the 5..30 cluster limits.

     Each case pairs a number of clusters with the rejection reasons the
     filter is expected to return for it (an empty list means accepted).
     """
     cluster_limits = {
         "minimum_clusters": 5,
         "maximum_clusters": 30,
     }
     cluster_filter = ClusteringFilter(cluster_limits, MatrixHandlerMock(1000))

     # (number of clusters, expected rejection reasons)
     cases = [
         # Inside the allowed range.
         (10, []),
         # Below the minimum.
         (2, [{'reason': 'TOO_FEW_CLUSTERS',
               'data': {'current': 2, 'minimum': 5}}]),
         # Above the maximum.
         (35, [{'reason': 'TOO_MUCH_CLUSTERS',
                'data': {'current': 35, 'maximum': 30}}]),
     ]

     for cluster_count, expected_reasons in cases:
         clustering = ClusteringMock(number_of_clusters = cluster_count,
                                     number_of_elements = 1000)
         self.assertItemsEqual(cluster_filter.check_num_clusters_in_range(clustering),
                               expected_reasons)
Esempio n. 3
0
 def test_filter(self):
     """Check that filter() splits clusterings into selected / not selected.

     Four clusterings are fed to the filter and the test expects only
     "clustering 3" to be selected; the other three must end up in the
     not-selected dictionary.
     """
     myFilter = ClusteringFilter({
                                     "maximum_noise": 15,
                                     "maximum_clusters": 30,
                                     "minimum_clusters": 5,
                                     "minimum_cluster_size": 50
                                  },
                                 MatrixHandlerMock(1000))

     clustering_info = {
         # 50 clusters: above maximum_clusters (30).
         "clustering 1": {
             "clustering": ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
         },
         # "clustering 2" and "clustering 3" are built with identical
         # parameters; only "clustering 3" is expected to be selected.
         "clustering 2": {
             "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
         },
         "clustering 3": {
             "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
         },
         # 31 clusters: just above maximum_clusters (30).
         "clustering 4": {
             "clustering": ClusteringMock(number_of_clusters = 31, number_of_elements = 900)
         }
     }

     selected, not_selected = myFilter.filter(clustering_info)

     # Separate assertEqual calls (instead of the deprecated assert_ on a
     # compound boolean) so a failure reports which size was wrong.
     self.assertEqual(len(selected), 1)
     self.assertEqual(len(not_selected), 3)
     self.assertItemsEqual(selected.keys(), ["clustering 3"])
     self.assertItemsEqual(not_selected.keys(), ["clustering 1", "clustering 2", "clustering 4"])
Esempio n. 4
0
    def run(self, clustering_parameters, matrix_handler, data_handler, workspaceHandler):
        """
        Run the four stages in order: clustering exploration, filtering,
        evaluation of the selected clusterings and selection of the best one.
        Each stage is wrapped in a named timer.

        Returns None when the filter leaves no clustering selected. Otherwise
        returns the tuple (best_clustering_id, selected_clusterings,
        not_selected_clusterings, all_scores). Callers must handle both shapes.
        """
        # --- Stage 1: clustering exploration ---
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        exploration_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"],
            self.observer)
        run_parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters,
            matrix_handler)
        explorer = ClusteringExplorer(clustering_parameters,
                                      matrix_handler,
                                      workspaceHandler,
                                      exploration_scheduler,
                                      run_parameters_generator,
                                      self.observer)
        clusterings = explorer.run()

        # The notification is sent before the exploration timer is stopped.
        self.notify("Clusterings Created", {"number_of_clusters":len(clusterings)})
        self.timer.stop("Clustering Exploration")

        # --- Stage 2: first filtering ---
        self.timer.start("Clustering Filtering")
        clustering_filter = ClusteringFilter(
            clustering_parameters["clustering"]["evaluation"],
            matrix_handler)
        selected_clusterings, not_selected_clusterings = clustering_filter.filter(clusterings)

        filter_summary = {
            "selected":len(selected_clusterings.keys()),
            "not_selected":len(not_selected_clusterings.keys())
        }
        self.notify("Filter", filter_summary)
        self.timer.stop("Clustering Filtering")

        # Nothing survived the filter: there is nothing to evaluate or choose.
        if selected_clusterings == {}:
            return None

        # --- Stage 3: clustering scoring ---
        self.timer.start("Evaluation")
        # A new scheduler is built for the analysis stage.
        analysis_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"],
            self.observer)
        populator = AnalysisPopulator(matrix_handler,
                                      data_handler,
                                      clustering_parameters)
        analyzer = AnalysisRunner(analysis_scheduler,
                                  selected_clusterings,
                                  populator)
        analyzer.evaluate()
        self.timer.stop("Evaluation")

        # --- Stage 4: choose the best clustering ---
        self.timer.start("Selection")
        selector = BestClusteringSelector(clustering_parameters)
        best_clustering_id, all_scores = selector.choose_best(selected_clusterings)
        self.timer.stop("Selection")

        return best_clustering_id, selected_clusterings, not_selected_clusterings, all_scores
Esempio n. 5
0
    def test_filter_repeated(self):
        """Check that filter_repeated rejects a clustering equal to another.

        "clustering 2" and "clustering 3" are built with identical parameters;
        the test expects "clustering 2" to be rejected with a reason that
        points at "clustering 3".
        """
        clustering_info = {
            "clustering 1": {
                "clustering": ClusteringMock(number_of_clusters = 50, number_of_elements = 800)
            },
            "clustering 2": {
                "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
            },
            "clustering 3": {
                "clustering": ClusteringMock(number_of_clusters = 25, number_of_elements = 900)
            }
        }

        # No evaluation parameters are given to the filter for this check.
        repeated_filter = ClusteringFilter({}, MatrixHandlerMock(1000))
        kept, rejected = repeated_filter.filter_repeated(clustering_info, {})

        self.assertItemsEqual(kept.keys(), ["clustering 1", "clustering 3"])
        self.assertItemsEqual(rejected.keys(), ["clustering 2"])

        expected_reason = {
            'reason': 'EQUAL_TO_OTHER_CLUSTERING',
            'data': {'id': 'clustering 3'}
        }
        first_reason = rejected["clustering 2"]["reasons"][0]
        self.assertDictEqual(first_reason, expected_reason)
Esempio n. 6
0
 def test_check_noise_level(self):
     """Check check_noise_level around the configured 15% noise maximum.

     Clusterings at 10% and exactly 15% noise must be accepted (empty
     list); one at 20% noise must be rejected with TOO_MUCH_NOISE.
     """
     noise_limit = {
         "maximum_noise": 15,
     }
     noise_filter = ClusteringFilter(noise_limit, MatrixHandlerMock(1000))

     # 10% noise
     low_noise = ClusteringMock(number_of_clusters = 10, number_of_elements = 900)
     self.assertItemsEqual(noise_filter.check_noise_level(low_noise), [])

     # 15% noise: exactly at the limit, still accepted
     limit_noise = ClusteringMock(number_of_clusters = 10, number_of_elements = 850)
     self.assertItemsEqual(noise_filter.check_noise_level(limit_noise), [])

     # 20% noise
     high_noise = ClusteringMock(number_of_clusters = 10, number_of_elements = 800)
     expected_rejection = {
         'reason': 'TOO_MUCH_NOISE',
         'data': {'current': 20.0, 'maximum': 15}
     }
     self.assertItemsEqual(noise_filter.check_noise_level(high_noise),
                           [expected_rejection])
Esempio n. 7
0
    def run(self, clustering_parameters, matrixHandler, workspaceHandler,
            trajectoryHandler):
        """
        Explore clusterings, filter them, evaluate the selected ones and
        choose the best. Every stage is measured with its own named timer.

        Returns None if no clustering is selected by the filter; otherwise a
        4-tuple (best_clustering_id, selected_clusterings,
        not_selected_clusterings, all_scores). Callers must cope with both.
        """
        # ---- Clustering exploration ----
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        exploration_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters, matrixHandler)
        explorer = ClusteringExplorer(
            clustering_parameters, matrixHandler, workspaceHandler,
            exploration_scheduler, parameters_generator, self.observer)
        clusterings = explorer.run()

        # Notification is emitted before the exploration timer is stopped.
        created_info = {"number_of_clusters": len(clusterings)}
        self.notify("Clusterings Created", created_info)
        self.timer.stop("Clustering Exploration")

        # ---- First filtering ----
        self.timer.start("Clustering Filtering")
        evaluation_parameters = clustering_parameters["clustering"]["evaluation"]
        clustering_filter = ClusteringFilter(evaluation_parameters,
                                             matrixHandler)
        selected_clusterings, not_selected_clusterings = \
            clustering_filter.filter(clusterings)

        selected_count = len(selected_clusterings.keys())
        not_selected_count = len(not_selected_clusterings.keys())
        self.notify("Filter", {"selected": selected_count,
                               "not_selected": not_selected_count})
        self.timer.stop("Clustering Filtering")

        # With nothing selected there is nothing left to score or choose.
        if selected_clusterings == {}:
            return None

        # ---- Clustering scoring ----
        self.timer.start("Evaluation")
        # The analysis stage gets a scheduler of its own.
        analysis_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        populator = AnalysisPopulator(matrixHandler, trajectoryHandler,
                                      clustering_parameters)
        analyzer = AnalysisRunner(analysis_scheduler, selected_clusterings,
                                  populator)
        analyzer.evaluate()
        self.timer.stop("Evaluation")

        # ---- Choose the best clustering ----
        self.timer.start("Selection")
        selector = BestClusteringSelector(clustering_parameters)
        best_clustering_id, all_scores = selector.choose_best(
            selected_clusterings)
        self.timer.stop("Selection")

        return best_clustering_id, selected_clusterings, not_selected_clusterings, all_scores