예제 #1
0
    def choose_best(self, clustering_info):
        """
        Normalizes the values of the evaluation scores, then calculates the scores for all clusterings
        and criteria and finally chooses the best clustering.

        @param clustering_info: Is the dict-like clustering_info structure with clusterings, evaluation
        info... etc. It is handed to 'normalize_one_evaluation_type' once per evaluation type
        (presumably normalized in place -- TODO confirm against that helper).

        @return: One of three shapes, depending on the path taken:
            - None if 'clustering_info' is empty or None (a warning is printed).
            - A randomly chosen entry of 'clustering_info' (the stored value, not its key) if no
              evaluation types were defined.
            - Otherwise the tuple (best_clustering_id, scores), taken from the first and third
              elements returned by 'get_best_clustering'.
        """
        if not clustering_info:
            # Single-argument parenthesized form prints the same text on Python 2 and Python 3.
            print("[WARNING BestClusteringSelector::choose_best] clustering_info is empty.")
            return None

        evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(self.parameters)

        # If there were no criteria defined, then the clustering is randomly selected
        if evaluation_types == []:
            # The keys are materialized into a list because Python 3 key views cannot be indexed.
            return clustering_info[random.choice(list(clustering_info.keys()))]

        for evaluation_type in evaluation_types:
            BestClusteringSelector.normalize_one_evaluation_type(evaluation_type, clustering_info)

        all_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(self.criteria, clustering_info)

        # get_best_clustering also returns the winning criteria id; it is not part of this method's result.
        best_clustering_id, _, scores = self.get_best_clustering(all_scores)

        return best_clustering_id, scores
예제 #2
0
    def test_get_query_and_evaluation_analysis_types(self):
        """
        Checks the analysis types that AnalysisPopulator extracts from a parameters structure.

        The fixture defines one criteria ('criteria_0') with three analysis names and a list of three
        query types, one of which ('CythonMinimumMeanSeparation') also appears in the criteria.
        Expected results (compared ignoring order):
            - get_evaluation_analysis_types: the three analysis names used by the criteria.
            - get_query_and_evaluation_analysis_types: those names plus the query types, with the
              shared name listed only once.
        """
        criteria_0 = {
            "CythonMirrorCohesion": {"action": ">", "weight": 0.05},
            "CythonMinimumMeanSeparation": {"action": ">", "weight": 0.1},
            "CythonSilhouette": {"action": ">", "weight": 0.15}
        }
        evaluation = {
            "evaluation_criteria": {"criteria_0": criteria_0},
            "query_types": ["NumClusters", "CythonMinimumMeanSeparation", "NoiseLevel"]
        }
        parameters = {"clustering": {"evaluation": evaluation}}

        expected_evaluation_types = ['CythonMinimumMeanSeparation', 'CythonMirrorCohesion', 'CythonSilhouette']
        expected_query_and_evaluation_types = ['CythonMinimumMeanSeparation', 'NumClusters',
                                               'CythonMirrorCohesion', 'NoiseLevel', 'CythonSilhouette']

        evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(parameters)
        self.assertItemsEqual(evaluation_types, expected_evaluation_types)

        query_and_evaluation_types = AnalysisPopulator.get_query_and_evaluation_analysis_types(parameters)
        self.assertItemsEqual(query_and_evaluation_types, expected_query_and_evaluation_types)
예제 #3
0
    def choose_best(self, clustering_info):
        """
        Normalizes the values of the evaluation scores, then calculates the scores for all
        clusterings and criteria and finally chooses the best clustering.

        @param clustering_info: Is the dict-like clustering_info structure with clusterings,
        evaluation info... etc. It is passed to 'normalize_one_evaluation_type' once per
        evaluation type (presumably normalized in place -- TODO confirm against that helper).

        @return: One of three shapes, depending on the path taken:
            - None if 'clustering_info' is empty or None (a warning is printed).
            - A randomly chosen entry of 'clustering_info' (the stored value, not its key) if
              no evaluation types were defined.
            - Otherwise the tuple (best_clustering_id, scores), taken from the first and third
              elements returned by 'get_best_clustering'.
        """
        if not clustering_info:
            # A single parenthesized argument prints identically on Python 2 and Python 3.
            print("[WARNING BestClusteringSelector::choose_best] clustering_info is empty.")
            return None

        evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(
            self.parameters)

        # If there were no criteria defined, then the clustering is randomly selected
        if evaluation_types == []:
            # list(...) is required: Python 3 key views do not support indexing.
            candidate_keys = list(clustering_info.keys())
            return clustering_info[random.choice(candidate_keys)]

        for evaluation_type in evaluation_types:
            BestClusteringSelector.normalize_one_evaluation_type(
                evaluation_type, clustering_info)

        all_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
            self.criteria, clustering_info)

        # The second element (the winning criteria id) is not part of this method's result.
        best_clustering_id, _, scores = self.get_best_clustering(all_scores)

        return best_clustering_id, scores