def choose_best(self, clustering_info):
    """
    Normalizes the values of the evaluation scores, then calculates the scores for all
    clusterings and criteria and finally chooses the best clustering.

    @param clustering_info: Is the clustering_info structure with clusterings, evaluation
    info... etc. It is indexed by clustering id.

    @return: One of the following:
        - None if 'clustering_info' is empty (a warning is printed).
        - A randomly chosen entry of 'clustering_info' if no evaluation criteria are defined.
        - Otherwise, a tuple with the id of the best clustering and the third value returned
          by 'get_best_clustering' (its score information).
    """
    if not clustering_info:
        # Parenthesised single-argument form prints the same text on Python 2 and Python 3.
        print("[WARNING BestClusteringSelector::choose_best] clustering_info is empty.")
        return None

    evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(self.parameters)

    # If there were no criteria defined, then the clustering is randomly selected.
    # NOTE(review): this branch returns the clustering_info entry itself, not an
    # (id, scores) tuple like the regular path - kept as is so that callers do not break.
    if not evaluation_types:
        # The keys are materialised in a list because dictionary views cannot be indexed
        # on Python 3.
        return clustering_info[random.choice(list(clustering_info))]

    # The return value is not used; presumably clustering_info is updated in place
    # (confirm against normalize_one_evaluation_type).
    for evaluation_type in evaluation_types:
        BestClusteringSelector.normalize_one_evaluation_type(evaluation_type, clustering_info)

    all_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(self.criteria,
                                                                                  clustering_info)

    # The id of the winning criteria is not part of this method's result.
    best_clustering_id, _, best_scores = self.get_best_clustering(all_scores)

    return best_clustering_id, best_scores
def test_get_query_and_evaluation_analysis_types(self):
    """
    Checks the analysis types that AnalysisPopulator extracts from a parameters structure:
    the evaluation-only types must be the names used inside the evaluation criteria, and the
    combined list must add the query types to them. 'CythonMinimumMeanSeparation' is both a
    query type and a criteria member, and is expected only once in the combined list.
    """
    parameters = {
        "clustering": {
            "evaluation": {
                "evaluation_criteria": {
                    "criteria_0": {
                        "CythonMirrorCohesion": {
                            "action": ">",
                            "weight": 0.05
                        },
                        "CythonMinimumMeanSeparation": {
                            "action": ">",
                            "weight": 0.1
                        },
                        "CythonSilhouette": {
                            "action": ">",
                            "weight": 0.15
                        }
                    }
                },
                "query_types": [
                    "NumClusters",
                    "CythonMinimumMeanSeparation",
                    "NoiseLevel"
                ]
            }
        }
    }

    expected_evaluation_types = ['CythonMinimumMeanSeparation',
                                 'CythonMirrorCohesion',
                                 'CythonSilhouette']

    expected_query_and_evaluation_types = ['CythonMinimumMeanSeparation',
                                           'NumClusters',
                                           'CythonMirrorCohesion',
                                           'NoiseLevel',
                                           'CythonSilhouette']

    # The order of the returned types is not checked, only their contents (duplicates
    # included). Sorted lists are compared instead of using assertItemsEqual, which is only
    # available in the Python 2 version of unittest.
    self.assertEqual(sorted(AnalysisPopulator.get_evaluation_analysis_types(parameters)),
                     sorted(expected_evaluation_types))

    self.assertEqual(sorted(AnalysisPopulator.get_query_and_evaluation_analysis_types(parameters)),
                     sorted(expected_query_and_evaluation_types))
def choose_best(self, clustering_info):
    """
    Normalizes the values of the evaluation scores, then calculates the scores for all
    clusterings and criteria and finally chooses the best clustering.

    @param clustering_info: Is the clustering_info structure with clusterings, evaluation
    info... etc. It is indexed by clustering id.

    @return: None (after printing a warning) when 'clustering_info' is empty. When no
    evaluation criteria are defined, one entry of 'clustering_info' picked at random.
    In any other case, the id of the best clustering together with the third value
    returned by 'get_best_clustering' (its score information).
    """
    if not clustering_info:
        # A single parenthesised argument prints identically on Python 2 and Python 3.
        print("[WARNING BestClusteringSelector::choose_best] clustering_info is empty.")
        return None

    evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(
        self.parameters)

    # If there were no criteria defined, then the clustering is randomly selected.
    # NOTE(review): the value stored in clustering_info is returned here, which is not the
    # (id, scores) pair of the regular path; left untouched to stay compatible with callers.
    if not evaluation_types:
        # Dictionary views are not indexable on Python 3, so the ids are copied to a list.
        clustering_ids = list(clustering_info)
        return clustering_info[random.choice(clustering_ids)]

    # Result of the call is discarded; clustering_info is presumably modified in place
    # (confirm against normalize_one_evaluation_type).
    for evaluation_type in evaluation_types:
        BestClusteringSelector.normalize_one_evaluation_type(
            evaluation_type, clustering_info)

    scores_per_criteria = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
        self.criteria, clustering_info)

    # The second element (id of the winning criteria) is not returned to the caller.
    best_clustering_id, _, best_scores = self.get_best_clustering(
        scores_per_criteria)

    return best_clustering_id, best_scores