Ejemplo n.º 1
0
    def test_get_query_and_evaluation_analysis_types(self):
        """
        Checks the analysis names that AnalysisPopulator extracts from the "evaluation" section of a
        parameters structure: first the ones named by the criteria alone, then those plus the query types.
        """
        criteria_0 = {
            "CythonMirrorCohesion": {"action": ">", "weight": 0.05},
            "CythonMinimumMeanSeparation": {"action": ">", "weight": 0.1},
            "CythonSilhouette": {"action": ">", "weight": 0.15}
        }
        query_types = ["NumClusters", "CythonMinimumMeanSeparation", "NoiseLevel"]
        parameters = {
            "clustering": {
                "evaluation": {
                    "evaluation_criteria": {"criteria_0": criteria_0},
                    "query_types": query_types
                }
            }
        }

        # Only the names used inside the criteria are expected.
        expected_evaluation_types = ['CythonMinimumMeanSeparation', 'CythonMirrorCohesion', 'CythonSilhouette']
        self.assertItemsEqual(AnalysisPopulator.get_evaluation_analysis_types(parameters),
                              expected_evaluation_types)

        # 'CythonMinimumMeanSeparation' is both a query and a criteria name, and is expected only once.
        expected_all_types = ['CythonMinimumMeanSeparation', 'NumClusters', 'CythonMirrorCohesion',
                              'NoiseLevel', 'CythonSilhouette']
        self.assertItemsEqual(AnalysisPopulator.get_query_and_evaluation_analysis_types(parameters),
                              expected_all_types)
Ejemplo n.º 2
0
    def choose_best(self, clustering_info):
        """
        Normalizes the values of the evaluation scores, then calculates the scores for all clusterings and criteria
        and finally chooses the best clustering.

        @param clustering_info: Is the clustering_info structure with clusterings, evaluation info... etc

        @return: A (best_clustering_id, scores) tuple, where 'scores' is the third value returned by
        'get_best_clustering'. If no evaluation criteria were defined, the id is chosen at random and 'scores' is
        an empty dictionary, as nothing was scored. If 'clustering_info' is empty, a warning is printed and None
        is returned.
        """
        if not clustering_info:
            # Single-argument parenthesized form: prints the same text with Python 2 and Python 3.
            print("[WARNING BestClusteringSelector::choose_best] clustering_info is empty.")
            return None

        evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(self.parameters)

        # If there were no criteria defined, then the clustering is randomly selected.
        # The id (not the clustering entry) is returned, paired with an empty score structure, so that
        # this branch has the same (id, scores) shape as the regular return below.
        if not evaluation_types:
            return random.choice(list(clustering_info.keys())), {}

        for evaluation_type in evaluation_types:
            BestClusteringSelector.normalize_one_evaluation_type(evaluation_type, clustering_info)

        all_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(self.criteria, clustering_info)

        # The id of the winning criteria is also returned by get_best_clustering, but it is not used here.
        best_clustering_id, _criteria_id, best_scores = self.get_best_clustering(all_scores)

        return best_clustering_id, best_scores
Ejemplo n.º 3
0
    def choose_best(self, clustering_info):
        """
        Normalizes the values of the evaluation scores, then calculates the scores for all clusterings and criteria
        and finally chooses the best clustering.

        @param clustering_info: Is the clustering_info structure with clusterings, evaluation info... etc

        @return: A (best_clustering_id, scores) tuple, where 'scores' is the third value returned by
        'get_best_clustering'. If no evaluation criteria were defined, the id is chosen at random and 'scores' is
        an empty dictionary, as nothing was scored. If 'clustering_info' is empty, a warning is printed and None
        is returned.
        """
        if not clustering_info:
            # Single-argument parenthesized form: prints the same text with Python 2 and Python 3.
            print(
                "[WARNING BestClusteringSelector::choose_best] clustering_info is empty."
            )
            return None

        evaluation_types = AnalysisPopulator.get_evaluation_analysis_types(
            self.parameters)

        # If there were no criteria defined, then the clustering is randomly selected.
        # The id (not the clustering entry) is returned, paired with an empty score structure, so that
        # this branch has the same (id, scores) shape as the regular return below.
        if not evaluation_types:
            candidate_ids = list(clustering_info.keys())
            return random.choice(candidate_ids), {}

        for evaluation_type in evaluation_types:
            BestClusteringSelector.normalize_one_evaluation_type(
                evaluation_type, clustering_info)

        all_scores = BestClusteringSelector.get_scores_for_all_clusters_and_criterias(
            self.criteria, clustering_info)

        # The id of the winning criteria is also returned by get_best_clustering, but it is not used here.
        best_clustering_id, _criteria_id, best_scores = self.get_best_clustering(
            all_scores)

        return best_clustering_id, best_scores
Ejemplo n.º 4
0
    def run(self, clustering_parameters, matrixHandler, workspaceHandler,
            trajectoryHandler):
        """
        Explores clusterings, filters them, evaluates the ones that passed the filter and chooses the best one.
        The four stages are timed with 'self.timer' and progress is reported through 'self.notify'.

        @param clustering_parameters: Parameters structure; its ["global"]["control"] and
        ["clustering"]["evaluation"] sections are read here, and the whole structure is handed to the helpers.
        @param matrixHandler: Passed to the explorer, the parameters generator, the filter and the populator.
        @param workspaceHandler: Passed to the clustering explorer.
        @param trajectoryHandler: Passed to the analysis populator.

        @return: None if no clustering passed the filter. Otherwise, the tuple
        (best clustering id, selected clusterings, not selected clusterings, scores).
        """
        # --- Clustering exploration ---
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        exploration_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters, matrixHandler)
        explorer = ClusteringExplorer(clustering_parameters, matrixHandler,
                                      workspaceHandler, exploration_scheduler,
                                      parameters_generator, self.observer)
        explored = explorer.run()
        self.notify("Clusterings Created",
                    {"number_of_clusters": len(explored)})
        self.timer.stop("Clustering Exploration")

        # --- First filtering ---
        self.timer.start("Clustering Filtering")
        clustering_filter = ClusteringFilter(
            clustering_parameters["clustering"]["evaluation"], matrixHandler)
        accepted, rejected = clustering_filter.filter(explored)
        filter_summary = {
            "selected": len(accepted.keys()),
            "not_selected": len(rejected.keys())
        }
        self.notify("Filter", filter_summary)
        self.timer.stop("Clustering Filtering")

        # Nothing to evaluate or choose from if the filter discarded everything.
        if accepted == {}:
            return None

        # --- Clustering scoring ---
        self.timer.start("Evaluation")
        evaluation_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        populator = AnalysisPopulator(matrixHandler, trajectoryHandler,
                                      clustering_parameters)
        AnalysisRunner(evaluation_scheduler, accepted, populator).evaluate()
        self.timer.stop("Evaluation")

        # --- Choose the best clustering ---
        self.timer.start("Selection")
        selector = BestClusteringSelector(clustering_parameters)
        best_clustering_id, all_scores = selector.choose_best(accepted)
        self.timer.stop("Selection")

        return best_clustering_id, accepted, rejected, all_scores
Ejemplo n.º 5
0
 def __init__(self, parameters):
     """
     Initializes the AnalysisPopulator part of this object, passing empty strings as its first two
     arguments and 'parameters' as the third one.

     NOTE(review): the empty strings presumably stand in for the matrix and trajectory handlers - confirm
     against AnalysisPopulator.__init__.

     @param parameters: Forwarded untouched to AnalysisPopulator.__init__.
     """
     no_handler = ""
     AnalysisPopulator.__init__(self, no_handler, no_handler, parameters)
Ejemplo n.º 6
0
    def run(self, clustering):
        """
        Refines a clustering cluster by cluster: each cluster is repartitioned with K-Medoids for several
        values of k, all the candidates (including the untouched cluster) are evaluated, and the clusters of
        the best candidate are kept.

        @param clustering: The clustering to refine. Its 'clusters' attribute is iterated.

        @return: A dictionary with the keys "type" ("refined_clustering"), "clustering" (a Clustering built
        with all the kept clusters) and "parameters" (the refinement parameters).
        """
        partitions_limit = self.refinement_parameters["max_partitions"]
        # Step between consecutive k values; never smaller than 1.
        step_ratio = float(partitions_limit) / self.refinement_parameters["tries_per_cluster"]
        k_step = int(max(1, step_ratio))
        distance_matrix = self.matrixHandler.distance_matrix

        refined_clusters = []
        for cluster in clustering.clusters:
            cluster_id = cluster.id
            # The original cluster is registered as the first candidate, so that it also
            # competes for the best clustering prize.
            candidates = {}
            candidates[cluster_id] = {
                "type": "refined_base",
                "clustering": Clustering([cluster]),
                "parameters": {}
            }

            cluster_submatrix = get_submatrix(distance_matrix, cluster.all_elements)

            # Proceed with some K Medoids partitions
            # TODO: Generate parameters with parameter generator
            for k in range(2, partitions_limit, k_step):
                repartition = self.repartition_with_kmedoids(cluster, k, cluster_submatrix)
                candidates["%s_%d" % (cluster_id, k)] = {
                    "type": "refined",
                    "clustering": repartition,
                    "parameters": {"k": k}
                }

            # Evaluate all candidates and pick the best one.
            # NOTE(review): the scheduler options are read from ["clustering"]["control"] - confirm that this
            # is the right section of the parameters structure.
            scheduler = scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer)
            populator = AnalysisPopulator(self.matrixHandler,
                                          self.trajectoryHandler,
                                          self.clustering_parameters)
            AnalysisRunner(scheduler, candidates, populator).evaluate()

            selector = BestClusteringSelector(self.clustering_parameters)
            # The scores are not needed here, only the id of the winner.
            winner_id, _unused_scores = selector.choose_best(candidates)
            refined_clusters.extend(candidates[winner_id]["clustering"].clusters)

        # Convert all new clusters in the new clustering
        refined = {}
        refined["type"] = "refined_clustering"
        refined["clustering"] = Clustering(refined_clusters)
        refined["parameters"] = self.refinement_parameters
        return refined