Beispiel #1
0
 def run_optimization(self):
     """Fit one k-means clustering for each k in ``k_from``..``k_to``.

     If ``check_data_size(self.k_to)`` is truthy, a single ``KMeans``
     learner is configured from ``smart_init``, ``n_init`` and
     ``max_iterations`` and applied to ``self.data`` once per candidate k.
     The ``(k, result)`` pairs are collected in ``self.optimization_runs``;
     if the size check fails the list is left empty.

     Unless an exception propagates, ``show_results()`` and ``send_data()``
     are called at the end in either case.
     """
     # Disabling is needed since this function is not reentrant
     # Fast clicking on, say, "To: " causes multiple calls
     try:
         self.controlArea.setDisabled(True)
         # Reset once up front. Only check_data_size runs between this line
         # and the loop (assumed not to touch the list), so a second reset
         # after the check is unnecessary.
         self.optimization_runs = []
         if self.check_data_size(self.k_to):
             kmeans = KMeans(
                 init=['random', 'k-means++'][self.smart_init],
                 n_init=self.n_init,
                 max_iter=self.max_iterations)
             with self.progressBar(self.k_to - self.k_from + 1) as progress:
                 for k in range(self.k_from, self.k_to + 1):
                     progress.advance()
                     # The same learner is reused; only the requested
                     # number of clusters changes between runs.
                     kmeans.params["n_clusters"] = k
                     self.optimization_runs.append((k, kmeans(self.data)))
     finally:
         # Re-enable the controls even when clustering raised.
         self.controlArea.setDisabled(False)
     self.show_results()
     self.send_data()
Beispiel #2
0
 def run_optimization(self):
     """Cluster ``self.data`` for every k from ``k_from`` up to ``k_to``.

     The upper bound is capped at ``len(self.data)``.  The results are
     stored in ``self.optimization_runs`` as ``(k, result)`` tuples, after
     which ``show_results()`` and ``send_data()`` are called.

     If ``check_data_size(self.k_from, self.error)`` is falsy the method
     returns early: the controls are re-enabled, but ``show_results()`` and
     ``send_data()`` are not called.
     """
     # The controls stay disabled for the whole run because this method is
     # not reentrant; fast clicking on, say, "To: " would otherwise cause
     # multiple overlapping calls.
     try:
         self.controlArea.setDisabled(True)
         self.optimization_runs = []

         lower_bound_ok = self.check_data_size(self.k_from, self.error)
         if not lower_bound_ok:
             return
         # Called for its effect only; the return value is ignored.
         self.check_data_size(self.k_to, self.warning)

         last_k = min(self.k_to, len(self.data))
         init_methods = ['random', 'k-means++']
         learner_settings = {
             "init": init_methods[self.smart_init],
             "n_init": self.n_init,
             "max_iter": self.max_iterations,
         }
         learner = KMeans(**learner_settings)

         with self.progressBar(last_k - self.k_from + 1) as bar:
             for n_clusters in range(self.k_from, last_k + 1):
                 bar.advance()
                 learner.params["n_clusters"] = n_clusters
                 outcome = learner(self.data)
                 self.optimization_runs.append((n_clusters, outcome))
     finally:
         self.controlArea.setDisabled(False)
     self.show_results()
     self.send_data()
Beispiel #3
0
 def run_optimization(self):
     """Fit one k-means clustering for each k in ``k_from``..``k_to``.

     If ``check_data_size(self.k_to)`` is truthy, a ``KMeans`` learner built
     from ``smart_init``, ``n_init`` and ``max_iterations`` is applied to
     ``self.data`` once per k.  The ``(k, result)`` pairs are stored in
     ``self.optimization_runs``; otherwise that list stays empty.

     Unless an exception propagates, ``show_results()`` and ``send_data()``
     are called at the end in either case.
     """
     # Disabling is needed since this function is not reentrant
     # Fast clicking on, say, "To: " causes multiple calls
     try:
         self.controlArea.setDisabled(True)
         self.optimization_runs = []
         if self.check_data_size(self.k_to):
             self.progressBarInit()
             # Pair every progressBarInit() with progressBarFinished(), even
             # when building or running the learner raises; otherwise the
             # progress bar would never be finished.
             try:
                 progress_steps = self.k_to - self.k_from + 1
                 kmeans = KMeans(
                     init=["random", "k-means++"][self.smart_init],
                     n_init=self.n_init,
                     max_iter=self.max_iterations,
                 )
                 for k in range(self.k_from, self.k_to + 1):
                     # Percentage of candidate k values already handled.
                     self.progressBarSet(
                         100.0 * (k - self.k_from) / progress_steps)
                     # The learner is reused; only the cluster count changes.
                     kmeans.params["n_clusters"] = k
                     self.optimization_runs.append((k, kmeans(self.data)))
             finally:
                 self.progressBarFinished()
     finally:
         # Re-enable the controls even when clustering raised.
         self.controlArea.setDisabled(False)
     self.show_results()
     self.send_data()
Beispiel #4
0
    def _compute_clustering(data, k, init, n_init, max_iter, silhouette,
                            random_state):
        # type: (Table, int, str, int, int, bool) -> KMeansModel
        """Run k-means with ``k`` clusters on ``data`` and return the result.

        NOTE(review): the type comment above lists six argument types for
        seven parameters (``random_state`` has no entry) -- confirm the
        intended type and update it.

        The keyword arguments ``init``, ``n_init``, ``max_iter`` and
        ``random_state`` are passed to ``KMeans`` unchanged; ``silhouette``
        is passed as ``compute_silhouette_score``.

        Raises ``NotEnoughData`` when ``k`` is greater than ``len(data)``.
        """
        if len(data) < k:
            raise NotEnoughData()

        learner = KMeans(
            n_clusters=k,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            compute_silhouette_score=silhouette,
            random_state=random_state,
        )
        return learner(data)
Beispiel #5
0
 def run_optimization(self):
     """Cluster ``self.data`` for every k from ``k_from`` up to ``k_to``.

     The upper bound is capped at ``len(self.data)``.  The silhouette score
     is requested from ``KMeans`` only when ``self.scoring`` equals
     ``self.SILHOUETTE``.  Results are stored in ``self.optimization_runs``
     as ``(k, result)`` tuples, then ``show_results()`` and ``send_data()``
     are called.

     If ``check_data_size(self.k_from, self.Error)`` is falsy the method
     returns early: the controls are re-enabled, but ``show_results()`` and
     ``send_data()`` are not called.
     """
     # This method is not reentrant, and fast clicking on, say, "To: "
     # causes multiple calls, so the controls are disabled while it runs.
     try:
         self.controlArea.setDisabled(True)
         self.optimization_runs = []

         if not self.check_data_size(self.k_from, self.Error):
             return
         # Called for its effect only; the return value is ignored.
         self.check_data_size(self.k_to, self.Warning)

         upper_k = min(self.k_to, len(self.data))
         init_name = ['random', 'k-means++'][self.smart_init]
         wants_silhouette = self.scoring == self.SILHOUETTE
         clusterer = KMeans(
             init=init_name,
             n_init=self.n_init,
             max_iter=self.max_iterations,
             compute_silhouette_score=wants_silhouette,
         )

         with self.progressBar(upper_k - self.k_from + 1) as bar:
             for n_clusters in range(self.k_from, upper_k + 1):
                 bar.advance()
                 clusterer.params["n_clusters"] = n_clusters
                 run = (n_clusters, clusterer(self.data))
                 self.optimization_runs.append(run)
     finally:
         self.controlArea.setDisabled(False)
     self.show_results()
     self.send_data()
Beispiel #6
0
 def run_optimization(self):
     """Fit one k-means clustering for each k in ``k_from``..``k_to``.

     Clustering only runs when ``check_data_size(self.k_to)`` is truthy.
     One ``KMeans`` learner (configured from ``smart_init``, ``n_init`` and
     ``max_iterations``) is applied to ``self.data`` for each k, and the
     ``(k, result)`` pairs end up in ``self.optimization_runs``.  When the
     check fails the list is left empty.

     Unless an exception propagates, ``show_results()`` and ``send_data()``
     are called at the end in either case.
     """
     # Disabling is needed since this function is not reentrant
     # Fast clicking on, say, "To: " causes multiple calls
     try:
         self.controlArea.setDisabled(True)
         self.optimization_runs = []
         if self.check_data_size(self.k_to):
             self.progressBarInit()
             # progressBarFinished() must run even if the learner raises,
             # so that the progressBarInit() above is always matched.
             try:
                 progress_steps = self.k_to - self.k_from + 1
                 kmeans = KMeans(init=['random', 'k-means++'][self.smart_init],
                                 n_init=self.n_init,
                                 max_iter=self.max_iterations)
                 for k in range(self.k_from, self.k_to + 1):
                     # Percentage of candidate k values already handled.
                     self.progressBarSet(100.0 * (k - self.k_from) /
                                         progress_steps)
                     # The learner is reused; only the cluster count changes.
                     kmeans.params["n_clusters"] = k
                     self.optimization_runs.append((k, kmeans(self.data)))
             finally:
                 self.progressBarFinished()
     finally:
         # Re-enable the controls even when clustering raised.
         self.controlArea.setDisabled(False)
     self.show_results()
     self.send_data()
Beispiel #7
0
 def _compute_clustering(self, k):
     """Compute the clustering for ``k`` and store it in ``self.clusterings``.

     ``self.clusterings[k]`` receives one of:

     * the string ``"not enough data"`` when ``k`` exceeds ``len(self.data)``;
     * the result of applying ``KMeans`` (silhouette scoring enabled) to
       ``self.data``;
     * ``str(exc)`` when any exception is raised while doing the above.

     Returns ``False`` only in the exception case and ``True`` otherwise,
     including the "not enough data" case.
     """
     # False positives (Setting is not recognized as int)
     # pylint: disable=invalid-sequence-index
     # pylint: disable=broad-except
     try:
         if len(self.data) < k:
             self.clusterings[k] = "not enough data"
         else:
             init_name = ['random', 'k-means++'][self.smart_init]
             learner = KMeans(
                 n_clusters=k,
                 init=init_name,
                 n_init=self.n_init,
                 max_iter=self.max_iterations,
                 compute_silhouette_score=True,
             )
             self.clusterings[k] = learner(self.data)
     except Exception as exc:
         # Any failure is recorded as its message in place of a model.
         self.clusterings[k] = str(exc)
         return False
     return True
Beispiel #8
0
    def _compute_clustering(data, k, init, n_init, max_iter, random_state):
        # type: (Table, int, str, int, int, bool) -> KMeansModel
        """Fit k-means with ``k`` clusters and attach silhouette information.

        NOTE(review): the type comment above gives ``bool`` for
        ``random_state`` -- confirm this is the intended type.

        The model is obtained from ``KMeans(...).get_model(data)`` with an
        empty preprocessor list.  Two attributes are then set on it:

        * ``silhouette_samples`` -- the per-sample values from
          ``silhouette_samples`` when ``data.X`` has at most
          ``SILHOUETTE_MAX_SAMPLES`` rows, otherwise ``None``;
        * ``silhouette`` -- the mean of those values, or, for larger data,
          ``silhouette_score`` computed with
          ``sample_size=SILHOUETTE_MAX_SAMPLES`` and
          ``random_state=RANDOM_STATE``.

        Raises ``NotEnoughData`` when ``k`` is greater than ``len(data)``.
        """
        if len(data) < k:
            raise NotEnoughData()

        learner = KMeans(
            n_clusters=k,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            random_state=random_state,
            preprocessors=[],
        )
        fitted = learner.get_model(data)

        features = data.X
        if features.shape[0] > SILHOUETTE_MAX_SAMPLES:
            # Too many rows for per-sample values: score a subsample only.
            fitted.silhouette_samples = None
            fitted.silhouette = silhouette_score(
                features,
                fitted.labels,
                sample_size=SILHOUETTE_MAX_SAMPLES,
                random_state=RANDOM_STATE,
            )
        else:
            fitted.silhouette_samples = silhouette_samples(
                features, fitted.labels)
            fitted.silhouette = np.mean(fitted.silhouette_samples)

        return fitted