def run_optimization(self):
    """Fit k-means for every k in ``k_from..k_to`` and publish the outcome.

    The control area is disabled while the method runs and is re-enabled
    afterwards, even when a fit raises. ``self.optimization_runs`` is
    rebuilt as a list of ``(k, result)`` pairs, where ``result`` is whatever
    calling the ``KMeans`` learner on ``self.data`` returns. No fitting
    happens when ``check_data_size(self.k_to)`` returns a falsy value.
    Once the control area is enabled again, ``show_results()`` and
    ``send_data()`` are called.
    """
    # The controls have to be disabled because this method is not reentrant:
    # rapid clicks on, say, "To: " would otherwise trigger overlapping calls.
    try:
        self.controlArea.setDisabled(True)
        self.optimization_runs = []
        if self.check_data_size(self.k_to):
            # Start from an empty list once the size check has passed.
            self.optimization_runs = []
            learner = KMeans(
                init=['random', 'k-means++'][self.smart_init],
                n_init=self.n_init,
                max_iter=self.max_iterations,
            )
            n_steps = self.k_to - self.k_from + 1
            with self.progressBar(n_steps) as bar:
                for n_clusters in range(self.k_from, self.k_to + 1):
                    bar.advance()
                    # The same learner is reused; only n_clusters changes.
                    learner.params["n_clusters"] = n_clusters
                    fitted = learner(self.data)
                    self.optimization_runs.append((n_clusters, fitted))
    finally:
        self.controlArea.setDisabled(False)
    self.show_results()
    self.send_data()
def run_optimization(self):
    """Fit k-means for a range of k values and publish the outcome.

    The range starts at ``self.k_from`` and ends at ``self.k_to``, capped at
    ``len(self.data)``. If ``check_data_size(self.k_from, self.error)`` is
    falsy the method returns right away: the control area is re-enabled,
    but ``show_results()`` / ``send_data()`` are not called. The second size
    check (against ``self.k_to`` with ``self.warning``) is made only for its
    side effects; its return value is ignored.

    ``self.optimization_runs`` ends up holding ``(k, result)`` pairs, where
    ``result`` is whatever calling the ``KMeans`` learner on ``self.data``
    returns.
    """
    # The controls have to be disabled because this method is not reentrant:
    # rapid clicks on, say, "To: " would otherwise trigger overlapping calls.
    try:
        self.controlArea.setDisabled(True)
        self.optimization_runs = []
        if not self.check_data_size(self.k_from, self.error):
            return
        self.check_data_size(self.k_to, self.warning)

        # Never ask for more clusters than there are data rows.
        last_k = min(self.k_to, len(self.data))
        learner = KMeans(
            init=['random', 'k-means++'][self.smart_init],
            n_init=self.n_init,
            max_iter=self.max_iterations,
        )
        n_steps = last_k - self.k_from + 1
        with self.progressBar(n_steps) as bar:
            for n_clusters in range(self.k_from, last_k + 1):
                bar.advance()
                # The same learner is reused; only n_clusters changes.
                learner.params["n_clusters"] = n_clusters
                fitted = learner(self.data)
                self.optimization_runs.append((n_clusters, fitted))
    finally:
        self.controlArea.setDisabled(False)
    self.show_results()
    self.send_data()
def run_optimization(self):
    """Fit k-means for every k in ``k_from..k_to`` and publish the outcome.

    The control area is disabled while the method runs and is re-enabled
    afterwards, even when a fit raises. ``self.optimization_runs`` is
    rebuilt as a list of ``(k, result)`` pairs, where ``result`` is whatever
    calling the ``KMeans`` learner on ``self.data`` returns. No fitting
    happens when ``check_data_size(self.k_to)`` returns a falsy value.

    Once the progress bar has been opened it is always closed again, also
    when building the learner or one of the fits raises; the exception
    itself still propagates to the caller. After a run that did not raise,
    ``show_results()`` and ``send_data()`` are called.
    """
    # Disabling is needed since this function is not reentrant
    # Fast clicking on, say, "To: " causes multiple calls
    try:
        self.controlArea.setDisabled(True)
        self.optimization_runs = []
        if self.check_data_size(self.k_to):
            self.progressBarInit()
            try:
                progress_steps = self.k_to - self.k_from + 1
                # Start from an empty list once the size check has passed.
                self.optimization_runs = []
                init = ["random", "k-means++"][self.smart_init]
                kmeans = KMeans(
                    init=init,
                    n_init=self.n_init,
                    max_iter=self.max_iterations,
                )
                for k in range(self.k_from, self.k_to + 1):
                    # Report the share of runs completed before this one.
                    self.progressBarSet(
                        100.0 * (k - self.k_from) / progress_steps)
                    kmeans.params["n_clusters"] = k
                    self.optimization_runs.append((k, kmeans(self.data)))
            finally:
                # Pair every progressBarInit() with progressBarFinished(),
                # so a failing fit cannot leave the bar open.
                self.progressBarFinished()
    finally:
        self.controlArea.setDisabled(False)
    self.show_results()
    self.send_data()
def _compute_clustering(data, k, init, n_init, max_iter, silhouette, random_state):
    """Fit a k-means model with ``k`` clusters on ``data``.

    Args:
        data (Table): rows to cluster; only ``len(data)`` is inspected here.
        k (int): number of clusters, passed to ``KMeans`` as ``n_clusters``.
        init (str): initialisation method, passed through to ``KMeans``.
        n_init (int): passed through to ``KMeans``.
        max_iter (int): passed through to ``KMeans``.
        silhouette (bool): passed to ``KMeans`` as
            ``compute_silhouette_score``.
        random_state: passed through to ``KMeans`` unchanged.
            NOTE(review): its type is not visible from this function.

    Returns:
        KMeansModel: the result of calling the configured learner on ``data``.

    Raises:
        NotEnoughData: if ``k`` exceeds the number of rows in ``data``.
    """
    if k > len(data):
        raise NotEnoughData()

    learner = KMeans(
        n_clusters=k,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        compute_silhouette_score=silhouette,
        random_state=random_state,
    )
    return learner(data)
def run_optimization(self):
    """Fit k-means for a range of k values and publish the outcome.

    The range starts at ``self.k_from`` and ends at ``self.k_to``, capped at
    ``len(self.data)``. If ``check_data_size(self.k_from, self.Error)`` is
    falsy the method returns right away: the control area is re-enabled,
    but ``show_results()`` / ``send_data()`` are not called. The second size
    check (against ``self.k_to`` with ``self.Warning``) is made only for its
    side effects; its return value is ignored.

    The learner is asked to compute a silhouette score only when
    ``self.scoring == self.SILHOUETTE``. ``self.optimization_runs`` ends up
    holding ``(k, result)`` pairs, where ``result`` is whatever calling the
    ``KMeans`` learner on ``self.data`` returns.
    """
    # The controls have to be disabled because this method is not reentrant:
    # rapid clicks on, say, "To: " would otherwise trigger overlapping calls.
    try:
        self.controlArea.setDisabled(True)
        self.optimization_runs = []
        if not self.check_data_size(self.k_from, self.Error):
            return
        self.check_data_size(self.k_to, self.Warning)

        # Never ask for more clusters than there are data rows.
        upper_k = min(self.k_to, len(self.data))
        learner = KMeans(
            init=['random', 'k-means++'][self.smart_init],
            n_init=self.n_init,
            max_iter=self.max_iterations,
            compute_silhouette_score=(self.scoring == self.SILHOUETTE),
        )
        total_runs = upper_k - self.k_from + 1
        with self.progressBar(total_runs) as bar:
            for n_clusters in range(self.k_from, upper_k + 1):
                bar.advance()
                # The same learner is reused; only n_clusters changes.
                learner.params["n_clusters"] = n_clusters
                fitted = learner(self.data)
                self.optimization_runs.append((n_clusters, fitted))
    finally:
        self.controlArea.setDisabled(False)
    self.show_results()
    self.send_data()
def run_optimization(self):
    """Fit k-means for every k in ``k_from..k_to`` and publish the outcome.

    The control area is disabled while the method runs and is re-enabled
    afterwards, even when a fit raises. ``self.optimization_runs`` is
    rebuilt as a list of ``(k, result)`` pairs, where ``result`` is whatever
    calling the ``KMeans`` learner on ``self.data`` returns. No fitting
    happens when ``check_data_size(self.k_to)`` returns a falsy value.

    Once the progress bar has been opened it is always closed again, also
    when building the learner or one of the fits raises; the exception
    itself still propagates to the caller. After a run that did not raise,
    ``show_results()`` and ``send_data()`` are called.
    """
    # Disabling is needed since this function is not reentrant
    # Fast clicking on, say, "To: " causes multiple calls
    try:
        self.controlArea.setDisabled(True)
        self.optimization_runs = []
        if self.check_data_size(self.k_to):
            self.progressBarInit()
            try:
                progress_steps = self.k_to - self.k_from + 1
                # Start from an empty list once the size check has passed.
                self.optimization_runs = []
                init = ['random', 'k-means++'][self.smart_init]
                kmeans = KMeans(init=init,
                                n_init=self.n_init,
                                max_iter=self.max_iterations)
                for k in range(self.k_from, self.k_to + 1):
                    # Report the share of runs completed before this one.
                    self.progressBarSet(
                        100.0 * (k - self.k_from) / progress_steps)
                    kmeans.params["n_clusters"] = k
                    self.optimization_runs.append((k, kmeans(self.data)))
            finally:
                # Pair every progressBarInit() with progressBarFinished(),
                # so a failing fit cannot leave the bar open.
                self.progressBarFinished()
    finally:
        self.controlArea.setDisabled(False)
    self.show_results()
    self.send_data()
def _compute_clustering(self, k): # False positives (Setting is not recognized as int) # pylint: disable=invalid-sequence-index # pylint: disable=broad-except try: if k > len(self.data): self.clusterings[k] = "not enough data" else: self.clusterings[k] = KMeans( n_clusters=k, init=['random', 'k-means++'][self.smart_init], n_init=self.n_init, max_iter=self.max_iterations, compute_silhouette_score=True)(self.data) except Exception as exc: self.clusterings[k] = str(exc) return False else: return True
def _compute_clustering(data, k, init, n_init, max_iter, random_state):
    # type: (Table, int, str, int, int, bool) -> KMeansModel
    """Fit a k-means model with ``k`` clusters and attach silhouette scores.

    The model comes from ``KMeans(...).get_model(data)`` with an empty
    ``preprocessors`` list. Two attributes are then set on it:

    * ``silhouette_samples``: the per-row values from ``silhouette_samples``
      when ``data.X`` has at most ``SILHOUETTE_MAX_SAMPLES`` rows, otherwise
      ``None``.
    * ``silhouette``: the mean of those per-row values, or, for larger data,
      the value returned by ``silhouette_score`` with
      ``sample_size=SILHOUETTE_MAX_SAMPLES``. That call is seeded with the
      module-level ``RANDOM_STATE``, not with the ``random_state`` argument.

    NOTE(review): the type comment lists ``bool`` for ``random_state``;
    this looks stale and should be confirmed against callers.

    Raises:
        NotEnoughData: if ``k`` exceeds the number of rows in ``data``.
    """
    if k > len(data):
        raise NotEnoughData()

    learner = KMeans(
        n_clusters=k,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        random_state=random_state,
        preprocessors=[],
    )
    model = learner.get_model(data)

    if data.X.shape[0] > SILHOUETTE_MAX_SAMPLES:
        # More rows than the sample cap: no per-row values, sampled score.
        model.silhouette_samples = None
        model.silhouette = silhouette_score(
            data.X,
            model.labels,
            sample_size=SILHOUETTE_MAX_SAMPLES,
            random_state=RANDOM_STATE,
        )
    else:
        model.silhouette_samples = silhouette_samples(data.X, model.labels)
        model.silhouette = np.mean(model.silhouette_samples)
    return model