def predict(self, X):
    """Predict the target for the provided data.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    array, shape = (n_ts, ) or (n_ts, dim_y)
        Array of predicted targets.
    """
    if self.metric not in TSLEARN_VALID_METRICS:
        # Plain sklearn metric: flatten the series and delegate to the
        # parent estimator.
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        dataset = to_time_series_dataset(X)
        flat = to_sklearn_dataset(dataset)
        flat = check_dims(flat, X_fit_dims=self._X_fit.shape, extend=False)
        return super().predict(flat)

    # Time-series metric: compute a cross-distance matrix to the training
    # set and feed it to the parent as "precomputed" distances.
    check_is_fitted(self, '_ts_fit')
    dataset = to_time_series_dataset(X)
    dataset = check_dims(dataset, X_fit_dims=self._ts_fit.shape,
                         extend=True, check_n_features_only=True)
    dists = self._precompute_cross_dist(dataset)
    predictions = super().predict(dists)
    # Restore the user-facing metric after the precomputed-distance call.
    self.metric = self._ts_metric
    return predictions
def inverse_transform(self, X):
    """Compute time series corresponding to given 1d-SAX representations.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz_sax, 2 * d)
        A dataset of SAX series.

    Returns
    -------
    numpy.ndarray of shape (n_ts, sz_original_ts, d)
        A dataset of time series corresponding to the provided
        representation.
    """
    self._is_fitted()
    X = check_array(X, allow_nd=True)
    # The last axis carries d average symbols followed by d slope symbols.
    expected_dims = (None, None, 2 * self._X_fit_dims_[-1])
    X = check_dims(X, X_fit_dims=expected_dims, check_n_features_only=True)
    reconstructed = inv_transform_1d_sax(
        X,
        breakpoints_avg_middle_=self.breakpoints_avg_middle_,
        breakpoints_slope_middle_=self.breakpoints_slope_middle_,
        original_size=self._X_fit_dims_[1]
    )
    # Undo the scaling that was applied before discretization.
    return self._unscale(reconstructed)
def fit(self, X, y=None, sample_weight=None):
    """Compute kernel k-means clustering.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    y
        Ignored

    sample_weight : array-like of shape=(n_ts, ) or None (default: None)
        Weights to be given to time series in the learning process. By
        default, all time series weights are equal.

    Returns
    -------
    self
        The fitted estimator.
    """
    X = check_array(X, allow_nd=True, force_all_finite=False)
    X = check_dims(X)
    sample_weight = _check_sample_weight(sample_weight=sample_weight, X=X)
    # Allow a few extra restarts beyond n_init so that runs aborted by
    # EmptyClusterError do not necessarily prevent reaching n_init
    # successful initializations.
    max_attempts = max(self.n_init, 10)
    # Reset fitted state so a failed fit does not leave stale attributes.
    self.labels_ = None
    self.inertia_ = None
    self.sample_weight_ = None
    self._X_fit = None
    # n_iter_ will contain the number of iterations the most
    # successful run required.
    self.n_iter_ = 0
    n_samples = X.shape[0]
    # Kernel (Gram) matrix over the training set.
    K = self._get_kernel(X)
    sw = (sample_weight if sample_weight is not None
          else numpy.ones(n_samples))
    self.sample_weight_ = sw
    rs = check_random_state(self.random_state)
    last_correct_labels = None
    min_inertia = numpy.inf
    n_attempts = 0
    n_successful = 0
    while n_successful < self.n_init and n_attempts < max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(K, rs)
            # Keep the labels of the best (lowest-inertia) run so far.
            if self.inertia_ < min_inertia:
                last_correct_labels = self.labels_
                min_inertia = self.inertia_
                self.n_iter_ = self._iter
            n_successful += 1
        except EmptyClusterError:
            # A run produced an empty cluster: discard it and retry.
            if self.verbose:
                print("Resumed because of empty cluster")
    if n_successful > 0:
        # Expose the best run's result as the fitted state.
        self.labels_ = last_correct_labels
        self.inertia_ = min_inertia
        self._X_fit = X
    return self
def predict(self, X):
    """Predict class for a given set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, ) or (n_ts, n_classes), depending on the shape
    of the label vector provided at training time.
        Index of the cluster each sample belongs to or class probability
        matrix, depending on what was provided at training time.
    """
    check_is_fitted(self, '_X_fit')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    probas = self.predict_proba(X)
    if not self.categorical_y_:
        # Labels were binarized at fit time: map probabilities back to
        # the original label space.
        return self.label_binarizer_.inverse_transform(probas)
    return probas
def transform(self, X, y=None, **kwargs):
    """Will normalize (min-max) each of the timeseries. IMPORTANT: this
    transformation is completely stateless, and is applied to each of
    the timeseries individually.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset to be rescaled.

    Returns
    -------
    numpy.ndarray
        Rescaled time series dataset.
    """
    low, high = self.value_range
    if low >= high:
        raise ValueError("Minimum of desired range must be smaller"
                         " than maximum. Got %s." % str(self.value_range))
    check_is_fitted(self, '_X_fit_dims')
    X = check_array(X, allow_nd=True, force_all_finite=False)
    series = to_time_series_dataset(X)
    series = check_dims(series, X_fit_dims=self._X_fit_dims, extend=False)
    # Per-series min/max over the time axis, NaN-aware, broadcastable.
    per_ts_min = numpy.nanmin(series, axis=1)[:, numpy.newaxis, :]
    per_ts_max = numpy.nanmax(series, axis=1)[:, numpy.newaxis, :]
    spread = per_ts_max - per_ts_min
    # NOTE: a constant series has spread == 0, so numpy division will
    # emit a warning and propagate inf/nan, as in the original code.
    scaled = (series - per_ts_min) * (high - low)
    return scaled / spread + low
def fit(self, X, y=None):
    """Fit the model using X as training data.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Training data.
    """
    if self.metric in TSLEARN_VALID_METRICS:
        # Remember the time-series metric; the sklearn parent is fed a
        # "precomputed" matrix instead.
        self._ts_metric = self.metric
        self.metric = "precomputed"
    X = check_array(X, allow_nd=True,
                    force_all_finite=(self.metric != "precomputed"))
    X = to_time_series_dataset(X)
    X = check_dims(X)
    if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
        self._ts_fit = X
        self._d = X.shape[2]
        n_ts = self._ts_fit.shape[0]
        # Dummy square matrix so the parent fit sees the right shape;
        # real distances are computed against the stored series later.
        self._X_fit = numpy.zeros((n_ts, n_ts))
    else:
        self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
    super().fit(self._X_fit, y)
    if hasattr(self, '_ts_metric'):
        # Restore the user-facing metric.
        self.metric = self._ts_metric
    return self
def transform(self, X):
    """Generate shapelet transform for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Shapelet-Transform of the provided time series.
    """
    check_is_fitted(self, '_X_fit_dims')
    X = check_array(X, allow_nd=True, force_all_finite=False)
    X = self._preprocess_series(X)
    X = check_dims(X, X_fit_dims=self._X_fit_dims,
                   check_n_features_only=True)
    self._check_series_length(X)
    n_ts, sz, _ = X.shape
    # The underlying model expects one single-channel input per dimension.
    inputs = [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)]
    return self.transformer_model_.predict(
        inputs, batch_size=self.batch_size, verbose=self.verbose)
def predict(self, X):
    """Predict class for a given set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, ) or (n_ts, n_classes), depending on the shape
    of the label vector provided at training time.
        Index of the cluster each sample belongs to or class probability
        matrix, depending on what was provided at training time.
    """
    check_is_fitted(self, '_X_fit_dims')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit_dims=self._X_fit_dims)
    # Pick the most probable class and map indices back to labels.
    best = self.predict_proba(X).argmax(axis=1)
    return numpy.array([self.classes_[idx] for idx in best])
def predict_proba(self, X):
    """Predict class probability for a given set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_classes),
        Class probability matrix.
    """
    check_is_fitted(self, '_X_fit_dims')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit_dims=self._X_fit_dims)
    n_ts, sz, _ = X.shape
    # One single-channel input per dimension for the underlying model.
    inputs = [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)]
    probs = self.model_.predict(
        inputs, batch_size=self.batch_size, verbose=self.verbose)
    if probs.shape[1] == 1 and len(self.classes_) == 2:
        # Single-column output on a binary problem: expand to two columns
        # (P(class 0), P(class 1)).
        probs = numpy.hstack((1 - probs, probs))
    return probs
def predict_class_and_earliness(self, X):
    """Provide predicted class as well as prediction timestamps.

    Prediction timestamps are timestamps at which a prediction is made in
    early classification setting.

    Parameters
    ----------
    X : array-like of shape (n_series, n_timestamps, n_features)
        Vector to be scored, where `n_series` is the number of time
        series, `n_timestamps` is the number of timestamps in the series
        and `n_features` is the number of features recorded at each
        timestamp.

    Returns
    -------
    array, shape (n_samples,)
        Predicted classes.
    array-like of shape (n_series, )
        Prediction timestamps.
    """
    X = check_array(X, allow_nd=True)
    check_is_fitted(self, '_X_fit_dims')
    X = check_dims(X, X_fit_dims=self._X_fit_dims,
                   check_n_features_only=True)
    classes = []
    timestamps = []
    # Each series is classified independently, yielding a label and the
    # timestamp at which the decision was taken.
    for series in X:
        label, when = self._predict_single_series(series)
        classes.append(label)
        timestamps.append(when)
    return np.array(classes), np.array(timestamps)
def fit(self, X, y):
    """Fit the model using X as training data and y as target values.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Training data.
    y : array-like, shape (n_ts, ) or (n_ts, dim_y)
        Target values.

    Returns
    -------
    KNeighborsTimeSeriesRegressor
        The fitted estimator
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        # Remember the time-series metric; the sklearn parent is fed a
        # "precomputed" matrix instead.
        self._ts_metric = self.metric
        self.metric = "precomputed"
    X = check_array(X, allow_nd=True,
                    force_all_finite=(self.metric != "precomputed"))
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=None)
    if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
        self._ts_fit = X
        self._d = X.shape[2]
        n_ts = self._ts_fit.shape[0]
        # Dummy square matrix; real distances are computed at predict
        # time against the stored time series.
        self._X_fit = numpy.zeros((n_ts, n_ts))
    else:
        self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
    super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
    if hasattr(self, '_ts_metric'):
        # Restore the user-facing metric.
        self.metric = self._ts_metric
    return self
def locate(self, X):
    """Compute shapelet match location for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Location of the shapelet matches for the provided time series.

    Examples
    --------
    >>> from tslearn.generators import random_walk_blobs
    >>> X = numpy.zeros((3, 10, 1))
    >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
    >>> y = [1, 0, 0]
    >>> # Data is all zeros except a motif 1-2-3 in the first time series
    >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
    ...                     verbose=0)
    >>> _ = clf.fit(X, y)
    >>> weights_shapelet = [
    ...     numpy.array([[1, 2, 3]])
    ... ]
    >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
    >>> clf.locate(X)
    array([[4], [0], [0]])
    """
    # Fix 1: the original called check_dims on the raw array-like before
    # check_array/to_time_series_dataset and then repeated the identical
    # call afterwards; validate once, after conversion.
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    n_ts, sz, d = X.shape
    # One single-channel input per dimension for the locator model.
    locations = self.locator_model_.predict(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
        batch_size=self.batch_size,
        verbose=self.verbose
    )
    # Fix 2: numpy.int was deprecated in NumPy 1.20 and removed in 1.24;
    # it was an alias for the builtin int, so astype(int) is equivalent.
    return locations.astype(int)
def fit(self, X, y):
    """Learn time-series shapelets.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like of shape=(n_ts, )
        Time series labels.

    Returns
    -------
    self
        The fitted estimator.
    """
    if self.verbose_level is not None:
        # Backward compatibility: the old parameter overrides 'verbose'
        # while warning about its upcoming removal.
        warnings.warn(
            "'verbose_level' is deprecated in version 0.2 and will be "
            "removed in 0.4. Use 'verbose' instead.",
            DeprecationWarning, stacklevel=2)
        self.verbose = self.verbose_level
    X, y = check_X_y(X, y, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X)
    # Seed both RNG backends for reproducible initialization.
    set_random_seed(seed=self.random_state)
    numpy.random.seed(seed=self.random_state)
    n_ts, sz, d = X.shape
    self._X_fit_dims = X.shape
    # Reset any previously built models before rebuilding the layers.
    self.model_ = None
    self.transformer_model_ = None
    self.locator_model_ = None
    self.d_ = d
    y_ = self._preprocess_labels(y)
    n_labels = len(self.classes_)
    if self.n_shapelets_per_size is None:
        # Heuristic (Grabocka et al.) to choose shapelet counts/sizes.
        sizes = grabocka_params_to_shapelet_size_dict(n_ts, sz, n_labels,
                                                      self.shapelet_length,
                                                      self.total_lengths)
        self.n_shapelets_per_size_ = sizes
    else:
        self.n_shapelets_per_size_ = self.n_shapelets_per_size
    self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_labels)
    self._set_weights_false_conv(d=d)
    # The model takes one single-channel input per dimension.
    self.model_.fit(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)],
        y_,
        batch_size=self.batch_size,
        epochs=self.max_iter,
        verbose=self.verbose
    )
    self.n_iter_ = len(self.model_.history.history)
    return self
def predict_proba(self, X):
    """Predict the class probabilities for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    array, shape = (n_ts, n_classes)
        Array of predicted class probabilities
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        # Temporarily switch to "precomputed" so the sklearn parent
        # consumes the cross-distance matrix computed below; the
        # user-facing metric is restored before returning.
        self._ts_metric = self.metric
        self.metric = "precomputed"
        if self.metric_params is None:
            metric_params = {}
        else:
            metric_params = self.metric_params.copy()
            # n_jobs / verbose are estimator-level settings, not
            # parameters of the underlying distance function.
            if "n_jobs" in metric_params.keys():
                del metric_params["n_jobs"]
            if "verbose" in metric_params.keys():
                del metric_params["verbose"]
        check_is_fitted(self, '_ts_fit')
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)
        # Cross-distances between the query series and the training set.
        if self._ts_metric == "dtw":
            X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                           verbose=self.verbose, **metric_params)
        elif self._ts_metric == "softdtw":
            X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
        else:
            # NOTE(review): only dtw/softdtw are handled here even though
            # the branch condition admits any VARIABLE_LENGTH_METRICS
            # entry — other recorded metrics raise.
            raise ValueError("Invalid metric recorded: %s" %
                             self._ts_metric)
        pred = super(KNeighborsTimeSeriesClassifier,
                     self).predict_proba(X_)
        self.metric = self._ts_metric
        return pred
    else:
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X_ = to_sklearn_dataset(X)
        X_ = check_dims(X_, self._X_fit, extend=False)
        return super(KNeighborsTimeSeriesClassifier,
                     self).predict_proba(X_)
def locate(self, X):
    """Compute shapelet match location for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Location of the shapelet matches for the provided time series.
    """
    # Fix 1: the original called check_dims on the raw array-like before
    # check_array/to_time_series_dataset and then repeated the identical
    # call afterwards; validate once, after conversion.
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    n_ts, sz, d = X.shape
    # One single-channel input per dimension for the locator model.
    locations = self.locator_model_.predict(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
        batch_size=self.batch_size,
        verbose=self.verbose
    )
    # Fix 2: numpy.int was deprecated in NumPy 1.20 and removed in 1.24;
    # it was an alias for the builtin int, so astype(int) is equivalent.
    return locations.astype(int)
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate input and convert it to what the sklearn SVM expects.

    For variable-length kernels (GAK) a precomputed kernel matrix is
    returned; otherwise the dataset is flattened to a 2d design matrix.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like or None
        Labels; validated together with X when provided.
    fit_time : bool (default: False)
        True when called from fit: stores the training set and derived
        hyper-parameters instead of checking fitted state.

    Returns
    -------
    sklearn_X or (sklearn_X, y)
        The sklearn-ready design (or kernel) matrix, plus y when given.
    """
    # Finiteness can only be enforced for fixed-length representations.
    force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
    if y is None:
        X = check_array(X, allow_nd=True,
                        force_all_finite=force_all_finite)
    else:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=force_all_finite)
    X = to_time_series_dataset(X)
    if fit_time:
        self._X_fit = X
        if self.gamma == "auto":
            # Data-driven bandwidth for the GAK kernel.
            self.gamma_ = gamma_soft_dtw(X)
        else:
            self.gamma_ = self.gamma
        self.classes_ = numpy.unique(y)
    else:
        check_is_fitted(self, ['svm_estimator_', '_X_fit'])
        X = check_dims(
            X, X_fit_dims=self._X_fit.shape,
            extend=True,
            check_n_features_only=(self.kernel in VARIABLE_LENGTH_METRICS))
    if self.kernel in VARIABLE_LENGTH_METRICS:
        # GAK is the only supported variable-length kernel here.
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        if fit_time:
            # Square kernel matrix over the training set.
            sklearn_X = cdist_gak(X,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            # Cross-kernel between the query set and the training set.
            sklearn_X = cdist_gak(X, self._X_fit,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = to_sklearn_dataset(X)
    if y is None:
        return sklearn_X
    else:
        return sklearn_X, y
def fit(self, X, y):
    """Learn time-series shapelets.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like of shape=(n_ts, )
        Time series labels.

    Returns
    -------
    self
        The fitted estimator.
    """
    X, y = check_X_y(X, y, allow_nd=True, force_all_finite=False)
    X = self._preprocess_series(X)
    X = check_dims(X)
    self._check_series_length(X)
    # Seed both numpy and tensorflow for reproducible initialization.
    numpy.random.seed(seed=self.random_state)
    tf.random.set_seed(seed=self.random_state)
    n_ts, sz, d = X.shape
    self._X_fit_dims = X.shape
    # Reset any previously built models before rebuilding the layers.
    self.model_ = None
    self.transformer_model_ = None
    self.locator_model_ = None
    self.d_ = d
    y_ = self._preprocess_labels(y)
    n_labels = len(self.classes_)
    if self.n_shapelets_per_size is None:
        # Heuristic (Grabocka et al.) to choose shapelet counts/sizes.
        sizes = grabocka_params_to_shapelet_size_dict(n_ts,
                                                      self._min_sz_fit,
                                                      n_labels,
                                                      self.shapelet_length,
                                                      self.total_lengths)
        self.n_shapelets_per_size_ = sizes
    else:
        self.n_shapelets_per_size_ = self.n_shapelets_per_size
    self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_labels)
    self._set_weights_false_conv(d=d)
    # The model takes one single-channel input per dimension.
    h = self.model_.fit(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)],
        y_,
        batch_size=self.batch_size,
        epochs=self.max_iter,
        verbose=self.verbose
    )
    self.history_ = h.history
    # Number of completed epochs (one loss entry per epoch).
    self.n_iter_ = len(self.history_.get("loss", []))
    return self
def fit(self, X, y=None):
    """Fit a Matrix Profile representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    MatrixProfile
        self
    """
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    return self._fit(dataset)
def fit_transform(self, X, y=None, **fit_params):
    """Fit a SAX representation and transform the data accordingly.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of integers with shape (n_ts, n_segments, d)
        SAX-Transformed dataset
    """
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    # Fit on the validated dataset, then transform the same data.
    fitted = self._fit(dataset)
    return fitted._transform(dataset)
def fit(self, X, y=None):
    """Fit a 1d-SAX representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    OneD_SymbolicAggregateApproximation
        self
    """
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    return self._fit(dataset)
def transform(self, X, y=None):
    """Transform a dataset of time series into its SAX representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of integers with shape (n_ts, n_segments, d)
        SAX-Transformed dataset
    """
    self._is_fitted()
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    # Only the feature dimension must match the fitted data.
    dataset = check_dims(dataset, X_fit_dims=tuple(self._X_fit_dims_),
                         check_n_features_only=True)
    return self._transform(dataset, y)
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, allow_nd=True, force_all_finite='allow-nan')
    check_is_fitted(self, 'cluster_centers_')
    # NOTE(review): the cluster_centers_ array itself is passed as the
    # second positional argument, whereas sibling code passes
    # X_fit_dims=self.cluster_centers_.shape — confirm which check_dims
    # signature this call targets.
    X = check_dims(X, self.cluster_centers_)
    X_ = to_time_series_dataset(X)
    # Assign without updating fitted attributes (pure prediction).
    return self._assign(X_, update_class_attributes=False)
def fit_transform(self, X, y=None, **fit_params):
    """Transform a dataset of time series into its Matrix Profile
    representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of shape (n_ts, output_size, 1)
        Matrix-Profile-Transformed dataset. `ouput_size` is equal to
        `sz - subsequence_length + 1`
    """
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    # Fit on the validated dataset, then transform the same data.
    fitted = self._fit(dataset)
    return fitted._transform(dataset)
def inverse_transform(self, X):
    """Compute time series corresponding to given PAA representations.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz_paa, d)
        A dataset of PAA series.

    Returns
    -------
    numpy.ndarray of shape (n_ts, sz_original_ts, d)
        A dataset of time series corresponding to the provided
        representation.
    """
    self._is_fitted()
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    # Expand each PAA segment back to the original series length.
    return inv_transform_paa(dataset, original_size=self._X_fit_dims_[1])
def fit_transform(self, X, y=None, **fit_params):
    """Fit a 1d-SAX representation and transform the data accordingly.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of integers with shape (n_ts, n_segments, 2 * d)
        1d-SAX-Transformed dataset. The order of the last dimension is:
        first d elements represent average values (standard SAX symbols)
        and the last d are for slopes
    """
    dataset = check_array(X, allow_nd=True, force_all_finite=False)
    dataset = check_dims(dataset)
    # Fit on the validated dataset, then transform the same data.
    fitted = self._fit(dataset)
    return fitted._transform(dataset)
def fit(self, X, y):
    """Fit the model using X as training data and y as target values

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Training data.
    y : array-like, shape (n_ts, )
        Target values.

    Returns
    -------
    KNeighborsTimeSeriesClassifier
        The fitted estimator
    """
    if self.metric in TSLEARN_VALID_METRICS:
        # Remember the time-series metric; the sklearn parent is fed a
        # "precomputed" (dummy) matrix instead.
        self._ts_metric = self.metric
        self.metric = "precomputed"
    X = check_array(X, allow_nd=True,
                    force_all_finite=(self.metric != "precomputed"))
    X = to_time_series_dataset(X)
    X = check_dims(X)
    if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
        self._ts_fit = X
        if self._ts_metric == 'sax':
            # SAX requires discretizing the series; reset normalization
            # statistics before re-estimating them during preprocessing.
            self._sax_mu = None
            self._sax_sigma = None
            if self.metric_params is not None:
                self._ts_fit = self._sax_preprocess(
                    X, **self.metric_params)
            else:
                self._ts_fit = self._sax_preprocess(X)
        self._d = X.shape[2]
        # Dummy square matrix so the parent fit sees the right shape;
        # actual distances are computed at predict time against the
        # stored series.
        self._X_fit = numpy.zeros(
            (self._ts_fit.shape[0], self._ts_fit.shape[0]))
    else:
        self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
    super().fit(self._X_fit, y)
    if hasattr(self, '_ts_metric'):
        # Restore the user-facing metric.
        self.metric = self._ts_metric
    return self
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, allow_nd=True, force_all_finite='allow-nan')
    check_is_fitted(self, 'cluster_centers_')
    # Elastic metrics tolerate different series lengths, so only the
    # feature count is checked in that case.
    features_only = self.metric != "euclidean"
    X = check_dims(X, X_fit_dims=self.cluster_centers_.shape,
                   extend=True, check_n_features_only=features_only)
    return self._assign(X, update_class_attributes=False)
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, allow_nd=True)
    check_is_fitted(self, '_X_fit')
    X_ = to_time_series_dataset(X)
    # NOTE(review): the result of this check is bound to X, which is not
    # used afterwards (only X_ is) — the call still validates dimensions
    # against the training data, but the assignment looks dead. Confirm
    # whether check_dims was meant to run on X_ instead.
    X = check_dims(X, self._X_fit)
    # Series are z-normalized before distance computation.
    X_ = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_)
    dists = self._cross_dists(X_)
    return dists.argmin(axis=1)
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate input and convert it to an sklearn-compatible form.

    For variable-length kernels (GAK) a precomputed kernel matrix is
    returned; otherwise the dataset is flattened for the sklearn
    estimator.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like or None
        Labels; validated together with X when provided.
    fit_time : bool (default: False)
        True when called from fit: stores the training set and derived
        hyper-parameters.

    Returns
    -------
    sklearn_X or (sklearn_X, y)
        The sklearn-ready design (or kernel) matrix, plus y when given.
    """
    # Finiteness can only be enforced for fixed-length representations.
    force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
    if y is None:
        X = check_array(X, allow_nd=True,
                        force_all_finite=force_all_finite)
    else:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=force_all_finite)
    X = check_dims(X, X_fit=None)
    X = to_time_series_dataset(X)
    if fit_time:
        self._X_fit = X
        # Data-driven bandwidth for the GAK kernel.
        self.gamma_ = gamma_soft_dtw(X)
        self.classes_ = numpy.unique(y)
    if self.kernel in VARIABLE_LENGTH_METRICS:
        # GAK is the only supported variable-length kernel here.
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        if fit_time:
            # Square kernel matrix over the training set.
            sklearn_X = cdist_gak(X,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            # Cross-kernel between the query set and the training set.
            sklearn_X = cdist_gak(X, self._X_fit,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = _prepare_ts_datasets_sklearn(X)
    if y is None:
        return sklearn_X
    else:
        return sklearn_X, y
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, allow_nd=True, force_all_finite=False)
    check_is_fitted(self, '_X_fit')
    X = check_dims(X, self._X_fit)
    # Cross-kernel between the query series and the training set.
    K = self._get_kernel(X, self._X_fit)
    dist = numpy.zeros((X.shape[0], self.n_clusters))
    # _compute_dist fills `dist` in place with distances to each cluster.
    self._compute_dist(K, dist)
    return dist.argmin(axis=1)