def _fit(self, X, y):
    """Fit the model using X as training data and y as target values.

    Parameters
    ----------
    X : sktime-format pandas dataframe with shape([n_cases,n_dimensions]),
        or numpy ndarray with shape([n_cases,n_readings,n_dimensions])
        NOTE(review): the body calls ``X.transpose((0, 2, 1))``, so X must
        expose three axes by the time it reaches this method.
    y : {array-like, sparse matrix}
        Target values of shape = [n_samples]

    Returns
    -------
    fx
        Whatever the parent class's ``_fit(X)`` returns.
    """
    # Transpose to work correctly with distance functions
    X = X.transpose((0, 2, 1))

    # A string distance is resolved to a callable; the first series is passed
    # twice as the example pair handed to ``distance_factory``.
    if isinstance(self.distance, str):
        extra_params = {} if self.distance_params is None else self.distance_params
        self.metric = distance_factory(
            X[0], X[0], metric=self.distance, **extra_params
        )

    y = np.asarray(y)
    check_classification_targets(y)

    # Single-output targets are handled as one column internally.
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):
        self.outputs_2d_ = False
        y = y.reshape((-1, 1))
    else:
        self.outputs_2d_ = True

    # Encode every output column as integer indices into its sorted classes.
    self.classes_ = []
    self._y = np.empty(y.shape, dtype=int)
    for k in range(self._y.shape[1]):
        classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
        self.classes_.append(classes)

    if not self.outputs_2d_:
        self.classes_ = self.classes_[0]
        self._y = self._y.ravel()

    # ``check_array`` has its code object swapped for that of
    # ``_check_array_ts`` while the parent fit runs. When ``check_array`` is a
    # wrapper, the wrapped function is the one that gets patched.
    patch_target = getattr(check_array, "__wrapped__", check_array)
    original_code = patch_target.__code__
    patch_target.__code__ = _check_array_ts.__code__
    try:
        # this is not fx = self._fit(X, y) in order to maintain backward
        # compatibility with scikit learn 0.23, where _fit does not take an
        # arg y
        fx = super()._fit(X)
    finally:
        # Always undo the patch, even when the parent fit raises; otherwise
        # the module-level ``check_array`` would stay patched for every
        # subsequent caller.
        patch_target.__code__ = original_code

    self._is_fitted = True
    return fx
def __init__(
    self,
    n_neighbors=1,
    weights="uniform",
    distance="dtw",
    distance_params=None,
    **kwargs
):
    """Set up the classifier and both of its base classes.

    Parameters
    ----------
    n_neighbors : int, default=1
        Forwarded unchanged to the parent constructor.
    weights : default="uniform"
        Passed through ``_check_weights``; the result is stored as
        ``self.weights``.
    distance : str or other, default="dtw"
        Stored as ``self.distance``. A string is turned into a metric with
        ``distance_factory(metric=...)``; any other value is handed to the
        parent constructor as ``metric`` unchanged.
    distance_params : dict or None, default=None
        Stored as ``self.distance_params``. ``self._distance_params`` holds
        the same value, with ``None`` replaced by an empty dict.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    self._distance_params = {} if distance_params is None else distance_params
    self.distance = distance
    self.distance_params = distance_params

    if isinstance(distance, str):
        metric = distance_factory(metric=distance)
    else:
        metric = distance

    super(KNeighborsTimeSeriesClassifier, self).__init__(
        n_neighbors=n_neighbors,
        algorithm="brute",
        metric=metric,
        metric_params=None,  # Extra distance params handled in _fit
        **kwargs
    )
    BaseClassifier.__init__(self)

    self.weights = _check_weights(weights)

    # We need to add is-fitted state when inheriting from scikit-learn
    self._is_fitted = False
def _fit(self, X: np.ndarray, y=None):
    """Fit time series clusterer to training data.

    The fit is repeated ``self.n_init`` times through ``_fit_one_init``, and
    the run with the lowest inertia is kept.

    Parameters
    ----------
    X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
        (n_instances, n_dimensions, series_length))
        Training time series instances to cluster.
    y: ignored, exists for API consistency reasons.

    Returns
    -------
    self:
        Fitted estimator.
    """
    self._check_params(X)

    # For "ddtw"/"wddtw" the data is slope-transformed first, and the distance
    # is then built with the matching "dtw"/"wdtw" metric.
    if self.metric in ("ddtw", "wddtw"):
        X = average_of_slope_transform(X)
        factory_metric = "dtw" if self.metric == "ddtw" else "wdtw"
    else:
        factory_metric = self.metric
    self._distance_metric = distance_factory(
        X[0], X[1], metric=factory_metric, **self._distance_params
    )

    # Track the best run as a (labels, centers, inertia, n_iters) record.
    winner = (None, None, np.inf, self.max_iter)
    for _ in range(self.n_init):
        candidate = self._fit_one_init(X)
        labels, centers, inertia, n_iters = candidate
        if inertia < winner[2]:
            winner = (labels, centers, inertia, n_iters)

    self.labels_, self.cluster_centers_, self.inertia_, self.n_iter_ = winner
    return self
def _test_distance_params(
    param_list: List[Dict], distance_func: Callable, distance_str: str
):
    """Check three ways of computing a distance against stored expectations.

    For every parameter dict, and for one univariate and one multivariate pair
    of series, the distance is computed by calling ``distance_func`` directly,
    by calling ``distance`` with ``metric=distance_str``, and by calling the
    callable returned from ``distance_factory``. When
    ``_expected_distance_results_params`` has a non-``None`` entry for that
    metric, parameter index and pair index, all three results must
    approximately equal it.

    Parameters
    ----------
    param_list : List[Dict]
        Keyword-argument dicts; each is unpacked into every distance call.
    distance_func : Callable
        Distance function called as ``distance_func(x, y, **params)``.
    distance_str : str
        Metric name passed to ``distance`` and ``distance_factory``, and used
        as the key into ``_expected_distance_results_params``.
    """
    x_univ = to_numba_timeseries(create_test_distance_numpy(10, 1))
    y_univ = to_numba_timeseries(create_test_distance_numpy(10, 1, random_state=2))

    x_multi = create_test_distance_numpy(10, 10)
    y_multi = create_test_distance_numpy(10, 10, random_state=2)

    series_pairs = [[x_univ, y_univ], [x_multi, y_multi]]

    # NOTE(review): these accumulators are filled but never read or returned
    # here; presumably kept for regenerating expected values by hand.
    results_to_fill = []
    for param_idx, param_dict in enumerate(param_list):
        curr_results = []
        for pair_idx, (x, y) in enumerate(series_pairs):
            curr_dist_fact = distance_factory(
                x, y, metric=distance_str, **param_dict
            )
            results = [
                distance_func(x, y, **param_dict),
                distance(x, y, metric=distance_str, **param_dict),
                curr_dist_fact(x, y),
            ]

            if distance_str in _expected_distance_results_params:
                expected_rows = _expected_distance_results_params[distance_str]
                if expected_rows[param_idx][pair_idx] is not None:
                    for result in results:
                        assert result == pytest.approx(
                            _expected_distance_results_params[distance_str][
                                param_idx
                            ][pair_idx]
                        )
            curr_results.append(results[0])
        results_to_fill.append(curr_results)
def _check_params(self, X: np.ndarray) -> None:
    """Check parameters are valid and initialized.

    Sets ``self._random_state``, ``self._init_algorithm``,
    ``self._distance_params`` and ``self._distance_metric``.

    Parameters
    ----------
    X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
        (n_instances, n_dimensions, series_length))
        Time series instances to cluster. ``X[0]`` and ``X[1]`` are passed to
        ``distance_factory``, so at least two instances are needed.

    Raises
    ------
    ValueError
        If the init_algorithm value is invalid.
    """
    self._random_state = check_random_state(self.random_state)

    # A string is looked up in the registry of init algorithms (an unknown
    # name yields None); any other value is taken as-is.
    if isinstance(self.init_algorithm, str):
        self._init_algorithm = self._init_algorithms.get(self.init_algorithm)
    else:
        self._init_algorithm = self.init_algorithm

    if not callable(self._init_algorithm):
        raise ValueError(
            f"The value provided for init_algorithm: {self.init_algorithm} is "
            f"invalid. The following are a list of valid init algorithms strings: "
            f"{list(self._init_algorithms.keys())}"
        )

    if self.distance_params is None:
        self._distance_params = {}
    else:
        self._distance_params = self.distance_params

    self._distance_metric = distance_factory(
        X[0], X[1], metric=self.metric, **self._distance_params
    )