def predict_proba(self, X):
    """Predict the class probabilities for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    array, shape = (n_ts, n_classes)
        Array of predicted class probabilities
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        # Variable-length metrics cannot go through sklearn directly:
        # temporarily switch to "precomputed" and hand sklearn a
        # cross-distance matrix instead.
        self._ts_metric = self.metric
        self.metric = "precomputed"

        if self.metric_params is None:
            metric_params = {}
        else:
            metric_params = self.metric_params.copy()
            # n_jobs / verbose are taken from the estimator itself,
            # not from metric_params.
            if "n_jobs" in metric_params.keys():
                del metric_params["n_jobs"]
            if "verbose" in metric_params.keys():
                del metric_params["verbose"]
        check_is_fitted(self, '_ts_fit')
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)
        try:
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                               verbose=self.verbose, **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            pred = super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
        finally:
            # Restore the user-facing metric even when an error occurs;
            # the original code left the estimator stuck on "precomputed"
            # if the distance computation or prediction raised.
            self.metric = self._ts_metric
        return pred
    else:
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X_ = to_sklearn_dataset(X)
        X_ = check_dims(X_, self._X_fit, extend=False)
        return super(KNeighborsTimeSeriesClassifier,
                     self).predict_proba(X_)
def predict(self, X):
    """Predict class for a given set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, ) or (n_ts, n_classes), depending on the shape
    of the label vector provided at training time.
        Index of the cluster each sample belongs to or class probability
        matrix, depending on what was provided at training time.
    """
    check_is_fitted(self, '_X_fit')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)

    probas = self.predict_proba(X)
    if not self.categorical_y_:
        # Labels were binarized at fit time: map probabilities back to
        # the original label space.
        return self.label_binarizer_.inverse_transform(probas)
    return probas
def fit(self, X, y):
    """Fit the model using X as training data and y as target values

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Training data.
    y : array-like, shape (n_ts, ) or (n_ts, dim_y)
        Target values.

    Returns
    -------
    KNeighborsTimeSeriesRegressor
        The fitted estimator
    """
    # Variable-length metrics (e.g. dtw, softdtw) cannot be handled by
    # sklearn's neighbors machinery directly: temporarily switch the
    # public `metric` to "precomputed" and remember the real one in
    # `_ts_metric` so prediction-time code can compute cross-distances.
    if self.metric in VARIABLE_LENGTH_METRICS:
        self._ts_metric = self.metric
        self.metric = "precomputed"

    # NaN padding is only acceptable in the precomputed (variable-length)
    # case, hence the conditional force_all_finite.
    X = check_array(X,
                    allow_nd=True,
                    force_all_finite=(self.metric != "precomputed"))
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=None)
    if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
        # Keep the raw series; distances are computed lazily at predict
        # time. `_X_fit` is only a (n, n) zero placeholder so that the
        # sklearn parent's fit sees a matrix of the expected shape.
        self._ts_fit = X
        self._d = X.shape[2]
        self._X_fit = numpy.zeros((self._ts_fit.shape[0],
                                   self._ts_fit.shape[0]))
    else:
        # Fixed-length case: flatten to a 2d sklearn-style array.
        self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
    super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
    if hasattr(self, '_ts_metric'):
        # Restore the user-facing metric attribute.
        self.metric = self._ts_metric
    return self
def predict_proba(self, X):
    """Predict class probability for a given set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_classes),
        Class probability matrix.
    """
    check_is_fitted(self, '_X_fit')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, self._X_fit)
    n_ts, sz, d = X.shape

    # The underlying model takes one (n_ts, sz, 1) input per dimension.
    per_dim_inputs = [X[:, :, di].reshape((n_ts, sz, 1))
                      for di in range(self.d_)]
    probas = self.model_.predict(per_dim_inputs,
                                 batch_size=self.batch_size,
                                 verbose=self.verbose)

    # Binary case with a single sigmoid output: expand to two columns.
    if probas.shape[1] == 1 and len(self.classes_) == 2:
        probas = numpy.hstack((1 - probas, probas))
    return probas
def transform(self, X, **kwargs):
    """Fit to data, then transform it.

    Parameters
    ----------
    X : array-like
        Time series dataset to be resampled.

    Returns
    -------
    numpy.ndarray
        Resampled time series dataset.
    """
    dataset = to_time_series_dataset(X)
    n_ts, sz, d = dataset.shape
    same_length = check_equal_size(dataset)
    out = numpy.empty((n_ts, self.sz_, d))
    # Target sampling grid is the same for every series.
    target_grid = numpy.linspace(0, 1, self.sz_)
    for i in range(n_ts):
        if not same_length:
            # Effective (unpadded) length of this particular series.
            sz = ts_size(dataset[i])
        source_grid = numpy.linspace(0, 1, sz)
        for di in range(d):
            interpolator = interp1d(source_grid, dataset[i, :sz, di],
                                    kind="slinear")
            out[i, :, di] = interpolator(target_grid)
    return out
def support_vectors_time_series_(self, X):
    """Return the support vectors as time series, grouped by class."""
    dataset = to_time_series_dataset(X)
    support = []
    offset = 0
    # n_support_[cl] gives the number of support vectors for class cl;
    # support_ holds their indices, ordered class by class.
    for n_sv in self.svm_estimator_.n_support_:
        class_indices = self.svm_estimator_.support_[offset:offset + n_sv]
        support.append(dataset[class_indices])
        offset += n_sv
    return support
def fit(self, X, y=None):
    """Compute k-Shape clustering.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    y
        Ignored
    """
    X = check_array(X, allow_nd=True)

    # Allow a few extra attempts beyond n_init, since inits that produce
    # an empty cluster are discarded and retried.
    max_attempts = max(self.n_init, 10)

    # Reset fitted state so refitting starts from scratch.
    self.labels_ = None
    self.inertia_ = numpy.inf
    self.cluster_centers_ = None
    self.norms_ = 0.
    self.norms_centroids_ = 0.
    self.n_iter_ = 0

    X_ = to_time_series_dataset(X)
    self._X_fit = X_
    # Per-series norms, reused by the shape-based distance computations.
    self.norms_ = numpy.linalg.norm(X_, axis=(1, 2))

    _check_initial_guess(self.init, self.n_clusters)

    rs = check_random_state(self.random_state)

    best_correct_centroids = None
    min_inertia = numpy.inf
    n_successful = 0
    n_attempts = 0
    # Run up to n_init successful initializations, keeping the centroids
    # with the lowest inertia; inits hitting an empty cluster are skipped.
    while n_successful < self.n_init and n_attempts < max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(X_, rs)
            if self.inertia_ < min_inertia:
                best_correct_centroids = self.cluster_centers_.copy()
                min_inertia = self.inertia_
                self.n_iter_ = self._iter
            n_successful += 1
        except EmptyClusterError:
            if self.verbose:
                print("Resumed because of empty cluster")
    self.norms_centroids_ = numpy.linalg.norm(self.cluster_centers_,
                                              axis=(1, 2))
    # Restore the best run's centroids/labels/inertia as fitted state.
    self._post_fit(X_, best_correct_centroids, min_inertia)
    return self
def _prepare_ts_datasets_sklearn(X):
    """Prepare time series datasets for sklearn.

    Examples
    --------
    >>> X = to_time_series_dataset([[1, 2, 3], [2, 2, 3]])
    >>> _prepare_ts_datasets_sklearn(X).shape
    (2, 3)
    """
    dataset = to_time_series_dataset(X)
    # Flatten (sz, d) per series into a single feature axis.
    n_ts = dataset.shape[0]
    return dataset.reshape((n_ts, -1))
def test_variable_length_knn():
    """Check that KNN classifiers handle variable-length time series."""
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8]])
    y = [0, 0, 1, 1]

    # Both variable-length metrics should perfectly recover the labels.
    for metric in ("dtw", "softdtw"):
        clf = KNeighborsTimeSeriesClassifier(metric=metric, n_neighbors=1)
        clf.fit(X, y)
        assert_allclose(clf.predict(X), [0, 0, 1, 1])
def fit(self, X, y=None):
    """Compute k-means clustering.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    y
        Ignored
    """
    # NaNs are allowed: they pad variable-length series.
    X = check_array(X, allow_nd=True, force_all_finite='allow-nan')

    # Reset fitted state so refitting starts from scratch.
    self.labels_ = None
    self.inertia_ = numpy.inf
    self.cluster_centers_ = None
    self._X_fit = None
    self._squared_inertia = True
    self.n_iter_ = 0

    # Allow a few extra attempts beyond n_init, since inits that produce
    # an empty cluster are discarded and retried.
    max_attempts = max(self.n_init, 10)

    X_ = to_time_series_dataset(X)
    rs = check_random_state(self.random_state)

    # Squared norms of each (flattened) series, i.e. squared distance to
    # the origin; used by the k-means++-style initialization.
    x_squared_norms = cdist(X_.reshape((X_.shape[0], -1)),
                            numpy.zeros((1, X_.shape[1] * X_.shape[2])),
                            metric="sqeuclidean").reshape((1, -1))

    _check_initial_guess(self.init, self.n_clusters)

    best_correct_centroids = None
    min_inertia = numpy.inf
    n_successful = 0
    n_attempts = 0
    # Run up to n_init successful initializations, keeping the centroids
    # with the lowest inertia; inits hitting an empty cluster are skipped.
    while n_successful < self.n_init and n_attempts < max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(X_, x_squared_norms, rs)
            if self.inertia_ < min_inertia:
                best_correct_centroids = self.cluster_centers_.copy()
                min_inertia = self.inertia_
                self.n_iter_ = self._iter
            n_successful += 1
        except EmptyClusterError:
            if self.verbose:
                print("Resumed because of empty cluster")
    # Restore the best run's centroids/labels/inertia as fitted state.
    self._post_fit(X_, best_correct_centroids, min_inertia)
    return self
def fit_transform(self, X, y=None, **fit_params):
    """Fit a SAX representation and transform the data accordingly.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of integers with shape (n_ts, n_segments, d)
        SAX-Transformed dataset
    """
    dataset = to_time_series_dataset(X)
    # Convert once, then fit and transform on the same array.
    return self.fit(dataset)._transform(dataset)
def fit(self, X, y=None):
    """Fit a PAA representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    PiecewiseAggregateApproximation
        self
    """
    return self._fit(to_time_series_dataset(X), y)
def test_variable_length_svm():
    """Check that SVM estimators handle variable-length time series."""
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8]])
    rng = np.random.RandomState(0)

    # Classification: labels should be perfectly recovered.
    y_clf = [0, 0, 1, 1]
    clf = TimeSeriesSVC(kernel="gak", random_state=rng)
    clf.fit(X, y_clf)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    # Regression: targets are negative for the first two series and
    # positive for the last two; predictions should keep the signs.
    y_reg = [-1., -1.3, 3.2, 4.1]
    reg = TimeSeriesSVR(kernel="gak")
    reg.fit(X, y_reg)
    assert_array_less(reg.predict(X[:2]), 0.)
    assert_array_less(-reg.predict(X[2:]), 0.)
def inverse_transform(self, X):
    """Compute time series corresponding to given PAA representations.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz_paa, d)
        A dataset of PAA series.

    Returns
    -------
    numpy.ndarray of shape (n_ts, sz_original_ts, d)
        A dataset of time series corresponding to the provided
        representation.
    """
    dataset = to_time_series_dataset(X)
    # Expand each PAA segment back to the size seen at fit time.
    return inv_transform_paa(dataset, original_size=self.size_fitted_)
def fit_transform(self, X, y=None, **fit_params):
    """Fit a 1d-SAX representation and transform the data accordingly.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of integers with shape (n_ts, n_segments, 2 * d)
        1d-SAX-Transformed dataset. The order of the last dimension is:
        first d elements represent average values
        (standard SAX symbols) and the last d are for slopes
    """
    dataset = to_time_series_dataset(X)
    # Convert once, then fit and transform on the same array.
    return self.fit(dataset)._transform(dataset)
def test_variable_length_clustering():
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8]])
    rng = np.random.RandomState(0)

    GlobalAlignmentKernelKMeans(n_clusters=2, random_state=rng).fit(X)
    for metric in ("dtw", "softdtw"):
        TimeSeriesKMeans(n_clusters=2, metric=metric,
                         random_state=rng).fit(X)
def fit(self, X, y=None):
    """Fit a 1d-SAX representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    OneD_SymbolicAggregateApproximation
        self
    """
    # Quantization breakpoints for the average component and the
    # representative (median) value of each resulting bin.
    self.breakpoints_avg_ = _breakpoints(self.alphabet_size_avg)
    self.breakpoints_avg_middle_ = _bin_medians(self.alphabet_size_avg)
    return self._fit(to_time_series_dataset(X))
def transform(self, X, y=None):
    """Transform a dataset of time series into its PAA representation.

    Parameters
    ----------
    X : array-like of shape (n_ts, sz, d)
        Time series dataset

    Returns
    -------
    numpy.ndarray of shape (n_ts, n_segments, d)
        PAA-Transformed dataset
    """
    if not self._is_fitted():
        raise NotFittedError("Model not fitted.")
    return self._transform(to_time_series_dataset(X), y)
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    # NaNs are allowed: they pad variable-length series.
    X = check_array(X, allow_nd=True, force_all_finite='allow-nan')
    check_is_fitted(self, 'cluster_centers_')
    X = check_dims(X, self.cluster_centers_)
    dataset = to_time_series_dataset(X)
    # Assign without touching the fitted labels_/inertia_ attributes.
    return self._assign(dataset, update_class_attributes=False)
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Convert a time series dataset into the representation expected by
    the underlying sklearn SVM estimator.

    For variable-length kernels ("gak"), this is a (precomputed) Gram /
    cross-kernel matrix; otherwise it is a flattened 2d array.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like or None
        Target values (None at predict time).
    fit_time : bool
        True when called from fit: stores `_X_fit`, `gamma_` and
        `classes_`, and computes the square kernel matrix of X against
        itself instead of against the training set.

    Returns
    -------
    sklearn_X or (sklearn_X, y)
        The sklearn-ready array (with y passed through when given).
    """
    # Variable-length kernels tolerate NaN padding in the input.
    force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
    if y is None:
        X = check_array(X, allow_nd=True,
                        force_all_finite=force_all_finite)
    else:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=force_all_finite)
    X = check_dims(X, X_fit=None)
    X = to_time_series_dataset(X)

    if fit_time:
        self._X_fit = X
        # "auto" bandwidth is estimated from the training data.
        if self.gamma == "auto":
            self.gamma_ = gamma_soft_dtw(X)
        else:
            self.gamma_ = self.gamma
        self.classes_ = numpy.unique(y)

    if self.kernel in VARIABLE_LENGTH_METRICS:
        # "gak" is currently the only supported variable-length kernel.
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        if fit_time:
            # Square kernel matrix of the training set against itself.
            sklearn_X = cdist_gak(X,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            # Cross-kernel matrix of new data against the training set.
            sklearn_X = cdist_gak(X,
                                  self._X_fit,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = _prepare_ts_datasets_sklearn(X)

    if y is None:
        return sklearn_X
    else:
        return sklearn_X, y
def transform(self, X, y=None, **kwargs):
    """Will normalize (min-max) each of the timeseries. IMPORTANT: this
    transformation is completely stateless, and is applied to each of
    the timeseries individually.

    Parameters
    ----------
    X : array-like
        Time series dataset to be rescaled.

    Returns
    -------
    numpy.ndarray
        Rescaled time series dataset.
    """
    if self.min_ is not None:
        warnings.warn(
            "'min' is deprecated in version 0.2 and will be "
            "removed in 0.4. Use value_range instead.",
            DeprecationWarning, stacklevel=2)
        self.value_range = (self.min_, self.value_range[1])

    if self.max_ is not None:
        warnings.warn(
            "'max' is deprecated in version 0.2 and will be "
            "removed in 0.4. Use value_range instead.",
            DeprecationWarning, stacklevel=2)
        self.value_range = (self.value_range[0], self.max_)

    if self.value_range[0] >= self.value_range[1]:
        raise ValueError("Minimum of desired range must be smaller"
                         " than maximum. Got %s." % str(self.value_range))

    X_ = to_time_series_dataset(X)
    min_t = numpy.nanmin(X_, axis=1)[:, numpy.newaxis, :]
    max_t = numpy.nanmax(X_, axis=1)[:, numpy.newaxis, :]
    range_t = max_t - min_t
    # Guard against division by zero for constant time series (same
    # pattern as the mean/variance scaler's std_t guard): a flat series
    # is mapped to the lower bound of value_range instead of NaN.
    range_t[range_t == 0.] = 1.
    nomin = (X_ - min_t) * (self.value_range[1] - self.value_range[0])
    X_ = nomin / range_t + self.value_range[0]
    return X_
def locate(self, X):
    """Compute shapelet match location for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Location of the shapelet matches for the provided time series.

    Examples
    --------
    >>> X = numpy.zeros((3, 10, 1))
    >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
    >>> y = [1, 0, 0]
    >>> # Data is all zeros except a motif 1-2-3 in the first time series
    >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
    ...                     verbose=0)
    >>> _ = clf.fit(X, y)
    >>> weights_shapelet = [
    ...     numpy.array([[1, 2, 3]])
    ... ]
    >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
    >>> clf.locate(X)
    array([[4],
           [0],
           [0]])
    """
    # NOTE: the original called check_dims both before check_array and
    # after to_time_series_dataset; the first call was redundant.
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    n_ts, sz, d = X.shape
    locations = self.locator_model_.predict(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
        batch_size=self.batch_size,
        verbose=self.verbose)
    # `numpy.int` was a deprecated alias of the builtin `int` and was
    # removed in NumPy 1.24; use the builtin directly.
    return locations.astype(int)
def transform(self, X, **kwargs):
    """Fit to data, then transform it.

    Parameters
    ----------
    X
        Time series dataset to be rescaled

    Returns
    -------
    numpy.ndarray
        Rescaled time series dataset
    """
    dataset = to_time_series_dataset(X)
    mean_t = numpy.nanmean(dataset, axis=1)[:, numpy.newaxis, :]
    std_t = numpy.nanstd(dataset, axis=1)[:, numpy.newaxis, :]
    # Constant series have zero std: avoid dividing by zero.
    std_t[std_t == 0.] = 1.
    # Standardize, then rescale to the target mean/std.
    return (dataset - mean_t) * self.std_ / std_t + self.mu_
def transform(self, X):
    """Generate shapelet transform for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Shapelet-Transform of the provided time series.
    """
    check_is_fitted(self, '_X_fit')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    n_ts, sz, d = X.shape

    # The underlying model takes one (n_ts, sz, 1) input per dimension.
    per_dim_inputs = [X[:, :, di].reshape((n_ts, sz, 1))
                      for di in range(self.d_)]
    return self.transformer_model_.predict(per_dim_inputs,
                                           batch_size=self.batch_size,
                                           verbose=self.verbose)
def euclidean_barycenter(X, weights=None):
    """Standard Euclidean barycenter computed from a set of time series.

    Parameters
    ----------
    X : array-like, shape=(n_ts, sz, d)
        Time series dataset.

    weights: None or array
        Weights of each X[i]. Must be the same size as len(X).
        If None, uniform weights are used.

    Returns
    -------
    numpy.array of shape (sz, d)
        Barycenter of the provided time series dataset.

    Notes
    -----
        This method requires a dataset of equal-sized time series

    Examples
    --------
    >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
    >>> bar = euclidean_barycenter(time_series)
    >>> bar.shape
    (4, 1)
    >>> bar
    array([[1. ],
           [2. ],
           [3.5],
           [4.5]])
    """
    dataset = to_time_series_dataset(X)
    # Normalize/default the weights, then take the weighted mean over
    # the sample axis.
    sample_weights = _set_weights(weights, dataset.shape[0])
    return numpy.average(dataset, axis=0, weights=sample_weights)
def predict(self, X):
    """Predict the closest cluster each time series in X belongs to.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    X = check_array(X, allow_nd=True)
    check_is_fitted(self,
                    ['cluster_centers_', 'norms_', 'norms_centroids_'])

    X_ = to_time_series_dataset(X)
    # Apply the dimension check to the converted dataset and KEEP its
    # result: the original code assigned check_dims' output to the
    # unused name `X`, so the checked/extended array was discarded.
    X_ = check_dims(X_, self.cluster_centers_)
    # k-Shape operates on z-normalized series.
    X_ = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_)
    dists = self._cross_dists(X_)
    return dists.argmin(axis=1)
def test_variable_cross_val():
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3],
                                [1, 2, 3, 4], [1, 2, 3],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8]])
    y = [0, 0, 0, 0, 1, 1, 1, 1]
    rng = np.random.RandomState(0)
    cv = KFold(n_splits=2, shuffle=True)

    estimators = [
        TimeSeriesSVC(kernel="gak", random_state=rng),
        TimeSeriesSVR(kernel="gak"),
        KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1),
        KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1),
    ]
    # TODO: cannot test for clustering methods since they don't have a
    # score method yet
    for estimator in estimators:
        cross_val_score(estimator, X=X, y=y, cv=cv)
def __init__(self, X):
    """Store the dataset in canonical (n_ts, sz, d) form."""
    dataset = to_time_series_dataset(X)
    self.X_ = dataset
def fit(self, X, y):
    """Learn time-series shapelets.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like of shape=(n_ts, )
        Time series labels.
    """
    if self.verbose_level is not None:
        warnings.warn(
            "'verbose_level' is deprecated in version 0.2 and will be "
            "removed in 0.4. Use 'verbose' instead.",
            DeprecationWarning, stacklevel=2)
        self.verbose = self.verbose_level

    X, y = check_X_y(X, y, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=None)

    # Seed both the backend (set_random_seed) and numpy for
    # reproducibility.
    set_random_seed(seed=self.random_state)
    numpy.random.seed(seed=self.random_state)

    n_ts, sz, d = X.shape
    self._X_fit = X
    # Reset model state so refitting starts from scratch.
    self.model_ = None
    self.transformer_model_ = None
    self.locator_model_ = None
    self.categorical_y_ = False
    self.label_binarizer_ = None
    self.d_ = d

    if y.ndim == 1 or y.shape[1] == 1:
        # 1d labels: binarize to a one-hot (or single-column) target.
        self.label_binarizer_ = LabelBinarizer().fit(y)
        y_ = self.label_binarizer_.transform(y)
        self.classes_ = self.label_binarizer_.classes_
    else:
        # Already one-hot encoded targets.
        y_ = y
        self.categorical_y_ = True
        self.classes_ = numpy.unique(y)
        assert y_.shape[1] != 2, ("Binary classification case, " +
                                  "monodimensional y should be passed.")

    if y_.ndim == 1 or y_.shape[1] == 1:
        n_classes = 2
    else:
        n_classes = y_.shape[1]

    if self.n_shapelets_per_size is None:
        # Heuristic from Grabocka et al. to choose shapelet sizes/counts.
        sizes = grabocka_params_to_shapelet_size_dict(n_ts, sz, n_classes,
                                                      self.shapelet_length,
                                                      self.total_lengths)
        self.n_shapelets_per_size_ = sizes
    else:
        self.n_shapelets_per_size_ = self.n_shapelets_per_size

    self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_classes)
    # transformer/locator models share weights with model_; they are
    # compiled only so they can be used for predict-time inference.
    self.transformer_model_.compile(loss="mean_squared_error",
                                    optimizer=self.optimizer)
    self.locator_model_.compile(loss="mean_squared_error",
                                optimizer=self.optimizer)
    self._set_weights_false_conv(d=d)
    # The model takes one (n_ts, sz, 1) input per dimension.
    self.model_.fit(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)],
        y_,
        batch_size=self.batch_size,
        epochs=self.max_iter,
        verbose=self.verbose)
    self.n_iter_ = len(self.model_.history.history)
    return self
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
    """Finds the K-neighbors of a point.

    Returns indices of and distances to the neighbors of each point.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        The query time series.
        If not provided, neighbors of each indexed point are returned.
        In this case, the query point is not considered its own
        neighbor.
    n_neighbors : int
        Number of neighbors to get (default is the value passed to the
        constructor).
    return_distance : boolean, optional. Defaults to True.
        If False, distances will not be returned

    Returns
    -------
    dist : array
        Array representing the distance to points, only present if
        return_distance=True
    ind : array
        Indices of the nearest points in the population matrix.
    """
    # Variable-length metrics cannot go through sklearn directly:
    # temporarily switch to "precomputed" and hand sklearn a
    # cross-distance matrix instead.
    if self.metric in VARIABLE_LENGTH_METRICS:
        self._ts_metric = self.metric
        self.metric = "precomputed"

        if self.metric_params is None:
            metric_params = {}
        else:
            metric_params = self.metric_params.copy()
            # n_jobs / verbose are taken from the estimator itself,
            # not from metric_params.
            if "n_jobs" in metric_params.keys():
                del metric_params["n_jobs"]
            if "verbose" in metric_params.keys():
                del metric_params["verbose"]
        check_is_fitted(self, '_ts_fit')
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)
        if self._ts_metric == "dtw":
            X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                           verbose=self.verbose, **metric_params)
        elif self._ts_metric == "softdtw":
            X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
        else:
            raise ValueError("Invalid metric recorded: %s" %
                             self._ts_metric)
        # Call the mixin explicitly on the precomputed matrix.
        pred = KNeighborsTimeSeriesMixin.kneighbors(
            self,
            X=X_,
            n_neighbors=n_neighbors,
            return_distance=return_distance)
        # Restore the user-facing metric attribute.
        self.metric = self._ts_metric
        return pred
    else:
        check_is_fitted(self, '_X_fit')
        if X is None:
            # Neighbors of the training points themselves.
            X_ = None
        else:
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
        return KNeighborsTimeSeriesMixin.kneighbors(
            self,
            X=X_,
            n_neighbors=n_neighbors,
            return_distance=return_distance)