def fit(self, X):
    """Compute a barycenter of the dataset ``X``.

    The dataset and the weights are stored on the estimator. When
    ``self.barycenter_`` is not set yet, it is initialised with the result
    of ``EuclideanBarycenter.fit`` (after resampling ``X`` when its series
    do not all have the same size). That starting point is then refined
    with ``scipy.optimize.minimize`` applied to ``self._func``.

    Parameters
    ----------
    X : array-like
        Time series dataset.

    Returns
    -------
    numpy.ndarray
        The refined barycenter, with the same shape as
        ``self.barycenter_``. If ``self.max_iter`` is not positive,
        ``self.barycenter_`` itself is returned. The refined result is
        returned only; it is not written back to ``self.barycenter_``.
    """
    self._X_fit = to_time_series_dataset(X)
    self.weights = _set_weights(self.weights, self._X_fit.shape[0])

    if self.barycenter_ is None:
        init_data = self._X_fit
        if not check_equal_size(init_data):
            # Bring every series to a common size before averaging.
            resampler = TimeSeriesResampler(sz=self._X_fit.shape[1])
            init_data = resampler.fit_transform(self._X_fit)
        self.barycenter_ = EuclideanBarycenter.fit(self, init_data)

    if not self.max_iter > 0:
        return self.barycenter_

    # The optimizer works on flat vectors, so the barycenter is flattened
    # on the way in and reshaped on the way out.
    solver_options = dict(maxiter=self.max_iter, disp=False)
    res = minimize(self._func, self.barycenter_.ravel(),
                   method=self.method, jac=True, tol=self.tol,
                   options=solver_options)
    return res.x.reshape(self.barycenter_.shape)
def fit_transform(self, X, **kwargs):
    """Fit to data, then transform it.

    Each dimension of each series is interpolated (``kind="slinear"``)
    from its own evenly spaced grid on [0, 1] onto a grid of ``self.sz_``
    evenly spaced points on [0, 1].

    Parameters
    ----------
    X : array-like
        Time series dataset to be resampled.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    numpy.ndarray
        Resampled time series dataset of shape ``(n_ts, self.sz_, d)``.
    """
    dataset = to_time_series_dataset(X)
    n_ts, sz, d = dataset.shape
    same_length = check_equal_size(dataset)
    resampled = numpy.empty((n_ts, self.sz_, d))
    # The target grid is the same for every series.
    target_grid = numpy.linspace(0, 1, self.sz_)

    for idx, series in enumerate(dataset):
        # With unequal sizes, only the first ``ts_size`` samples are used.
        length = sz if same_length else ts_size(series)
        source_grid = numpy.linspace(0, 1, length)
        for dim in range(d):
            interpolator = interp1d(source_grid, series[:length, dim],
                                    kind="slinear")
            resampled[idx, :, dim] = interpolator(target_grid)
    return resampled
def sigma_gak(dataset, n_samples=100, random_state=None):
    r"""Compute sigma value to be used for GAK.

    This method was originally presented in [1]_.

    The estimate is the median Euclidean distance between randomly drawn
    time points of the dataset, multiplied by the square root of the
    series size (the smallest size when series have unequal sizes).

    Parameters
    ----------
    dataset
        A dataset of time series
    n_samples : int (default: 100)
        Number of samples on which median distance should be estimated
    random_state : integer or numpy.RandomState or None (default: None)
        The generator used to draw the samples. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    float
        Suggested bandwidth (:math:`\sigma`) for the Global Alignment kernel

    Examples
    --------
    >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]]
    >>> sigma_gak(dataset=dataset,
    ...           n_samples=200,
    ...           random_state=0)  # doctest: +ELLIPSIS
    2.0...

    See Also
    --------
    gak : Compute Global Alignment kernel
    cdist_gak : Compute cross-similarity matrix using Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    rng = check_random_state(random_state)
    series = to_time_series_dataset(dataset)
    n_ts, sz, d = series.shape
    if not check_equal_size(series):
        # Only the prefix shared by every series is sampled.
        sz = numpy.min([ts_size(ts) for ts in series])

    n_points = n_ts * sz
    # Draw with replacement only when there are fewer points than samples.
    with_replacement = bool(n_points < n_samples)
    picked = rng.choice(n_points, size=n_samples, replace=with_replacement)

    points = series[:, :sz, :].reshape((-1, d))
    dists = pdist(points[picked], metric="euclidean")
    return numpy.median(dists) * numpy.sqrt(sz)
def softdtw_barycenter(X, gamma=1.0, weights=None, method="L-BFGS-B", tol=1e-3,
                       max_iter=50, init=None):
    """Compute barycenter (time series averaging) under the soft-DTW geometry.

    Parameters
    ----------
    X : array-like, shape=(n_ts, sz, d)
        Time series dataset.
    gamma: float
        Regularization parameter.
        Lower is less smoothed (closer to true DTW).
    weights: None or array
        Weights of each X[i]. Must be the same size as len(X).
    method: string
        Optimization method, passed to `scipy.optimize.minimize`.
        Default: L-BFGS.
    tol: float
        Tolerance of the method used.
    max_iter: int
        Maximum number of iterations. If not positive, no optimization is
        performed and the starting point is returned as is.
    init: array or None (default: None)
        Starting point of the optimization. If `None`, the euclidean
        barycenter of `X` is used (computed on a resampled copy of `X` when
        its series do not all have the same size).

    Returns
    -------
    numpy.ndarray
        Barycenter with the same shape as the starting point.

    Examples
    --------
    >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
    >>> euc_bar = euclidean_barycenter(time_series)
    >>> stdw_bar = softdtw_barycenter(time_series, max_iter=0)
    >>> stdw_bar.shape
    (4, 1)
    >>> # Equal because 0 iterations were performed
    >>> bool(numpy.all(numpy.abs(euc_bar - stdw_bar) < 1e-9))
    True
    >>> softdtw_barycenter(time_series, max_iter=5).shape
    (4, 1)
    """
    X_ = to_time_series_dataset(X)
    weights = _set_weights(weights, X_.shape[0])
    if init is None:
        if check_equal_size(X_):
            barycenter = euclidean_barycenter(X_, weights)
        else:
            resampled_X = TimeSeriesResampler(sz=X_.shape[1]).fit_transform(X_)
            barycenter = euclidean_barycenter(resampled_X, weights)
    else:
        barycenter = init

    if not max_iter > 0:
        return barycenter

    # NOTE(review): X_ is passed to the objective as returned by
    # to_time_series_dataset; for variable-length input it presumably still
    # holds padding values -- confirm that _softdtw_func copes with them.
    def f(Z):
        return _softdtw_func(Z, X_, weights, barycenter, gamma)

    # The function works with vectors so we need to vectorize barycenter.
    res = minimize(f, barycenter.ravel(), method=method, jac=True, tol=tol,
                   options=dict(maxiter=max_iter, disp=False))
    return res.x.reshape(barycenter.shape)
def _fit_one_init(self, X, x_squared_norms, rs):
    """Run one clustering pass starting from a single seeding.

    Cluster centers are seeded with ``_k_init`` on the flattened dataset
    (resampled to a common size first when series sizes differ). Then
    assignment and centroid update alternate for at most ``self.max_iter``
    iterations, stopping early once the inertia changes by less than
    ``self.tol`` between two consecutive iterations.

    Parameters
    ----------
    X : array-like
        Time series dataset.
    x_squared_norms
        Forwarded unchanged to ``_k_init``.
    rs
        Forwarded unchanged to ``_k_init`` (presumably the random state
        used for seeding -- confirm against ``_k_init``).

    Returns
    -------
    self
    """
    n_ts, sz, d = time_series_dataset_shape(X)
    if check_equal_size(X):
        init_data = to_equal_sized_dataset(X)
    else:
        init_data = TimeSeriesResampler(sz=sz).fit_transform(X)

    # Seeding works on one flat vector per series.
    flat_data = init_data.reshape((n_ts, -1))
    seeds = _k_init(flat_data, self.n_clusters, x_squared_norms, rs)
    self.cluster_centers_ = seeds.reshape((-1, sz, d))

    previous_inertia = numpy.inf
    for _ in range(self.max_iter):
        self._assign(X)
        if self.verbose:
            print("%.3f" % self.inertia_, end=" --> ")
        self._update_centroids(X)

        converged = numpy.abs(previous_inertia - self.inertia_) < self.tol
        if converged:
            break
        previous_inertia = self.inertia_

    if self.verbose:
        # Terminate the progress line printed during the iterations.
        print("")
    return self
def cdist_soft_dtw(dataset1, dataset2=None, gamma=1.):
    """Compute cross-similarity matrix using Soft-DTW metric.

    Soft-DTW was originally presented in [1]_.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series. If `None`, `dataset1` is compared
        with itself.
    gamma : float (default 1.)
        Gamma parameter for Soft-DTW

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[-0.01...,  1. ],
           [ 1. ,  0. ]])
    >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], [[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[-0.01...,  1. ],
           [ 1. ,  0. ]])

    See Also
    --------
    soft_dtw : Compute Soft-DTW
    cdist_soft_dtw_normalized : Cross similarity matrix between time series
        datasets using a normalized version of Soft-DTW

    References
    ----------
    .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for
       Time-Series," ICML 2017.
    """
    dataset1 = to_time_series_dataset(dataset1, dtype=numpy.float64)
    self_similarity = dataset2 is None
    if self_similarity:
        dataset2 = dataset1
    else:
        dataset2 = to_time_series_dataset(dataset2, dtype=numpy.float64)

    dists = numpy.empty((dataset1.shape[0], dataset2.shape[0]))
    equal_size_ds1 = check_equal_size(dataset1)
    equal_size_ds2 = check_equal_size(dataset2)

    # When sizes differ, keep only the first ``ts_size`` samples of each
    # series.
    if equal_size_ds1:
        rows = dataset1
    else:
        rows = [ts[:ts_size(ts)] for ts in dataset1]
    if equal_size_ds2:
        cols = dataset2
    else:
        cols = [ts[:ts_size(ts)] for ts in dataset2]

    for i, row_ts in enumerate(rows):
        for j, col_ts in enumerate(cols):
            if self_similarity and j < i:
                # Reuse the entry already computed above the diagonal.
                dists[i, j] = dists[j, i]
            else:
                dists[i, j] = soft_dtw(row_ts, col_ts, gamma=gamma)
    return dists
def softdtw_barycenter(X, gamma=1.0, weights=None, method="L-BFGS-B", tol=1e-3,
                       max_iter=50, init=None):
    """Compute barycenter (time series averaging) under the soft-DTW geometry.

    Soft-DTW was originally presented in [1]_.

    Parameters
    ----------
    X : array-like, shape=(n_ts, sz, d)
        Time series dataset.
    gamma: float
        Regularization parameter.
        Lower is less smoothed (closer to true DTW).
    weights: None or array
        Weights of each X[i]. Must be the same size as len(X).
        If None, uniform weights are used.
    method: string
        Optimization method, passed to `scipy.optimize.minimize`.
        Default: L-BFGS.
    tol: float
        Tolerance of the method used.
    max_iter: int
        Maximum number of iterations. If not positive, no optimization is
        performed and the starting point is returned as is.
    init: array or None (default: None)
        Initial barycenter to start from for the optimization process.
        If `None`, euclidean barycenter is used as a starting point.

    Returns
    -------
    numpy.ndarray of shape (bsz, d)
        Soft-DTW barycenter of the provided time series dataset. `bsz` is
        the size of the `init` array if provided, `sz` otherwise.

    Examples
    --------
    >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
    >>> softdtw_barycenter(time_series, max_iter=5)
    array([[1.25161574],
           [2.03821705],
           [3.5101956 ],
           [4.36140605]])
    >>> time_series = [[1, 2, 3, 4], [1, 2, 3, 4, 5]]
    >>> softdtw_barycenter(time_series, max_iter=5)
    array([[1.21349933],
           [1.8932251 ],
           [2.67573269],
           [3.51057026],
           [4.33645802]])

    References
    ----------
    .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for
       Time-Series," ICML 2017.
    """
    X_ = to_time_series_dataset(X)
    weights = _set_weights(weights, X_.shape[0])
    if init is None:
        if check_equal_size(X_):
            barycenter = euclidean_barycenter(X_, weights)
        else:
            resampled_X = TimeSeriesResampler(sz=X_.shape[1]).fit_transform(X_)
            barycenter = euclidean_barycenter(resampled_X, weights)
    else:
        barycenter = init

    if not max_iter > 0:
        return barycenter

    # Each series is handed to the objective without its NaN entries.
    series = [to_time_series(ts, remove_nans=True) for ts in X_]
    if len({numpy.shape(ts) for ts in series}) <= 1:
        X_ = numpy.array(series)
    else:
        # Series of different lengths cannot be stacked: recent NumPy
        # releases raise ValueError on implicit ragged-array creation, so
        # the 1-D object array (one series per cell) is built explicitly.
        X_ = numpy.empty(len(series), dtype=object)
        for idx, ts in enumerate(series):
            X_[idx] = ts

    def f(Z):
        return _softdtw_func(Z, X_, weights, barycenter, gamma)

    # The function works with vectors so we need to vectorize barycenter.
    res = minimize(f, barycenter.ravel(), method=method, jac=True, tol=tol,
                   options=dict(maxiter=max_iter, disp=False))
    return res.x.reshape(barycenter.shape)
def _init_avg(self, X):
    """Return the mean of the series in ``X`` along the first axis.

    ``X`` is averaged directly when its first series already has
    ``self.barycenter_size`` samples and all series have equal size.
    Otherwise it is first resampled to ``self.barycenter_size`` with
    ``TimeSeriesResampler``.

    Parameters
    ----------
    X : numpy.ndarray
        Time series dataset.

    Returns
    -------
    numpy.ndarray
        Mean of the (possibly resampled) dataset over its first axis.
    """
    already_sized = (X[0].shape[0] == self.barycenter_size
                     and check_equal_size(X))
    if not already_sized:
        X = TimeSeriesResampler(sz=self.barycenter_size).fit_transform(X)
    return X.mean(axis=0)
def cdist_soft_dtw(dataset1, dataset2=None, gamma=1.):
    r"""Compute cross-similarity matrix using Soft-DTW metric.

    Soft-DTW was originally presented in [1]_ and is discussed in more
    detail in our :ref:`user-guide page on DTW and its variants<dtw>`.

    Soft-DTW is computed as:

    .. math::

        \text{soft-DTW}_{\gamma}(X, Y) =
            \min_{\pi}{}^\gamma \sum_{(i, j) \in \pi} \|X_i - Y_j\|^2

    where :math:`\min^\gamma` is the soft-min operator of parameter
    :math:`\gamma`.

    In the limit case :math:`\gamma = 0`, :math:`\min^\gamma` reduces to a
    hard-min operator and soft-DTW is defined as the square of the DTW
    similarity measure.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series. If `None`, `dataset1` is compared
        with itself.
    gamma : float (default 1.)
        Gamma parameter for Soft-DTW

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01)
    array([[-0.01098612,  1.        ],
           [ 1.        ,  0.        ]])
    >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]],
    ...                [[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01)
    array([[-0.01098612,  1.        ],
           [ 1.        ,  0.        ]])

    See Also
    --------
    soft_dtw : Compute Soft-DTW
    cdist_soft_dtw_normalized : Cross similarity matrix between time series
        datasets using a normalized version of Soft-DTW

    References
    ----------
    .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for
       Time-Series," ICML 2017.
    """
    def _unpadded(series, already_equal):
        # With unequal sizes, keep only the first ``ts_size`` samples.
        return series if already_equal else series[:ts_size(series)]

    dataset1 = to_time_series_dataset(dataset1, dtype=numpy.float64)
    self_similarity = dataset2 is None
    if self_similarity:
        dataset2 = dataset1
    else:
        dataset2 = to_time_series_dataset(dataset2, dtype=numpy.float64)

    n_rows, n_cols = dataset1.shape[0], dataset2.shape[0]
    dists = numpy.empty((n_rows, n_cols))
    equal_size_ds1 = check_equal_size(dataset1)
    equal_size_ds2 = check_equal_size(dataset2)

    for i, row_ts in enumerate(dataset1):
        row_ts = _unpadded(row_ts, equal_size_ds1)
        for j, col_ts in enumerate(dataset2):
            if self_similarity and j < i:
                # Reuse the entry already computed above the diagonal.
                dists[i, j] = dists[j, i]
                continue
            dists[i, j] = soft_dtw(row_ts,
                                   _unpadded(col_ts, equal_size_ds2),
                                   gamma=gamma)
    return dists