Beispiel #1
0
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        For metrics listed in ``VARIABLE_LENGTH_METRICS``, the pairwise
        distances between ``X`` and the stored training series
        (``self._ts_fit``) are computed first and handed to the parent
        ``predict_proba`` as a precomputed matrix. ``self.metric`` is set to
        ``"precomputed"`` only for the duration of that parent call and is
        always restored afterwards, even if the call raises.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, n_classes)
            Array of predicted class probabilities

        Raises
        ------
        ValueError
            If the metric is listed in ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric not in VARIABLE_LENGTH_METRICS:
            check_is_fitted(self, '_X_fit')
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)

        # Validate and compute distances *before* touching ``self.metric`` so
        # that a failure here cannot leave the estimator in the temporary
        # "precomputed" state.
        check_is_fitted(self, '_ts_fit')
        self._ts_metric = self.metric

        if self.metric_params is None:
            metric_params = {}
        else:
            metric_params = self.metric_params.copy()
            # n_jobs / verbose are taken from the estimator attributes below;
            # drop them to avoid passing the same keyword twice.
            metric_params.pop("n_jobs", None)
            metric_params.pop("verbose", None)

        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)
        if self._ts_metric == "dtw":
            X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                           verbose=self.verbose, **metric_params)
        elif self._ts_metric == "softdtw":
            X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
        else:
            raise ValueError("Invalid metric recorded: %s" %
                             self._ts_metric)

        # The parent implementation must see a precomputed metric while it
        # consumes the distance matrix; restore the user-facing metric no
        # matter how the call ends.
        self.metric = "precomputed"
        try:
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
        finally:
            self.metric = self._ts_metric
Beispiel #2
0
 def _assign(self, X, update_class_attributes=True):
     if self.metric_params is None:
         metric_params = {}
     else:
         metric_params = self.metric_params.copy()
     if "gamma_sdtw" in metric_params.keys():
         metric_params["gamma"] = metric_params["gamma_sdtw"]
         del metric_params["gamma_sdtw"]
     if "n_jobs" in metric_params.keys():
         del metric_params["n_jobs"]
     if self.metric == "euclidean":
         dists = cdist(X.reshape((X.shape[0], -1)),
                       self.cluster_centers_.reshape((self.n_clusters, -1)),
                       metric="euclidean")
     elif self.metric == "dtw":
         dists = cdist_dtw(X, self.cluster_centers_, n_jobs=self.n_jobs,
                           verbose=self.verbose, **metric_params)
     elif self.metric == "softdtw":
         dists = cdist_soft_dtw(X, self.cluster_centers_, **metric_params)
     else:
         raise ValueError("Incorrect metric: %s (should be one of 'dtw', "
                          "'softdtw', 'euclidean')" % self.metric)
     matched_labels = dists.argmin(axis=1)
     if update_class_attributes:
         self.labels_ = matched_labels
         _check_no_empty_cluster(self.labels_, self.n_clusters)
         if self.dtw_inertia and self.metric != "dtw":
             inertia_dists = cdist_dtw(X, self.cluster_centers_,
                                       n_jobs=self.n_jobs,
                                       verbose=self.verbose)
         else:
             inertia_dists = dists
         self.inertia_ = _compute_inertia(inertia_dists,
                                          self.labels_,
                                          self._squared_inertia)
     return matched_labels
Beispiel #3
0
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Finds the K-neighbors of a point.

        Returns indices of and distances to the neighbors of each point.

        For metrics listed in ``VARIABLE_LENGTH_METRICS``, the pairwise
        distances between ``X`` and the stored training series
        (``self._ts_fit``) are computed first and passed to
        ``KNeighborsTimeSeriesMixin.kneighbors`` as a precomputed matrix.
        ``self.metric`` is set to ``"precomputed"`` only for the duration of
        that call and is always restored afterwards, even if the call raises.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            The query time series.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
            NOTE(review): in the variable-length-metric branch ``X`` is
            passed to ``check_array`` unconditionally, so ``X=None`` is
            presumably not supported there -- confirm.
        n_neighbors : int
            Number of neighbors to get (default is the value passed to the
            constructor).
        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the distance to points, only present if
            return_distance=True
        ind : array
            Indices of the nearest points in the population matrix.

        Raises
        ------
        ValueError
            If the metric is listed in ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric not in VARIABLE_LENGTH_METRICS:
            check_is_fitted(self, '_X_fit')
            if X is None:
                X_ = None
            else:
                X = check_array(X, allow_nd=True)
                X = to_time_series_dataset(X)
                X_ = to_sklearn_dataset(X)
                X_ = check_dims(X_, self._X_fit, extend=False)
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)

        # Validate and compute distances *before* touching ``self.metric`` so
        # that a failure here cannot leave the estimator in the temporary
        # "precomputed" state.
        check_is_fitted(self, '_ts_fit')
        self._ts_metric = self.metric

        if self.metric_params is None:
            metric_params = {}
        else:
            metric_params = self.metric_params.copy()
            # n_jobs / verbose are taken from the estimator attributes below;
            # drop them to avoid passing the same keyword twice.
            metric_params.pop("n_jobs", None)
            metric_params.pop("verbose", None)

        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)
        if self._ts_metric == "dtw":
            X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                           verbose=self.verbose, **metric_params)
        elif self._ts_metric == "softdtw":
            X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
        else:
            raise ValueError("Invalid metric recorded: %s" %
                             self._ts_metric)

        # The mixin must see a precomputed metric while it consumes the
        # distance matrix; restore the user-facing metric no matter how the
        # call ends.
        self.metric = "precomputed"
        try:
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)
        finally:
            self.metric = self._ts_metric
def test_kmeans():
    """Check TimeSeriesKMeans labelling for each metric and several inits.

    For the "euclidean", "dtw" and "softdtw" metrics, the fitted labels must
    match both the arg-min of the reference distance matrix to the fitted
    centers and the output of ``predict`` on the training data. The test
    also checks that asking for more clusters than is feasible leaves
    ``_X_fit`` set to None, and that fitting runs on variable-length series
    with the different ``init`` options.
    """
    n_series, length, dim = 15, 10, 3
    n_clusters = 3
    rng = np.random.RandomState(0)
    time_series = rng.randn(n_series, length, dim)

    def flat_euclidean(data, centers):
        # Euclidean reference distances are computed on flattened series.
        return cdist(data.reshape((n_series, -1)),
                     centers.reshape((n_clusters, -1)))

    # (metric name, reference pairwise-distance function); the order matters
    # because every fit draws from the same shared random state.
    reference_distances = (
        ("euclidean", flat_euclidean),
        ("dtw", cdist_dtw),
        ("softdtw", cdist_soft_dtw),
    )
    for metric_name, pairwise in reference_distances:
        model = TimeSeriesKMeans(n_clusters=n_clusters,
                                 metric=metric_name,
                                 max_iter=5,
                                 verbose=False,
                                 random_state=rng).fit(time_series)
        expected_labels = pairwise(time_series,
                                   model.cluster_centers_).argmin(axis=1)
        np.testing.assert_allclose(model.labels_, expected_labels)
        np.testing.assert_allclose(model.labels_,
                                   model.predict(time_series))

    # More clusters requested than there are series.
    km_nofit = TimeSeriesKMeans(n_clusters=101,
                                verbose=False,
                                random_state=rng).fit(time_series)
    assert (km_nofit._X_fit is None)

    # Smoke tests on variable-length series: fitting must simply not fail.
    variable_length = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3],
                                              [2, 5, 6, 7, 8, 9]])
    fit_configs = (
        dict(metric="softdtw", random_state=0),
        dict(metric="dtw", random_state=0, init="random"),
        dict(metric="dtw", random_state=0, init="k-means++"),
        dict(metric="dtw", init=variable_length[:2]),
    )
    for extra_kwargs in fit_configs:
        TimeSeriesKMeans(n_clusters=2,
                         verbose=False,
                         max_iter=5,
                         **extra_kwargs).fit(variable_length)