Example #1
def fit(self, X, y, sample_weight=None):
    sklearn_X = _prepare_ts_datasets_sklearn(X)
    # Resolve gamma="auto" into a data-driven value before building the
    # GAK kernel function
    if self.kernel == "gak" and self.gamma == "auto":
        self.gamma = gamma_soft_dtw(to_time_series_dataset(X))
        self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d,
                                              gamma=self.gamma)
    super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight)
    # Rebuild the kernel so that predictions only evaluate GAK against the
    # support vectors found during fitting
    self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d,
                                          gamma=self.gamma,
                                          slice_support_vectors=self.support_)
    return self
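A minimal usage sketch for the method above, assuming tslearn's public TimeSeriesSVC API; the random-walk data and labels are fabricated purely for illustration. With kernel="gak" and gamma="auto", fit() resolves gamma through gamma_soft_dtw before training:

import numpy
from tslearn.generators import random_walks
from tslearn.svm import TimeSeriesSVC

# Toy dataset: 20 univariate random walks with made-up labels
X = random_walks(n_ts=20, sz=32, d=1, random_state=0)
y = numpy.array([0] * 10 + [1] * 10)

# gamma="auto" triggers the gamma_soft_dtw heuristic inside fit()
clf = TimeSeriesSVC(kernel="gak", gamma="auto")
clf.fit(X, y)
print(clf.predict(X[:5]))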
Example #2
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
        if y is None:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=force_all_finite)
        else:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=force_all_finite)
        X = to_time_series_dataset(X)

        if fit_time:
            self._X_fit = X
            # gamma="auto" defers to the data-driven heuristic
            if self.gamma == "auto":
                self.gamma_ = gamma_soft_dtw(X)
            else:
                self.gamma_ = self.gamma
            self.classes_ = numpy.unique(y)
        else:
            check_is_fitted(self, ['svm_estimator_', '_X_fit'])
            X = check_dims(X,
                           X_fit_dims=self._X_fit.shape,
                           extend=True,
                           check_n_features_only=(self.kernel
                                                  in VARIABLE_LENGTH_METRICS))

        if self.kernel in VARIABLE_LENGTH_METRICS:
            assert self.kernel == "gak"
            # GAK is passed to sklearn as a precomputed Gram matrix; the
            # bandwidth follows from gamma = 2 * sigma ** 2
            self.estimator_kernel_ = "precomputed"
            if fit_time:
                # Square Gram matrix over the training set
                sklearn_X = cdist_gak(X,
                                      sigma=numpy.sqrt(self.gamma_ / 2.),
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
            else:
                # Rectangular matrix: new samples against the stored
                # training set
                sklearn_X = cdist_gak(X,
                                      self._X_fit,
                                      sigma=numpy.sqrt(self.gamma_ / 2.),
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
        else:
            self.estimator_kernel_ = self.kernel
            sklearn_X = to_sklearn_dataset(X)

        if y is None:
            return sklearn_X
        else:
            return sklearn_X, y
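The precomputed branch above converts the soft-DTW gamma into a GAK bandwidth via sigma = sqrt(gamma / 2), i.e. gamma = 2 * sigma ** 2. A small sketch of that step in isolation, using tslearn's public cdist_gak and gamma_soft_dtw on toy data:

import numpy
from tslearn.generators import random_walks
from tslearn.metrics import cdist_gak, gamma_soft_dtw

X = random_walks(n_ts=10, sz=32, d=1, random_state=0)

# Data-driven gamma suggestion, converted to the GAK bandwidth
gamma = gamma_soft_dtw(X)
K = cdist_gak(X, sigma=numpy.sqrt(gamma / 2.))
print(K.shape)  # (10, 10) Gram matrix, usable with kernel="precomputed"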
Example #3
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
        if y is None:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=force_all_finite)
        else:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=force_all_finite)
        X = check_dims(X, X_fit=None)
        X = to_time_series_dataset(X)

        if fit_time:
            self._X_fit = X
            # In this variant, gamma is always estimated from the data
            self.gamma_ = gamma_soft_dtw(X)
            self.classes_ = numpy.unique(y)

        if self.kernel in VARIABLE_LENGTH_METRICS:
            assert self.kernel == "gak"
            self.estimator_kernel_ = "precomputed"
            if fit_time:
                sklearn_X = cdist_gak(X,
                                      sigma=numpy.sqrt(self.gamma_ / 2.),
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
            else:
                sklearn_X = cdist_gak(X,
                                      self._X_fit,
                                      sigma=numpy.sqrt(self.gamma_ / 2.),
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
        else:
            self.estimator_kernel_ = self.kernel
            sklearn_X = _prepare_ts_datasets_sklearn(X)

        if y is None:
            return sklearn_X
        else:
            return sklearn_X, y
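The fit_time flag above mirrors how a precomputed kernel is consumed downstream: a square Gram matrix at fit time, a rectangular one (test rows against training columns) at predict time. A sketch of that flow with scikit-learn's SVC; data and split are illustrative only:

import numpy
from sklearn.svm import SVC
from tslearn.generators import random_walks
from tslearn.metrics import cdist_gak, gamma_soft_dtw

X = random_walks(n_ts=12, sz=32, d=1, random_state=0)
y = numpy.array([0] * 6 + [1] * 6)
X_train, X_test, y_train = X[:8], X[8:], y[:8]

gamma = gamma_soft_dtw(X_train)
sigma = numpy.sqrt(gamma / 2.)

# Square Gram matrix over the training set ...
svc = SVC(kernel="precomputed").fit(cdist_gak(X_train, sigma=sigma), y_train)
# ... rectangular matrix (test vs. train) at prediction time
print(svc.predict(cdist_gak(X_test, X_train, sigma=sigma)))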
Example #4
import numpy as np


def softdtw_augment_train_set(x_train,
                              y_train,
                              classes,
                              num_synthetic_ts,
                              max_neighbors=5):
    from tslearn.neighbors import KNeighborsTimeSeries
    from tslearn.barycenters import softdtw_barycenter
    from tslearn.metrics import gamma_soft_dtw

    # synthetic train set and labels
    synthetic_x_train = []
    synthetic_y_train = []
    # loop through each class
    for c in classes:
        # get the time series belonging to this class
        c_x_train = x_train[np.where(y_train == c)[0]]
        if len(c_x_train) == 1:
            # skip classes with a single series: no neighbors to average
            continue
        # compute appropriate gamma for softdtw for the entire class
        class_gamma = gamma_soft_dtw(c_x_train)
        # loop until the required number of synthetic examples is generated
        generated_samples = 0
        while generated_samples < num_synthetic_ts:
            # Choose a random representative for the class
            representative_indices = np.arange(len(c_x_train))
            random_representative_index = np.random.choice(
                representative_indices, size=1, replace=False)
            random_representative = c_x_train[random_representative_index]
            # Choose a random number of neighbors (between 1 and max_neighbors)
            random_number_of_neighbors = int(
                np.random.uniform(1, max_neighbors))
            # +1 because the representative comes back as its own neighbor
            knn = KNeighborsTimeSeries(
                n_neighbors=random_number_of_neighbors + 1,
                metric='softdtw',
                metric_params={'gamma': class_gamma}).fit(c_x_train)
            random_neighbor_distances, random_neighbor_indices = knn.kneighbors(
                X=random_representative, return_distance=True)
            random_neighbor_indices = random_neighbor_indices[0]
            random_neighbor_distances = random_neighbor_distances[0]
            # Index 0 is the representative itself (distance 0), so the true
            # nearest-neighbor distance sits at index 1
            nearest_neighbor_distance = np.sort(random_neighbor_distances)[1]
            # Buffer for the representative and its neighbors,
            # shape (n_neighbors + 1, sz, d)
            random_neighbors = np.zeros(
                (random_number_of_neighbors + 1, c_x_train.shape[1],
                 c_x_train.shape[2]),
                dtype=float)

            for j, neighbor_index in enumerate(random_neighbor_indices):
                random_neighbors[j, :] = c_x_train[neighbor_index]
            # Weight neighbors by distance (then normalize): a neighbor at the
            # nearest-neighbor distance gets weight 0.5, farther ones
            # exponentially less
            weights = np.exp(
                np.log(0.5) * random_neighbor_distances /
                nearest_neighbor_distance)
            weights /= np.sum(weights)
            # Average the neighborhood with a weighted soft-DTW barycenter,
            # using a gamma tuned to these neighbors
            random_neighbors_gamma = gamma_soft_dtw(random_neighbors)
            generated_sample = softdtw_barycenter(random_neighbors,
                                                  weights=weights,
                                                  gamma=random_neighbors_gamma)
            synthetic_x_train.append(generated_sample)
            synthetic_y_train.append(c)
            # Repeat until you have the desired number of synthetic samples for each class
            generated_samples += 1
    # return the synthetic set
    return np.array(synthetic_x_train), np.array(synthetic_y_train)
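A usage sketch for the augmentation helper above; the random-walk dataset and labels are synthetic stand-ins chosen only so the function runs end to end:

import numpy as np
from tslearn.generators import random_walks

x_train = random_walks(n_ts=10, sz=32, d=1, random_state=0)
y_train = np.array([0] * 5 + [1] * 5)

synth_x, synth_y = softdtw_augment_train_set(x_train, y_train,
                                             classes=np.unique(y_train),
                                             num_synthetic_ts=2)
print(synth_x.shape, synth_y)  # (4, 32, 1) and [0 0 1 1]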