import matplotlib.pyplot as plt
import numpy as np

from pyts.classification import LearningShapelets
from pyts.datasets import load_gunpoint
from pyts.utils import windowed_view

# Load the data set and fit the classifier
X, _, y, _ = load_gunpoint(return_X_y=True)
clf = LearningShapelets(random_state=42, tol=0.01)
clf.fit(X, y)

# Select two shapelets
shapelets = np.asarray([clf.shapelets_[0, -9], clf.shapelets_[0, -12]])

# Derive the distances between the time series and the shapelets
shapelet_size = shapelets.shape[1]
X_window = windowed_view(X, window_size=shapelet_size, window_step=1)
X_dist = np.mean(
    (X_window[:, :, None] - shapelets[None, :]) ** 2, axis=3).min(axis=1)

plt.figure(figsize=(14, 4))

# Plot the two shapelets
plt.subplot(1, 2, 1)
plt.plot(shapelets[0])
plt.plot(shapelets[1])
plt.title('Two learned shapelets', fontsize=14)

# Plot the distances
plt.subplot(1, 2, 2)
for color, label in zip('br', (1, 2)):
    plt.scatter(X_dist[y == label, 0], X_dist[y == label, 1],
                c=color, label='Class {}'.format(label))
plt.title('Distances to both shapelets', fontsize=14)
plt.legend()
plt.show()
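# A minimal sketch of the `windowed_view` conventions the distance
# computation above relies on (output shape and window placement); the toy
# array and the window parameters here are illustrative only.
import numpy as np
from pyts.utils import windowed_view

X_demo = np.arange(10).reshape(1, 10)  # one time series of length 10
windows = windowed_view(X_demo, window_size=4, window_step=2)
# n_windows = (n_timestamps - window_size + window_step) // window_step
#           = (10 - 4 + 2) // 2 = 4
print(windows.shape)  # (1, 4, 4): (n_samples, n_windows, window_size)
print(windows[0])
# [[0 1 2 3]
#  [2 3 4 5]
#  [4 5 6 7]
#  [6 7 8 9]]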
import numpy as np

from pyts.utils import windowed_view


def test_accurate_results(params, arr_desired):
    """Test that the actual results are the expected ones."""
    arr_actual = windowed_view(**params)
    np.testing.assert_array_equal(arr_actual, arr_desired)
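# A hypothetical (params, arr_desired) pair such a parametrized test could
# receive; the values below are hand-checked against windowed_view's
# window-placement rule, not taken from the library's own test fixtures.
import numpy as np
from pyts.utils import windowed_view

params = {'X': np.arange(6).reshape(1, 6), 'window_size': 3, 'window_step': 1}
arr_desired = np.array([[[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]])
np.testing.assert_array_equal(windowed_view(**params), arr_desired)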
def _explain(self, X_specimens):
    from pyts.utils import windowed_view

    X_specimens = np.asarray(X_specimens)
    n_specimens, size_x = X_specimens.shape
    n_freq_bins = max(self.model._window_sizes) // 2
    time_domain = self.domain.startswith("t")
    overall_y_preds = []
    overall_impacts = []
    for (window_size, window_step, sfa, vectorizer, relevant_features) \
            in zip(self.model._window_sizes, self.model._window_steps,
                   self.model._sfa_list, self.model._vectorizer_list,
                   self.model._relevant_features_list):
        n_windows = (size_x - window_size + window_step) // window_step
        X_windowed = windowed_view(
            X_specimens, window_size=window_size, window_step=window_step)
        X_windowed = X_windowed.reshape(n_specimens * n_windows, window_size)
        X_sfa = sfa.transform(X_windowed)
        X_word = np.array(
            ["".join(X_sfa[i]) for i in range(n_specimens * n_windows)])
        X_word = X_word.reshape(n_specimens, n_windows)

        # Predictions
        X_bow = np.asarray(
            [" ".join(X_word[i]) for i in range(n_specimens)])
        overall_y_preds.append(
            vectorizer.transform(X_bow)[:, relevant_features])

        # Impacts
        # 1. Create an array of pairs: (ngram length, numba dict from ngrams
        #    of that length to actual model outputs).
        ngram_range = range(vectorizer.ngram_range[0],
                            vectorizer.ngram_range[1] + 1)
        ngramlen_to_ngram_to_modelout = {
            ngram_len: optional_numba_dict("unicode_type", "int64")
            for ngram_len in ngram_range
        }
        for ngram, ngram_idx in vectorizer.vocabulary_.items():
            find = np.where(relevant_features == ngram_idx)[0]
            if find.size != 0:
                ngramlen_to_ngram_to_modelout[
                    ngram.count(" ") + 1][ngram] = find[0]
        ngramlen_to_ngram_to_modelout = optional_numba_list(
            ngramlen_to_ngram_to_modelout.items())

        if time_domain:
            # Dummy data to make numba not complain.
            global_freq_bins = np.zeros((1, 2))
        else:
            # 2. If drop_sum is False, retroactively drop the sum from the
            #    support indices.
            win_freq_bins = np.copy(sfa.support_)
            if not self.model.drop_sum:
                win_freq_bins = win_freq_bins[win_freq_bins != 0]
                win_freq_bins -= 1
            # Also convert the support indices (two consecutive indices
            # represent the real and imag parts of one bin's output) to bin
            # indices by dividing by 2 and rounding down.
            win_freq_bins //= 2
            # Convert the support bin indices for this window to global
            # support bin indices, along with a weight for each index which
            # is smaller than 1 when the local bin doesn't fully cover the
            # respective global bin.
            n_win_freq_bins = window_size // 2
            bin_split = _soft_range_split(n_freq_bins, n_win_freq_bins)
            global_freq_bins = np.vstack(
                [bin_split[freq_bin] for freq_bin in win_freq_bins])

        # 3. Compute a sparse representation of the impacts.
        rowptr, cols, data = _weasel_impacts_csr(
            time_domain, n_specimens, size_x, n_freq_bins, window_size,
            window_step, n_windows, ngramlen_to_ngram_to_modelout,
            global_freq_bins, X_word, np.array(" "))

        # 4. Construct the sparse matrix object.
        impacts_shape = (n_specimens, len(relevant_features) * size_x *
                         (1 if time_domain else n_freq_bins))
        overall_impacts.append(
            sparse.csr_matrix((data, cols, rowptr), impacts_shape))

    overall_y_preds = sparse.hstack(overall_y_preds, format="csr")
    overall_impacts = sparse.hstack(overall_impacts, format="csr")
    if not getattr(self.model, "sparse", True):
        overall_y_preds = overall_y_preds.toarray()
    n_model_outputs = overall_y_preds.shape[1]

    if time_domain:
        constr = TimeExplanation
        kwargs = {}
    else:
        constr = FreqExplanation
        kwargs = {
            "freq_slicing": Slicing(bin_rate=size_x, n_slices=n_freq_bins,
                                    cont_interval=(0, 0.5))
        }
    return [
        constr(x_specimen,
               impact_row.reshape((n_model_outputs, -1)).tocsr(),
               y_pred=y_pred, **kwargs)
        for x_specimen, y_pred, impact_row in zip(
            X_specimens, overall_y_preds, overall_impacts)
    ]
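# A toy walk-through of step 2 above (support indices -> frequency-bin
# indices), assuming drop_sum=False and a hypothetical SFA support array.
# With drop_sum=False, index 0 is the sum coefficient; after dropping it and
# shifting, each consecutive (real, imag) index pair maps to one bin.
import numpy as np

support = np.array([0, 1, 2, 5, 6])     # hypothetical sfa.support_
win_freq_bins = support[support != 0]   # drop the sum coefficient
win_freq_bins = win_freq_bins - 1       # shift: index 1 -> bin 0's real part
win_freq_bins //= 2                     # (real, imag) pair -> bin index
print(win_freq_bins)                    # [0 0 2 2]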
import re

import pytest

from pyts.utils import windowed_view


def test_parameter_check(params, error, err_msg):
    """Test parameter validation in windowed_view."""
    with pytest.raises(error, match=re.escape(err_msg)):
        windowed_view(**params)
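# A hypothetical (params, error, err_msg) triple for the test above. The
# exact error types and messages raised by pyts are not reproduced here;
# this only shows the shape of such a case, and the assertion that a
# too-large window_size raises ValueError is an assumption about pyts's
# input validation.
import numpy as np
import pytest
from pyts.utils import windowed_view

params = {'X': np.ones((2, 10)), 'window_size': 20, 'window_step': 1}
with pytest.raises(ValueError):  # assumed: window_size > n_timestamps
    windowed_view(**params)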