def _explain(self, X_specimens):
    """Build one ``TimeExplanation`` per specimen from tf-idf word weights.

    For every vocabulary word, the weight assigned to a model output is the
    word's row-normalised tf-idf value for that output minus the mean of its
    values for all the other outputs. Those weights are then accumulated on
    the time steps spanned by each window in which the word occurs.

    Parameters
    ----------
    X_specimens : array-like
        Specimens to explain. Converted with ``np.asarray``; the second
        axis is used as the number of time steps.

    Returns
    -------
    list
        One ``TimeExplanation(specimen, impacts)`` per row of
        ``X_specimens``, where ``impacts`` is an array of shape
        ``(n_outputs, n_timestamps)``.
    """
    from pyts.bag_of_words import BagOfWords

    model = self.model
    X_specimens = np.asarray(X_specimens)
    n_timestamps = X_specimens.shape[1]
    tfidf = model.tfidf_
    n_outputs = tfidf.shape[0]
    vocabulary = model._tfidf.vocabulary_

    # Scale every output's tf-idf row to unit Euclidean norm.
    row_norms = np.linalg.norm(tfidf, axis=1, keepdims=True)
    unit_tfidf = tfidf / row_norms

    # Contrast matrix: 1 on the diagonal and -1/(n_outputs - 1) elsewhere,
    # e.g. for four outputs:
    #    1   -1/3 -1/3 -1/3
    #  -1/3   1   -1/3 -1/3
    #  -1/3 -1/3   1   -1/3
    #  -1/3 -1/3 -1/3   1
    contrast = np.full((n_outputs, n_outputs), -1 / (n_outputs - 1))
    np.fill_diagonal(contrast, 1)
    impact_per_word = contrast @ unit_tfidf

    # Numerosity reduction is switched off so that the i-th word of a
    # sentence corresponds to the i-th window of the series.
    bow = BagOfWords(model.window_size, model.window_step,
                     numerosity_reduction=False)
    symbols = model._sax.transform(X_specimens)
    sentences = bow.transform(symbols)

    window_size = model.window_size
    window_step = model.window_step

    explanations = []
    for specimen, sentence in zip(X_specimens, sentences):
        words = sentence.split(" ")

        # Per-word contribution vector (one value per output), optionally
        # divided by the number of occurrences of the word in the sentence.
        uniques, counts = np.unique(words, return_counts=True)
        contribs = {}
        for word, count in zip(uniques, counts):
            if word in vocabulary:
                divisor = count if self.divide_spread else 1
                column = impact_per_word[:, vocabulary[word]]
                contribs[word] = column / divisor

        impacts = np.zeros((n_outputs, n_timestamps))
        offset = 0
        for word in words:
            contribution = contribs.get(word)
            if contribution is not None:
                # Broadcast the per-output vector over the window's steps.
                impacts[:, offset:offset + window_size] += (
                    contribution[:, np.newaxis])
            offset += window_step

        explanations.append(TimeExplanation(specimen, impacts))

    return explanations
:class:`pyts.bag_of_words.BagOfWords`. """ # Author: Johann Faouzi <*****@*****.**> # License: BSD-3-Clause import matplotlib.pyplot as plt import numpy as np from pyts.bag_of_words import BagOfWords from pyts.datasets import load_gunpoint # Load the dataset and perform the transformation X, _, _, _ = load_gunpoint(return_X_y=True) window_size, word_size = 30, 5 bow = BagOfWords(window_size=window_size, word_size=word_size, window_step=window_size, numerosity_reduction=False) X_bow = bow.transform(X) # Plot the considered subseries plt.figure(figsize=(10, 4)) splits_series = np.linspace(0, X.shape[1], 1 + X.shape[1] // window_size, dtype='int64') for start, end in zip(splits_series[:-1], np.clip(splits_series[1:] + 1, 0, X.shape[1])): plt.plot(np.arange(start, end), X[0, start:end], 'o-', lw=1, ms=1) # Plot the corresponding letters splits_letters = np.linspace(0, X.shape[1],
import numpy as np
import matplotlib.pyplot as plt
from pyts.bag_of_words import BagOfWords

# Parameters: size of the toy dataset and number of distinct symbols
n_samples, n_timestamps = 100, 48
n_bins = 4

# Toy dataset: random integers in [0, n_bins) drawn with a fixed seed, then
# mapped to letters by indexing ``alphabet`` with the integer array
rng = np.random.RandomState(42)
alphabet = np.array(['a', 'b', 'c', 'd'])
X_ordinal = rng.randint(n_bins, size=(n_samples, n_timestamps))
X_alphabet = alphabet[X_ordinal]

# Bag-of-words transformation (numerosity reduction disabled)
bow = BagOfWords(window_size=2, numerosity_reduction=False)
X_bow = bow.transform(X_alphabet)

# Split the first sample's output, a space-separated string, into its words
words = np.asarray(X_bow[0].split(' '))
# Boolean mask: True for the first word and for every word that differs from
# the word immediately before it
different_words_idx = np.r_[True, words[1:] != words[:-1]]

# Show the results
plt.figure(figsize=(16, 7))
plt.suptitle('Transforming a discretized time series into a bag of words',
             fontsize=20, y=0.9)

# Left panel: the first discretized series, with letters as y tick labels
plt.subplot(121)
# NOTE(review): ``scalex`` is documented by matplotlib as a boolean; 0.2 is
# merely truthy here -- confirm the intent.
plt.plot(X_ordinal[0], 'o', scalex=0.2)
plt.yticks(np.arange(4), alphabet)
plt.xticks([], [])
plt.yticks(fontsize=16)
def test_actual_results_bag_of_words(params, arr_desired):
    """Check that ``fit_transform`` reproduces the reference output.

    A ``BagOfWords`` built from ``params`` is applied to the module-level
    ``X_bow`` data and the result must equal ``arr_desired`` element-wise.
    """
    transformer = BagOfWords(**params)
    arr_actual = transformer.fit_transform(X_bow)
    np.testing.assert_array_equal(arr_actual, arr_desired)
def test_parameter_check_bag_of_words(params, error, err_msg):
    """Check that invalid parameters are rejected when transforming.

    ``transform`` on the module-level ``X_bow`` data must raise ``error``
    with a message matching ``err_msg`` taken literally.
    """
    bow = BagOfWords(**params)
    # Escape the message so regex metacharacters in it are matched verbatim.
    literal_pattern = re.escape(err_msg)
    with pytest.raises(error, match=literal_pattern):
        bow.transform(X_bow)