def test_actual_results_no_numerosity_reduction():
    """Check BOSSVS outputs against a step-by-step reference pipeline.

    The reference is rebuilt by hand from the module-level ``X`` and ``y``:
    ``X`` is cut into windows of length 10, each window is turned into a
    word with ``SymbolicFourierApproximation``, the two words of a sample
    are joined into one bag of words, the bags are joined per class, and
    the class documents are vectorized with ``TfidfVectorizer``.

    The vocabulary, the tf-idf matrix, the decision function and the
    predictions of a fitted ``BOSSVS`` (``numerosity_reduction=False``)
    are then compared with the reference values.

    NOTE(review): the reshapes assume ``X`` holds 8 samples of 20
    timestamps, and the ``y == label`` masks / ``argmax`` comparison assume
    the class labels are exactly 0 and 1 -- confirm against the fixtures.
    """
    # Keyword arguments shared by the estimator under test and the
    # reference transformers, so both sides are configured identically.
    sfa_options = dict(
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        n_bins=3, strategy='quantile', alphabet=None,
    )
    tfidf_options = dict(use_idf=True, smooth_idf=False, sublinear_tf=True)

    estimator = BOSSVS(
        word_size=4, window_size=10, window_step=10,
        numerosity_reduction=False, **sfa_options, **tfidf_options
    )

    # Reference words: two windows of length 10 per sample, each window
    # carrying the label of the sample it was cut from.
    windows = X.reshape(8, 2, 10).reshape(16, 10)
    window_labels = np.repeat(y, 2)
    reference_sfa = SymbolicFourierApproximation(n_coefs=4, **sfa_options)
    symbols = reference_sfa.fit_transform(windows, window_labels)
    words = np.asarray([''.join(row) for row in symbols]).reshape(8, 2)

    # One space-separated document per sample, then one per class.
    bags = np.asarray([' '.join(pair) for pair in words])
    class_documents = np.array(
        [' '.join(bags[y == label]) for label in range(2)]
    )

    # Expected values computed from the reference vectorizer.
    reference_tfidf = TfidfVectorizer(norm=None, **tfidf_options)
    expected_tfidf = reference_tfidf.fit_transform(class_documents).toarray()
    # BOSSVS is compared against an index -> word mapping, i.e. the
    # inverse of the vectorizer's word -> index vocabulary.
    expected_vocabulary = {
        index: term for term, index in reference_tfidf.vocabulary_.items()
    }
    expected_scores = cosine_similarity(
        reference_tfidf.transform(bags), expected_tfidf
    )
    expected_labels = expected_scores.argmax(axis=1)

    # Actual values produced by the estimator under test.
    actual_tfidf = estimator.fit(X, y).tfidf_
    actual_scores = estimator.decision_function(X)
    actual_labels = estimator.predict(X)

    # Testing
    assert estimator.vocabulary_ == expected_vocabulary
    comparisons = (
        (actual_tfidf, expected_tfidf),
        (actual_scores, expected_scores),
        (actual_labels, expected_labels),
    )
    for actual, expected in comparisons:
        np.testing.assert_allclose(actual, expected, atol=1e-5, rtol=0)
drop_sum=norm_mean, window_size=window_size) for (word_size, norm_mean, window_size) in zip( word_size, norm_mean, window_size)] voting = VotingClassifier([("bossvs_" + str(i), bossvs) for i, bossvs in enumerate(bossvs)]) accuracy_BOSSVS = voting.fit(X_train, y_train).score(X_test, y_test) print('Accuracy BOSSVS: ', accuracy_BOSSVS ) print("Error rate SAX {0:.4f}".format(1-accuracy_BOSSVS)) error_BOSSVS_list.append(1-accuracy_BOSSVS) else: if dataset_list == ["Adiac"]: clf_bossvs = BOSSVS(word_size=12, n_bins=4, norm_mean=True,drop_sum=True, window_size=80) accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test) print('Accuracy BOSSVS: ', accuracy_BOSSVS ) print("Error rate SAX {0:.4f}".format(1-accuracy_BOSSVS)) error_BOSSVS_list.append(1-accuracy_BOSSVS) else: clf_bossvs = BOSSVS(word_size=5, n_bins=4, norm_mean=False,drop_sum=False, window_size=40) accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test) print('Accuracy BOSSVS: ', accuracy_BOSSVS ) print("Error rate SAX {0:.4f}".format(1-accuracy_BOSSVS)) error_BOSSVS_list.append(1-accuracy_BOSSVS) print()
the label of the tfidf vector giving the highest cosine similarity with the tf vector of the unlabeled time series. BOSSVS algorithm is implemented as :class:`pyts.classification.BOSSVS`. """ import numpy as np import matplotlib.pyplot as plt from pyts.classification import BOSSVS from pyts.datasets import load_gunpoint # Toy dataset X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True) # BOSSVS transformation bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10) bossvs.fit(X_train, y_train) tfidf = bossvs.tfidf_ vocabulary_length = len(bossvs.vocabulary_) X_new = bossvs.decision_function(X_test) # Visualize the transformation plt.figure(figsize=(14, 5)) width = 0.4 plt.subplot(121) plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0], width=width, label='Class 1') plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1],
def test_parameter_check(params, error, err_msg):
    """Check that fitting with invalid parameters raises the right error.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to the ``BOSSVS`` constructor.
    error : type
        Exception class that ``fit`` is expected to raise.
    err_msg : str
        Message the exception must contain; it is matched literally.
    """
    estimator = BOSSVS(**params)
    # The message is escaped so that regex metacharacters in it (brackets,
    # parentheses, dots, ...) are matched as plain text by pytest.
    literal_pattern = re.escape(err_msg)
    with pytest.raises(error, match=literal_pattern):
        estimator.fit(X, y)
import matplotlib.pyplot as plt from pyts.classification import BOSSVS # Parameters n_samples, n_timestamps = 100, 144 n_classes = 2 # Toy dataset rng = np.random.RandomState(41) X = rng.randn(n_samples, n_timestamps) y = rng.randint(n_classes, size=n_samples) X[y == 0] = np.cumsum(X[y == 0], axis=1) # BOSSVS transformation bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10) bossvs.fit(X, y) tfidf = bossvs.tfidf_ vocabulary_length = len(bossvs.vocabulary_) X_new = bossvs.decision_function(X) # Visualize the transformation plt.figure(figsize=(16, 5)) width = 0.4 plt.subplot(121) plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0], width=width, label='Class 0') plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1], width=width, label='Class 1') plt.xticks(np.arange(vocabulary_length), np.vectorize(bossvs.vocabulary_.get)(np.arange(vocabulary_length)),