def test_actual_results_no_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    bossvs = BOSSVS(
        word_size=4, n_bins=3, window_size=10, window_step=10,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False,
        use_idf=True, smooth_idf=False, sublinear_tf=True
    )

    # Reconstruct the expected results step by step: windowing, SFA words,
    # bag of words per sample, one document per class, then tf-idf.
    X_windowed = X.reshape(8, 2, 10).reshape(16, 10)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    y_repeated = np.repeat(y, 2)
    X_sfa = sfa.fit_transform(X_windowed, y_repeated)
    X_word = np.asarray([''.join(X_sfa[i]) for i in range(16)])
    X_word = X_word.reshape(8, 2)
    X_bow = np.asarray([' '.join(X_word[i]) for i in range(8)])

    X_class = np.array([' '.join(X_bow[y == i]) for i in range(2)])
    tfidf = TfidfVectorizer(
        norm=None, use_idf=True, smooth_idf=False, sublinear_tf=True
    )
    tfidf_desired = tfidf.fit_transform(X_class).toarray()

    # Vocabulary
    vocabulary_desired = {value: key for key, value in
                          tfidf.vocabulary_.items()}

    # Tf-idf
    tfidf_actual = bossvs.fit(X, y).tfidf_

    # Decision function
    decision_function_actual = bossvs.decision_function(X)
    decision_function_desired = cosine_similarity(
        tfidf.transform(X_bow), tfidf_desired)

    # Predictions
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(tfidf_actual, tfidf_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        y_pred_actual, y_pred_desired, atol=1e-5, rtol=0)
    return correct / count


accuracy_SAX = test_accuracy(train, test, win, paa, alp, na_strategy, zthresh)
error_SAX = 1 - accuracy_SAX
print('Accuracy SAX: ', accuracy_SAX)
print("Error rate SAX {0:.4f}".format(error_SAX))
error_SAX_list.append(error_SAX)

# BOSSVS
if dataset_list == ["SyntheticControl"]:
    # Ensemble of BOSSVS classifiers with different window sizes,
    # combined with a voting scheme.
    window_size = np.arange(18, 37, dtype='int64')
    norm_mean = np.full(window_size.size, False)
    word_size = np.full(window_size.size, 6, dtype='int64')
    bossvs = [BOSSVS(word_size=word_size, n_bins=4, norm_mean=norm_mean,
                     drop_sum=norm_mean, window_size=window_size)
              for (word_size, norm_mean, window_size) in zip(
                  word_size, norm_mean, window_size)]
    voting = VotingClassifier([("bossvs_" + str(i), bossvs)
                               for i, bossvs in enumerate(bossvs)])
    accuracy_BOSSVS = voting.fit(X_train, y_train).score(X_test, y_test)
    print('Accuracy BOSSVS: ', accuracy_BOSSVS)
    print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
    error_BOSSVS_list.append(1 - accuracy_BOSSVS)
else:
    if dataset_list == ["Adiac"]:
        clf_bossvs = BOSSVS(word_size=12, n_bins=4, norm_mean=True,
                            drop_sum=True, window_size=80)
        accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test)
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = BOSSVS(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
as a tfidf vector. For an unlabeled time series, the predicted label is the
label of the tfidf vector giving the highest cosine similarity with the tf
vector of the unlabeled time series.

The BOSSVS algorithm is implemented as :class:`pyts.classification.BOSSVS`.
"""
import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

# Toy dataset
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
X_new = bossvs.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],
        width=width, label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
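# The prediction rule described above can be checked directly: ``predict``
# returns the class whose tf-idf vector yields the highest cosine similarity
# with a test series, i.e. the argmax of ``decision_function``. A minimal
# sketch on the same toy dataset and parameters as the example above (it
# assumes that the fitted ``classes_`` attribute maps the columns of the
# decision function back to class labels).
import numpy as np
from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

X_tr, X_te, y_tr, y_te = load_gunpoint(return_X_y=True)
clf = BOSSVS(word_size=2, n_bins=3, window_size=10).fit(X_tr, y_tr)

# Cosine similarity between each test series and each class tf-idf vector,
# with shape (n_samples, n_classes).
similarities = clf.decision_function(X_te)

# The predicted label is the class with the highest similarity.
assert np.array_equal(clf.predict(X_te),
                      clf.classes_[similarities.argmax(axis=1)])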
    ({'estimator': BOSS()}, X[0], ValueError,
     "X must be 3-dimensional (got 2).")])
def test_input_check(params, X, error, err_msg):
    """Test input data validation."""
    clf = MultivariateClassifier(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)


@pytest.mark.parametrize(
    'params',
    [{'estimator': SAXVSM()},
     {'estimator': [SAXVSM() for _ in range(n_features)]},
     {'estimator': [SAXVSM(), SAXVSM(), BOSSVS()]}])
def test_actual_results_without_weights(params):
    """Test that the actual results are the expected ones."""
    arr_actual = MultivariateClassifier(**params).fit(X, y).predict(X)

    # Fit one estimator per feature and collect the per-feature predictions.
    predictions = []
    if isinstance(params['estimator'], list):
        for i in range(n_features):
            predictions.append(
                params['estimator'][i].fit(X[:, i], y).predict(X[:, i]))
    else:
        for i in range(n_features):
            predictions.append(
                params['estimator'].fit(X[:, i], y).predict(X[:, i]))
    predictions = np.asarray(predictions)

    arr_desired = []
from datetime import datetime
import os
import itertools
import pickle

import pandas as pd
from pyts.classification import BOSSVS, SAXVSM
from sktime.utils.load_data import load_from_tsfile_to_dataframe
from sklearn.metrics import *

UCR_DATASET_PATH = '/mnt/DATA/data/Univariate_ts'
datasets = list(os.walk(UCR_DATASET_PATH))[0][1]
random_state = 0

classifiers = [
    SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True),
    BOSSVS(word_size=2, n_bins=4, window_size=2)
]

tuples = ([(clf.__class__.__name__, 'Accuracy'),
           (clf.__class__.__name__, 'F1-Score')] for clf in classifiers)
index = pd.MultiIndex.from_tuples(itertools.chain(*tuples),
                                  names=['classifier', 'metric'])


def calculate_performance(output_file):

    def evaluate_classifiers(dst):
        print("[%s] Processing dataset %s"
              % (datetime.now().strftime("%F %T"), dst))
        train_x, train_y = load_from_tsfile_to_dataframe(
            os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(
import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS

# Parameters
n_samples, n_timestamps = 100, 144
n_classes = 2

# Toy dataset
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)
X[y == 0] = np.cumsum(X[y == 0], axis=1)

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X, y)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
X_new = bossvs.decision_function(X)

# Visualize the transformation
plt.figure(figsize=(16, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],
        width=width, label='Class 0')
plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1],
        width=width, label='Class 1')
plt.xticks(np.arange(vocabulary_length),
from pyts.classification import BOSSVS, SAXVSM

from deeplookup import datasets, metrics
from deeplookup.ts import Rsamp

rsamp = Rsamp()
bossvs = BOSSVS(window_size=36, word_size=12)
saxvsm = SAXVSM(window_size=36, word_size=12)

x_train, y_train = datasets.load_train_ts()

# Fit the time-series classifiers on the training set.
for model in (bossvs, saxvsm):
    model.fit(x_train, y_train)

# Evaluate each model and print its metrics.
for name, model, kwargs in [
    ("rsamp", rsamp, {"to_categorical": False}),
    ("bossvs", bossvs, {"to_categorical": True}),
    ("saxvsm", saxvsm, {"to_categorical": True}),
]:
    res = metrics.ts_score(model, **kwargs)
    print(f"metrics for {name}")
    print(res)
    print("-" * 80)