def test_actual_results_strategy_quantile():
    """Check SAX-VSM outputs against hand-computed values (quantile bins).

    Each ten-point series is turned into seven four-letter words (see the
    worked bag of words below).  The expected decision function is the
    cosine similarity between every sample's word counts and the tf-idf
    vector of every class; the expected prediction is its row-wise argmax.
    """
    # Data
    X = [[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.3, 0.2, 0.4, 0.1, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5]]
    y = [0, 0, 1]

    clf = SAXVSM(window_size=4, word_size=4, n_bins=2, strategy='quantile',
                 numerosity_reduction=False, sublinear_tf=False)
    # Fit once: the same fitted estimator serves both checks below, as in
    # the 'uniform' variant of this test.
    clf.fit(X, y)
    decision_function_actual = clf.decision_function(X)

    # X_bow = ["aabb aabb aabb aabb aabb aabb aabb",
    #          "abab baba abab aabb aabb aabb aabb",
    #          "abab baba abab baba abab baba abab"]
    assert clf.vocabulary_ == {0: 'aabb', 1: 'abab', 2: 'baba'}

    # Word counts per sample; columns follow the vocabulary order.
    freq = np.asarray([[7, 0, 0],
                       [4, 2, 1],
                       [0, 4, 3]])
    # Word counts per class: class 0 sums the first two samples.
    tf = np.asarray([[11, 2, 1],
                     [0, 4, 3]])
    # Here a word seen in one class only gets log(2) + 1, a word seen in
    # both classes gets 1.
    idf = np.asarray([log(2) + 1, 1, 1])

    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5, rtol=0.)

    pred_actual = clf.predict(X)
    # Reuse the expected similarities instead of recomputing them.
    pred_desired = decision_function_desired.argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
def test_actual_results_strategy_uniform():
    """Compare SAX-VSM outputs with hand-computed values (uniform bins).

    The binary series below map to two-letter words (see the worked bag of
    words in the comments).  Expected scores are the cosine similarities
    between per-sample word counts and per-class tf-idf vectors.
    """
    # Data
    X = [[0, 0, 0, 1, 0, 0, 1, 1, 1],
         [0, 1, 1, 1, 0, 0, 1, 1, 1],
         [0, 0, 0, 1, 0, 0, 0, 1, 0]]
    y = [0, 0, 1]

    clf = SAXVSM(n_bins=2, strategy='uniform', window_size=2,
                 numerosity_reduction=False, sublinear_tf=False)
    clf.fit(X, y)
    scores_actual = clf.decision_function(X)

    # X_sax = np.array(['a', 'b'])[np.asarray(X)]
    # X_bow = ["aa aa ab ba aa ab bb bb",
    #          "ab bb bb ba aa ab bb bb",
    #          "aa aa ab ba aa aa ab ba"]

    # Counts of 'aa', 'ab', 'ba', 'bb' for each sample.
    freq = np.array([[3, 2, 1, 2],
                     [1, 2, 1, 4],
                     [4, 2, 2, 0]])
    # Counts of the same words for each class.
    tf = np.array([[4, 4, 2, 6],
                   [4, 2, 2, 0]])
    # Only 'bb' is absent from one class, hence its larger weight.
    idf = np.array([1, 1, 1, log(2) + 1])

    scores_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(scores_actual, scores_desired,
                               atol=1e-5, rtol=0.)

    labels_actual = clf.predict(X)
    labels_desired = scores_desired.argmax(axis=1)
    np.testing.assert_array_equal(labels_actual, labels_desired)
def test_actual_results_strategy_quantile():
    """Check SAX-VSM outputs against hand-computed values (quantile bins).

    Each ten-point series is turned into nine two-letter words (see the
    worked SAX and bag-of-words representations below).  The expected
    decision function is the cosine similarity between every sample's word
    counts and the tf-idf vector of every class; the expected prediction is
    its row-wise argmax.
    """
    # Data
    X = [[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.3, 0.2, 0.4, 0.1, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5]]
    y = [0, 0, 1]

    clf = SAXVSM(n_bins=2, strategy='quantile', window_size=2,
                 numerosity_reduction=False, sublinear_tf=False)
    # Fit once: the same fitted estimator serves both checks below, as in
    # the 'uniform' variant of this test.
    clf.fit(X, y)
    decision_function_actual = clf.decision_function(X)

    # X_sax = [['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
    #          ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
    #          ['a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b']]
    # X_bow = ["aa aa aa aa ab bb bb bb bb",
    #          "aa aa aa aa ab bb bb bb bb",
    #          "ab ba ab ba ab ba ab ba ab"]

    # Counts of 'aa', 'ab', 'ba', 'bb' for each sample.
    freq = np.asarray([[4, 1, 0, 4],
                       [4, 1, 0, 4],
                       [0, 5, 4, 0]])
    # Counts of the same words for each class (class 0 = first two samples).
    tf = np.asarray([[8, 2, 0, 8],
                     [0, 5, 4, 0]])
    # Only 'ab' occurs in both classes, so it alone gets the weight 1.
    idf = np.asarray([log(2) + 1, 1, log(2) + 1, log(2) + 1])

    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5, rtol=0.)

    pred_actual = clf.predict(X)
    # Reuse the expected similarities instead of recomputing them.
    pred_desired = decision_function_desired.argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
def test_actual_results_strategy_uniform():
    """Compare SAX-VSM outputs with hand-computed values (uniform bins).

    The binary series below map to four-letter words (see the worked bag of
    words in the comments).  Expected scores are the cosine similarities
    between per-sample word counts and per-class tf-idf vectors.
    """
    # Data
    X = [[0, 0, 0, 1, 0, 0, 1, 1, 1],
         [0, 1, 1, 1, 0, 0, 1, 1, 1],
         [0, 0, 0, 1, 0, 0, 0, 1, 0]]
    y = [0, 0, 1]

    clf = SAXVSM(window_size=4, word_size=4, n_bins=2, strategy='uniform',
                 numerosity_reduction=False, sublinear_tf=False)
    clf.fit(X, y)
    scores_actual = clf.decision_function(X)

    # X_bow = ["aaab aaba abaa baab aabb abbb",
    #          "abbb bbba bbaa baab aabb abbb",
    #          "aaab aaba abaa baaa aaab aaba"]
    expected_vocabulary = {
        0: 'aaab', 1: 'aaba', 2: 'aabb', 3: 'abaa', 4: 'abbb',
        5: 'baaa', 6: 'baab', 7: 'bbaa', 8: 'bbba'
    }
    assert clf.vocabulary_ == expected_vocabulary

    # Word counts per sample; columns follow the vocabulary order.
    freq = np.array([[1, 1, 1, 1, 1, 0, 1, 0, 0],
                     [0, 0, 1, 0, 2, 0, 1, 1, 1],
                     [2, 2, 0, 1, 0, 1, 0, 0, 0]])
    # Word counts per class.
    tf = np.array([[1, 1, 2, 1, 3, 0, 2, 1, 1],
                   [2, 2, 0, 1, 0, 1, 0, 0, 0]])
    # Words found in a single class weigh log(2) + 1, the others weigh 1.
    single_class = log(2) + 1
    idf = np.array([
        1, 1, single_class, 1, single_class,
        single_class, single_class, single_class, single_class
    ])

    scores_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(scores_actual, scores_desired,
                               atol=1e-5, rtol=0.)

    labels_actual = clf.predict(X)
    labels_desired = scores_desired.argmax(axis=1)
    np.testing.assert_array_equal(labels_actual, labels_desired)
def test_actual_results_without_weights():
    """Check that predictions equal a per-sample majority vote over features.

    One ``SAXVSM`` is fitted and evaluated on each feature separately; for
    every sample the most frequent predicted label is the expected output
    of ``MultivariateClassifier``.
    """
    base_estimator = SAXVSM()
    arr_actual = (
        MultivariateClassifier(estimator=base_estimator).fit(X, y).predict(X)
    )

    # Row i holds the labels predicted from feature i alone.
    per_feature = np.asarray([
        base_estimator.fit(X[:, i], y).predict(X[:, i])
        for i in range(n_features)
    ])

    # Most frequent label per sample (np.argmax keeps the smallest on ties).
    arr_desired = [
        np.argmax(np.bincount(per_feature[:, i])) for i in range(n_samples)
    ]
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0.)
import pytest
import re
from pyts.classification import SAXVSM
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

# Random multivariate toy data shared by the tests of this module.
# NOTE(review): ``np`` is not imported in the visible part of the file;
# presumably ``import numpy as np`` sits above this excerpt - confirm.
n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [
        # A non-classifier in the list of estimators.
        ({'estimator': [SAXVSM(), SAXVSM(), BOSS()]},
         ValueError,
         "Estimator 2 must be a classifier."),

        # A list whose length differs from the number of features.
        ({'estimator': [SAXVSM()]},
         ValueError,
         "If 'estimator' is a list, its length must be equal to "
         "the number of features (1 != 3)"),

        # Neither an estimator nor a list of estimators.
        ({'estimator': None},
         TypeError,
         "'estimator' must be a classifier that inherits from "
         "sklearn.base.BaseEstimator or a list thereof."),
    ])
def test_parameter_check(params, error, err_msg):
    """Check that invalid parameters make ``fit`` raise the expected error."""
    classifier = MultivariateClassifier(**params)
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        classifier.fit(X, y)
from scipy.sparse import csr_matrix
from pyts.classification import SAXVSM
from pyts.image import RecurrencePlot
from pyts.multivariate.transformation import MultivariateTransformer
from pyts.transformation import BOSS

# Random multivariate toy data shared by the tests of this module.
# NOTE(review): ``np``, ``pytest`` and ``re`` are not imported in the visible
# part of the file; presumably they are imported above this excerpt - confirm.
n_samples, n_features, n_timestamps = 40, 3, 30
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [
        # A non-transformer in the list of estimators.
        ({'estimator': [BOSS(), RecurrencePlot(), SAXVSM()]},
         ValueError,
         "Estimator 2 must be a transformer."),

        # A list whose length differs from the number of features.
        ({'estimator': [BOSS()]},
         ValueError,
         "If 'estimator' is a list, its length must be equal to "
         "the number of features (1 != 3)"),

        # Neither an estimator nor a list of estimators.
        ({'estimator': None},
         TypeError,
         "'estimator' must be a transformer that inherits from "
         "sklearn.base.BaseEstimator or a list thereof."),
    ])
def test_parameter_check(params, error, err_msg):
    """Check that invalid parameters make ``fit_transform`` raise."""
    multivariate = MultivariateTransformer(**params)
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        multivariate.fit_transform(X)
It is implemented as :class:`pyts.classification.SAXVSM`. """ # Author: Johann Faouzi <*****@*****.**> # License: BSD-3-Clause import numpy as np import matplotlib.pyplot as plt from pyts.classification import SAXVSM from pyts.datasets import load_gunpoint # Toy dataset X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True) # SAXVSM transformation saxvsm = SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True) saxvsm.fit(X_train, y_train) tfidf = saxvsm.tfidf_ vocabulary_length = len(saxvsm.vocabulary_) X_new = saxvsm.decision_function(X_test) # Visualize the transformation plt.figure(figsize=(14, 5)) width = 0.4 plt.subplot(121) plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0], width=width, label='Class 1') plt.bar(np.arange(vocabulary_length) + width / 2,
def test_parameter_check(params, error, err_msg):
    """Check that invalid parameters make ``SAXVSM.fit`` raise as expected."""
    classifier = SAXVSM(**params)
    # The message is matched literally, hence the escaping.
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        classifier.fit(X, y)
import matplotlib.pyplot as plt
from pyts.classification import SAXVSM

# NOTE(review): ``np`` is used below but not imported in the visible part of
# this script; presumably ``import numpy as np`` precedes this excerpt.

# Parameters
n_samples, n_timestamps = 100, 144
n_classes = 2

# Toy dataset: random series with random binary labels; the series labelled
# 0 are replaced by their cumulative sums along the time axis.
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)
X[y == 0] = np.cumsum(X[y == 0], axis=1)

# SAXVSM transformation: fit the classifier, then keep its fitted tf-idf
# matrix, the size of its vocabulary and the decision function on X.
saxvsm = SAXVSM(n_bins=4, strategy='quantile', window_size=2,
                sublinear_tf=True)
saxvsm.fit(X, y)
tfidf = saxvsm.tfidf_
vocabulary_length = len(saxvsm.vocabulary_)
X_new = saxvsm.decision_function(X)

# Visualize the transformation
plt.figure(figsize=(16, 5))
width = 0.4  # bar width; bars are shifted by half of it around each tick

# Left panel: first row of the tf-idf matrix, one bar per vocabulary entry.
plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],
        width=width, label='Class 0')
import pytest
import re
from pyts.classification import SAXVSM, BOSSVS
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

# Random multivariate toy data shared by the tests of this module.
# NOTE(review): ``np`` is not imported in the visible part of the file;
# presumably ``import numpy as np`` sits above this excerpt - confirm.
n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [
        # A non-classifier in the list of estimators.
        ({'estimator': [SAXVSM(), SAXVSM(), BOSS()]},
         ValueError,
         "Estimator 2 must be a classifier."),

        # A list whose length differs from the number of features.
        ({'estimator': [SAXVSM()]},
         ValueError,
         "If 'estimator' is a list, its length must be equal to "
         "the number of features (1 != 3)"),

        # Neither an estimator nor a list of estimators.
        ({'estimator': None},
         TypeError,
         "'estimator' must be a classifier that inherits from "
         "sklearn.base.BaseEstimator or a list thereof."),
    ])
def test_parameter_check(params, error, err_msg):
    """Check that invalid parameters make ``fit`` raise the expected error."""
    classifier = MultivariateClassifier(**params)
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        classifier.fit(X, y)
from datetime import datetime import os import itertools import pickle import pandas as pd from pyts.classification import BOSSVS, SAXVSM from sktime.utils.load_data import load_from_tsfile_to_dataframe from sklearn.metrics import * UCR_DATASET_PATH = '/mnt/DATA/data/Univariate_ts' datasets = list(os.walk(UCR_DATASET_PATH))[0][1] random_state = 0 classifiers = [ SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True), BOSSVS(word_size=2, n_bins=4, window_size=2) ] tuples = ([(clf.__class__.__name__, 'Accuracy'), (clf.__class__.__name__, 'F1-Score')] for clf in classifiers) index = pd.MultiIndex.from_tuples(itertools.chain(*tuples), names=['classifier', 'metric']) def calculate_performance(output_file): def evaluate_classifiers(dst): print("[%s] Processing dataset %s" % (datetime.now().strftime("%F %T"), dst)) train_x, train_y = load_from_tsfile_to_dataframe( os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts")) test_x, test_y = load_from_tsfile_to_dataframe(
def test_multivariate_classifier_mixin(sample_weight):
    """Check that ``score`` of a multivariate classifier returns a float."""
    fitted = MultivariateClassifier(SAXVSM()).fit(X_multi, y_multi)
    score_value = fitted.score(X_multi, y_multi, sample_weight)
    # Accept both Python floats and NumPy floating scalars.
    assert isinstance(score_value, (float, np.floating))
def test_univariate_classifier_mixin(sample_weight):
    """Check that ``score`` of a univariate classifier returns a float."""
    fitted = SAXVSM().fit(X_uni, y_uni)
    score_value = fitted.score(X_uni, y_uni, sample_weight)
    # Accept both Python floats and NumPy floating scalars.
    assert isinstance(score_value, (float, np.floating))
from pyts.classification import BOSSVS, SAXVSM
from deeplookup import datasets, metrics
from deeplookup.ts import Rsamp

# Models under comparison.
rsamp = Rsamp()
bossvs = BOSSVS(window_size=36, word_size=12)
saxvsm = SAXVSM(window_size=36, word_size=12)

# Only the two pyts classifiers are fitted on the training series; ``rsamp``
# is scored as constructed.
x_train, y_train = datasets.load_train_ts()
bossvs.fit(x_train, y_train)
saxvsm.fit(x_train, y_train)

# (report name, model, keyword arguments forwarded to ``metrics.ts_score``)
evaluations = (
    ("rsamp", rsamp, {"to_categorical": False}),
    ("bossvs", bossvs, {"to_categorical": True}),
    ("saxvsm", saxvsm, {"to_categorical": True}),
)
separator = "-" * 80

for name, model, kwargs in evaluations:
    res = metrics.ts_score(model, **kwargs)
    print(f"metrics for {name}")
    print(res)
    print(separator)
It is implemented as :class:`pyts.classification.SAXVSM`. """ # Author: Johann Faouzi <*****@*****.**> # License: BSD-3-Clause import numpy as np import matplotlib.pyplot as plt from pyts.classification import SAXVSM from pyts.datasets import load_gunpoint # Toy dataset X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True) # SAXVSM transformation saxvsm = SAXVSM(window_size=15, word_size=3, n_bins=2, strategy='uniform') saxvsm.fit(X_train, y_train) tfidf = saxvsm.tfidf_ vocabulary_length = len(saxvsm.vocabulary_) X_new = saxvsm.decision_function(X_test) # Visualize the transformation plt.figure(figsize=(14, 5)) width = 0.4 plt.subplot(121) plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0], width=width, label='Class 1') plt.bar(np.arange(vocabulary_length) + width / 2,