Example No. 1
def test_actual_results_strategy_quantile():
    """Test that the actual results are the expected ones."""
    # Data
    X = [[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.3, 0.2, 0.4, 0.1, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5]]
    y = [0, 0, 1]

    clf = SAXVSM(window_size=4,
                 word_size=4,
                 n_bins=2,
                 strategy='quantile',
                 numerosity_reduction=False,
                 sublinear_tf=False)
    decision_function_actual = clf.fit(X, y).decision_function(X)

    # X_bow = ["aabb aabb aabb aabb aabb aabb aabb",
    #          "abab baba abab aabb aabb aabb aabb",
    #          "abab baba abab baba abab baba abab"]

    assert clf.vocabulary_ == {0: 'aabb', 1: 'abab', 2: 'baba'}

    freq = np.asarray([[7, 0, 0], [4, 2, 1], [0, 4, 3]])
    tf = np.asarray([[11, 2, 1], [0, 4, 3]])
    idf = np.asarray([log(2) + 1, 1, 1])
    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5,
                               rtol=0.)

    pred_actual = clf.fit(X, y).predict(X)
    pred_desired = cosine_similarity(freq, tf * idf[None, :]).argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
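
The test excerpts in Examples No. 1 to No. 4 (and several later ones) omit their module-level imports. A minimal header that makes them self-contained would look like the sketch below; whether log comes from math or numpy is an assumption, the other names are dictated by the code above.

# Assumed module-level imports for the test excerpts (not shown in the original files).
import numpy as np
from math import log  # or: from numpy import log
from sklearn.metrics.pairwise import cosine_similarity
from pyts.classification import SAXVSM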
Example No. 2
def test_actual_results_strategy_uniform():
    """Test that the actual results are the expected ones."""
    # Data
    X = [[0, 0, 0, 1, 0, 0, 1, 1, 1], [0, 1, 1, 1, 0, 0, 1, 1, 1],
         [0, 0, 0, 1, 0, 0, 0, 1, 0]]
    y = [0, 0, 1]

    clf = SAXVSM(n_bins=2,
                 strategy='uniform',
                 window_size=2,
                 numerosity_reduction=False,
                 sublinear_tf=False)
    decision_function_actual = clf.fit(X, y).decision_function(X)

    # X_sax = np.array(['a', 'b'])[np.asarray(X)]
    # X_bow = ["aa aa ab ba aa ab bb bb",
    #          "ab bb bb ba aa ab bb bb",
    #          "aa aa ab ba aa aa ab ba"]
    freq = np.asarray([[3, 2, 1, 2], [1, 2, 1, 4], [4, 2, 2, 0]])
    tf = np.asarray([[4, 4, 2, 6], [4, 2, 2, 0]])
    idf = np.asarray([1, 1, 1, log(2) + 1])
    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5,
                               rtol=0.)

    pred_actual = clf.predict(X)
    pred_desired = cosine_similarity(freq, tf * idf[None, :]).argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
Example No. 3
def test_actual_results_strategy_quantile():
    """Test that the actual results are the expected ones."""
    # Data
    X = [[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.3, 0.2, 0.4, 0.1, 0.5, 0.6, 0.7, 0.8, 0.9],
         [0.0, 0.9, 0.1, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4, 0.5]]
    y = [0, 0, 1]

    clf = SAXVSM(n_bins=2,
                 strategy='quantile',
                 window_size=2,
                 numerosity_reduction=False,
                 sublinear_tf=False)
    decision_function_actual = clf.fit(X, y).decision_function(X)

    # X_sax = [['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
    #          ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
    #          ['a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b']]
    # X_bow = ["aa aa aa aa ab bb bb bb bb",
    #          "aa aa aa aa ab bb bb bb bb",
    #          "ab ba ab ba ab ba ab ba ab"]
    freq = np.asarray([[4, 1, 0, 4], [4, 1, 0, 4], [0, 5, 4, 0]])
    tf = np.asarray([[8, 2, 0, 8], [0, 5, 4, 0]])
    idf = np.asarray([log(2) + 1, 1, log(2) + 1, log(2) + 1])
    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5,
                               rtol=0.)

    pred_actual = clf.fit(X, y).predict(X)
    pred_desired = cosine_similarity(freq, tf * idf[None, :]).argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
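
As a sanity check on the expected values in Example No. 3, tf and idf can be rebuilt from the per-sample word counts in freq. The sketch below assumes the unsmoothed tf-idf convention idf(t) = log(n_classes / df(t)) + 1, which reproduces the values asserted above.

# Sketch: reconstructing tf and idf from the word counts of Example No. 3.
freq = np.asarray([[4, 1, 0, 4], [4, 1, 0, 4], [0, 5, 4, 0]])
tf = np.asarray([freq[:2].sum(axis=0), freq[2]])  # class 0 = samples 0 and 1, class 1 = sample 2
df = (tf > 0).sum(axis=0)                         # number of classes in which each word occurs
idf = np.log(tf.shape[0] / df) + 1                # [log(2) + 1, 1, log(2) + 1, log(2) + 1]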
Example No. 4
def test_actual_results_strategy_uniform():
    """Test that the actual results are the expected ones."""
    # Data
    X = [[0, 0, 0, 1, 0, 0, 1, 1, 1], [0, 1, 1, 1, 0, 0, 1, 1, 1],
         [0, 0, 0, 1, 0, 0, 0, 1, 0]]
    y = [0, 0, 1]

    clf = SAXVSM(window_size=4,
                 word_size=4,
                 n_bins=2,
                 strategy='uniform',
                 numerosity_reduction=False,
                 sublinear_tf=False)
    decision_function_actual = clf.fit(X, y).decision_function(X)

    # X_bow = ["aaab aaba abaa baab aabb abbb",
    #          "abbb bbba bbaa baab aabb abbb",
    #          "aaab aaba abaa baaa aaab aaba"]

    assert clf.vocabulary_ == {0: 'aaab', 1: 'aaba', 2: 'aabb', 3: 'abaa',
                               4: 'abbb', 5: 'baaa', 6: 'baab', 7: 'bbaa',
                               8: 'bbba'}

    freq = np.asarray([[1, 1, 1, 1, 1, 0, 1, 0, 0],
                       [0, 0, 1, 0, 2, 0, 1, 1, 1],
                       [2, 2, 0, 1, 0, 1, 0, 0, 0]])
    tf = np.asarray([[1, 1, 2, 1, 3, 0, 2, 1, 1],
                     [2, 2, 0, 1, 0, 1, 0, 0, 0]])
    idf = np.asarray([1, 1, log(2) + 1, 1, log(2) + 1,
                      log(2) + 1, log(2) + 1, log(2) + 1, log(2) + 1])
    decision_function_desired = cosine_similarity(freq, tf * idf[None, :])
    np.testing.assert_allclose(decision_function_actual,
                               decision_function_desired,
                               atol=1e-5,
                               rtol=0.)

    pred_actual = clf.predict(X)
    pred_desired = cosine_similarity(freq, tf * idf[None, :]).argmax(axis=1)
    np.testing.assert_array_equal(pred_actual, pred_desired)
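
Since Example No. 4 asserts the exact content of vocabulary_, a convenient follow-up (not part of the original test) is to map the fitted tf-idf weights back to their SAX words:

# Hypothetical follow-up: print each SAX word with its per-class tf-idf weight.
words = [clf.vocabulary_[i] for i in range(len(clf.vocabulary_))]
for word, w0, w1 in zip(words, clf.tfidf_[0], clf.tfidf_[1]):
    print(f"{word}: class 0 -> {w0:.3f}, class 1 -> {w1:.3f}")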
Example No. 5
def test_actual_results_without_weights():
    """Test that the actual results are the expected ones."""
    params = {'estimator': SAXVSM()}
    arr_actual = MultivariateClassifier(**params).fit(X, y).predict(X)
    predictions = []
    for i in range(n_features):
        predictions.append(
            params['estimator'].fit(X[:, i], y).predict(X[:, i]))
    predictions = np.asarray(predictions)
    arr_desired = []
    for i in range(n_samples):
        arr_desired.append(np.argmax(np.bincount(predictions[:, i])))
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0.)
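
Example No. 5 relies on module-level fixtures (X, y, n_samples, n_features) that the excerpt does not show. Examples No. 6 and No. 11, which come from the same multivariate test module, define them as follows, so an equivalent setup is:

# Module-level fixtures assumed by Example No. 5 (mirroring Examples No. 6 and No. 11).
import numpy as np
n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)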
Example No. 6
import numpy as np
import pytest
import re
from pyts.classification import SAXVSM
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({'estimator': [SAXVSM(), SAXVSM(), BOSS()]},
      ValueError, "Estimator 2 must be a classifier."),
     ({'estimator': [SAXVSM()]},
      ValueError, "If 'estimator' is a list, its length must be equal to "
                  "the number of features (1 != 3)"),
     ({'estimator': None},
      TypeError, "'estimator' must be a classifier that inherits from "
                 "sklearn.base.BaseEstimator or a list thereof.")])
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = MultivariateClassifier(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
Example No. 7
import pytest
import re
import numpy as np
from scipy.sparse import csr_matrix
from pyts.classification import SAXVSM
from pyts.image import RecurrencePlot
from pyts.multivariate.transformation import MultivariateTransformer
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps = 40, 3, 30
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({'estimator': [BOSS(), RecurrencePlot(), SAXVSM()]},
      ValueError, "Estimator 2 must be a transformer."),
     ({'estimator': [BOSS()]},
      ValueError, "If 'estimator' is a list, its length must be equal to "
                  "the number of features (1 != 3)"),
     ({'estimator': None},
      TypeError, "'estimator' must be a transformer that inherits from "
                 "sklearn.base.BaseEstimator or a list thereof.")])
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    transformer = MultivariateTransformer(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        transformer.fit_transform(X)
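
For contrast with the failing configurations above, a configuration that passes validation is a single transformer (applied to every feature) or a list with one transformer per feature. A minimal sketch with default parameters (illustrative, not from the original test):

# Sketch of a valid configuration: BOSS is applied to each of the 3 features.
transformer = MultivariateTransformer(estimator=BOSS())
X_new = transformer.fit_transform(X)  # per-feature outputs are flattened and concatenated
print(X_new.shape)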
Example No. 8
It is implemented as :class:`pyts.classification.SAXVSM`.
"""

# Author: Johann Faouzi <*****@*****.**>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import SAXVSM
from pyts.datasets import load_gunpoint

# Toy dataset
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# SAXVSM transformation
saxvsm = SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True)
saxvsm.fit(X_train, y_train)
tfidf = saxvsm.tfidf_
vocabulary_length = len(saxvsm.vocabulary_)
X_new = saxvsm.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
        tfidf[1],
        width=width,
        label='Class 2')  # continuation assumed: second class's weights, mirroring the first call
Example No. 9
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = SAXVSM(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
Example No. 10
import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import SAXVSM

# Parameters
n_samples, n_timestamps = 100, 144
n_classes = 2

# Toy dataset
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)
X[y == 0] = np.cumsum(X[y == 0], axis=1)

# SAXVSM transformation
saxvsm = SAXVSM(n_bins=4,
                strategy='quantile',
                window_size=2,
                sublinear_tf=True)
saxvsm.fit(X, y)
tfidf = saxvsm.tfidf_
vocabulary_length = len(saxvsm.vocabulary_)
X_new = saxvsm.decision_function(X)

# Visualize the transformation
plt.figure(figsize=(16, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 0')
Example No. 11
import numpy as np
import pytest
import re
from pyts.classification import SAXVSM, BOSSVS
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({'estimator': [SAXVSM(), SAXVSM(), BOSS()]},
      ValueError, "Estimator 2 must be a classifier."),
     ({'estimator': [SAXVSM()]},
      ValueError, "If 'estimator' is a list, its length must be equal to "
                  "the number of features (1 != 3)"),
     ({'estimator': None},
      TypeError, "'estimator' must be a classifier that inherits from "
                 "sklearn.base.BaseEstimator or a list thereof.")])
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = MultivariateClassifier(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
Example No. 12
from datetime import datetime
import os
import itertools
import pickle
import pandas as pd
from pyts.classification import BOSSVS, SAXVSM
from sktime.utils.load_data import load_from_tsfile_to_dataframe
from sklearn.metrics import *

UCR_DATASET_PATH = '/mnt/DATA/data/Univariate_ts'
datasets = list(os.walk(UCR_DATASET_PATH))[0][1]
random_state = 0

classifiers = [
    SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True),
    BOSSVS(word_size=2, n_bins=4, window_size=2)
]
tuples = ([(clf.__class__.__name__, 'Accuracy'),
           (clf.__class__.__name__, 'F1-Score')] for clf in classifiers)
index = pd.MultiIndex.from_tuples(itertools.chain(*tuples),
                                  names=['classifier', 'metric'])


def calculate_performance(output_file):
    def evaluate_classifiers(dst):
        print("[%s] Processing dataset %s" %
              (datetime.now().strftime("%F %T"), dst))

        train_x, train_y = load_from_tsfile_to_dataframe(
            os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(
            os.path.join(UCR_DATASET_PATH, dst, dst + "_TEST.ts"))  # assumed continuation, by symmetry with the _TRAIN.ts call
Example No. 13
def test_multivariate_classifier_mixin(sample_weight):
    clf = MultivariateClassifier(SAXVSM()).fit(X_multi, y_multi)
    assert isinstance(clf.score(X_multi, y_multi, sample_weight),
                      (float, np.floating))
Example No. 14
def test_univariate_classifier_mixin(sample_weight):
    clf = SAXVSM().fit(X_uni, y_uni)
    assert isinstance(clf.score(X_uni, y_uni, sample_weight),
                      (float, np.floating))
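
Examples No. 13 and No. 14 come from mixin tests and reference fixtures (X_uni, y_uni, X_multi, y_multi) and a parametrized sample_weight that the excerpts do not show. A hypothetical setup consistent with the shapes used throughout these tests would be:

# Hypothetical fixtures for Examples No. 13 and No. 14 (names are from the tests, values are assumptions).
import numpy as np
rng = np.random.RandomState(42)
X_uni, y_uni = rng.randn(40, 30), rng.randint(2, size=40)
X_multi, y_multi = rng.randn(40, 3, 30), rng.randint(2, size=40)
sample_weight = None  # the tests are parametrized over sample_weight; None is one such value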
Example No. 15
from pyts.classification import BOSSVS, SAXVSM

from deeplookup import datasets, metrics
from deeplookup.ts import Rsamp

rsamp = Rsamp()
bossvs = BOSSVS(window_size=36, word_size=12)
saxvsm = SAXVSM(window_size=36, word_size=12)

x_train, y_train = datasets.load_train_ts()
for model in (bossvs, saxvsm):
    model.fit(x_train, y_train)

for name, model, kwargs in [
    ("rsamp", rsamp, {"to_categorical": False}),
    ("bossvs", bossvs, {"to_categorical": True}),
    ("saxvsm", saxvsm, {"to_categorical": True}),
]:
    res = metrics.ts_score(model, **kwargs)
    print(f"metrics for {name}")
    print(res)
    print("-" * 80)
Example No. 16
It is implemented as :class:`pyts.classification.SAXVSM`.
"""

# Author: Johann Faouzi <*****@*****.**>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import SAXVSM
from pyts.datasets import load_gunpoint

# Toy dataset
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# SAXVSM transformation
saxvsm = SAXVSM(window_size=15, word_size=3, n_bins=2, strategy='uniform')
saxvsm.fit(X_train, y_train)
tfidf = saxvsm.tfidf_
vocabulary_length = len(saxvsm.vocabulary_)
X_new = saxvsm.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
        tfidf[1],
        width=width,
        label='Class 2')  # continuation assumed: second class's weights, mirroring the first call
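
The excerpt stops here. A plausible way to finish the left panel (an assumption, not taken from the original script) is to label the bars with their SAX words and add a legend:

# Plausible continuation (assumption): annotate the left panel.
words = [saxvsm.vocabulary_[i] for i in range(vocabulary_length)]
plt.xticks(np.arange(vocabulary_length), words, rotation=90, fontsize=8)
plt.legend(loc='best')
plt.title('tf-idf weights of each word for both classes')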