Example #1
0
def test_actual_results_no_numerosity_reduction():
    """Compare BOSSVS with a hand-built SFA + tf-idf reference pipeline.

    The reference splits each series into non-overlapping windows, turns
    every window into an SFA word, joins the words of a sample into a bag
    of words, merges the bags of each class into one document and applies
    tf-idf. The fitted estimator must expose the same vocabulary and
    tf-idf matrix, and its decision function and predictions must match
    the cosine similarities computed from the reference.
    """
    # Layout assumed for the module-level fixture ``X``.
    n_samples, n_windows, window_size = 8, 2, 10
    n_classes = 2

    # Options shared by the estimator under test and the reference steps.
    sfa_options = dict(
        n_bins=3, anova=False, drop_sum=False, norm_mean=False,
        norm_std=False, strategy='quantile', alphabet=None
    )
    tfidf_options = dict(use_idf=True, smooth_idf=False, sublinear_tf=True)

    bossvs = BOSSVS(
        word_size=4, window_size=window_size, window_step=window_size,
        numerosity_reduction=False, **sfa_options, **tfidf_options
    )

    # Reference: one SFA word per window, labels repeated once per window.
    windows = X.reshape(n_samples, n_windows, window_size)
    windows = windows.reshape(n_samples * n_windows, window_size)
    sfa = SymbolicFourierApproximation(n_coefs=4, **sfa_options)
    symbols = sfa.fit_transform(windows, np.repeat(y, n_windows))
    words = np.asarray([''.join(row) for row in symbols])
    words = words.reshape(n_samples, n_windows)

    # One bag of words per sample, then one document per class.
    bags = np.asarray([' '.join(sample_words) for sample_words in words])
    documents = np.array(
        [' '.join(bags[y == label]) for label in range(n_classes)]
    )

    tfidf = TfidfVectorizer(norm=None, **tfidf_options)
    tfidf_desired = tfidf.fit_transform(documents).toarray()

    # The estimator is expected to map index -> word, i.e. the inverse of
    # the vectorizer's word -> index mapping.
    vocabulary_desired = {
        index: word for word, index in tfidf.vocabulary_.items()
    }

    # Values produced by the estimator under test.
    tfidf_actual = bossvs.fit(X, y).tfidf_
    decision_function_actual = bossvs.decision_function(X)
    y_pred_actual = bossvs.predict(X)

    # Values derived from the reference pipeline.
    decision_function_desired = cosine_similarity(
        tfidf.transform(bags), tfidf_desired)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    assert bossvs.vocabulary_ == vocabulary_desired
    comparisons = (
        (tfidf_actual, tfidf_desired),
        (decision_function_actual, decision_function_desired),
        (y_pred_actual, y_pred_desired),
    )
    for actual, desired in comparisons:
        np.testing.assert_allclose(actual, desired, atol=1e-5, rtol=0)
                
        return correct / count
    
    # Score the SAX-based approach on the train/test split.
    # NOTE(review): test_accuracy is presumably the helper whose tail
    # (`return correct / count`) appears just above -- confirm.
    accuracy_SAX = test_accuracy(train, test, win, paa, alp, na_strategy, zthresh)
    # The error rate is the complement of the accuracy.
    error_SAX = 1 - accuracy_SAX
    print('Accuracy SAX: ', accuracy_SAX )
    print("Error rate SAX {0:.4f}".format(error_SAX))
    # Accumulate the error in error_SAX_list, which is defined outside
    # this excerpt.
    error_SAX_list.append(error_SAX)
    
    #BOSSVS
    if dataset_list == ["SyntheticControl"]:
        # Ensemble of BOSSVS classifiers: one member per window size in
        # [18, 36], each with word_size=6 and norm_mean=False.
        window_size = np.arange(18, 37, dtype='int64')
        norm_mean = np.full(window_size.size, False)
        word_size = np.full(window_size.size, 6, dtype='int64')

        # drop_sum is set to the same flag as norm_mean. The loop variables
        # have their own names so they do not shadow the parameter arrays.
        bossvs = [BOSSVS(word_size=word, n_bins=4, norm_mean=mean_flag,
                         drop_sum=mean_flag, window_size=window)
                  for word, mean_flag, window in zip(
                      word_size, norm_mean, window_size)]

        # Each member gets a unique name: "bossvs_0", "bossvs_1", ...
        voting = VotingClassifier([("bossvs_" + str(i), member)
                                   for i, member in enumerate(bossvs)])

        accuracy_BOSSVS = voting.fit(X_train, y_train).score(X_test, y_test)
        # The error rate is the complement of the accuracy.
        error_BOSSVS = 1 - accuracy_BOSSVS
        print('Accuracy BOSSVS: ', accuracy_BOSSVS )
        # This line used to be labelled "SAX" although it reports the
        # BOSSVS error rate.
        print("Error rate BOSSVS {0:.4f}".format(error_BOSSVS))
        error_BOSSVS_list.append(error_BOSSVS)
        
    else:
        if dataset_list == ["Adiac"]:
            clf_bossvs = BOSSVS(word_size=12, n_bins=4, norm_mean=True,drop_sum=True, window_size=80)
            accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test)
Example #3
0
def test_parameter_check(params, error, err_msg):
    """Check that fitting BOSSVS built from ``params`` raises ``error``.

    ``err_msg`` is escaped first so that it is matched literally instead
    of being interpreted as a regular expression.
    """
    estimator = BOSSVS(**params)
    literal_message = re.escape(err_msg)
    with pytest.raises(error, match=literal_message):
        estimator.fit(X, y)
Example #4
0
as a tfidf vector. For an unlabeled time series, the predicted label is
the label of the tfidf vector giving the highest cosine similarity with
the tf vector of the unlabeled time series. The BOSSVS algorithm is
implemented as :class:`pyts.classification.BOSSVS`.
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

# Toy dataset: load_gunpoint returns the data already split into
# training and test parts.
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# BOSSVS transformation: fit on the training set, then keep the fitted
# tf-idf matrix (indexed by class in the plot below) and the size of the
# learned vocabulary.
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
# Decision function values for the test set.
X_new = bossvs.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
# Bar width; each series is shifted by half a width around the integer
# positions so the bars of the two classes sit side by side.
width = 0.4

# First panel of a 1x2 grid: tf-idf value of every vocabulary entry.
plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
Example #5
0
     ({
         'estimator': BOSS()
     }, X[0], ValueError, "X must be 3-dimensional (got 2).")])
def test_input_check(params, X, error, err_msg):
    """Check that fitting MultivariateClassifier on ``X`` raises ``error``.

    ``err_msg`` is escaped first so that it is matched literally instead
    of being interpreted as a regular expression.
    """
    estimator = MultivariateClassifier(**params)
    literal_message = re.escape(err_msg)
    with pytest.raises(error, match=literal_message):
        estimator.fit(X, y)


@pytest.mark.parametrize('params', [{
    'estimator': SAXVSM()
}, {
    'estimator': [SAXVSM() for _ in range(n_features)]
}, {
    'estimator': [SAXVSM(), SAXVSM(), BOSSVS()]
}])
def test_actual_results_without_weights(params):
    """Test that the actual results are the expected ones."""
    arr_actual = MultivariateClassifier(**params).fit(X, y).predict(X)
    predictions = []
    if isinstance(params['estimator'], list):
        for i in range(n_features):
            predictions.append(params['estimator'][i].fit(X[:, i],
                                                          y).predict(X[:, i]))
    else:
        for i in range(n_features):
            predictions.append(params['estimator'].fit(X[:, i],
                                                       y).predict(X[:, i]))
    predictions = np.asarray(predictions)
    arr_desired = []
from datetime import datetime
import os
import itertools
import pickle
import pandas as pd
from pyts.classification import BOSSVS, SAXVSM
from sktime.utils.load_data import load_from_tsfile_to_dataframe
from sklearn.metrics import *

# Root of the dataset archive.
UCR_DATASET_PATH = '/mnt/DATA/data/Univariate_ts'

# Every first-level sub-directory of the archive is treated as one dataset.
# Only the first entry produced by ``os.walk`` is needed; materialising the
# whole walk would traverse every dataset directory just to read that first
# entry. ``os.walk`` yields nothing for a missing or unreadable root, so that
# case is reported explicitly instead of failing with a bare IndexError.
try:
    datasets = next(os.walk(UCR_DATASET_PATH))[1]
except StopIteration:
    raise FileNotFoundError(
        "Dataset directory not found or not readable: %r" % UCR_DATASET_PATH
    ) from None
random_state = 0

classifiers = [
    SAXVSM(n_bins=4, strategy='uniform', window_size=2, sublinear_tf=True),
    BOSSVS(word_size=2, n_bins=4, window_size=2)
]
# One (classifier name, metric) pair per result row. Built as a list so it
# can be reused after the index has been created; a generator would be left
# exhausted.
tuples = [(type(clf).__name__, metric)
          for clf in classifiers
          for metric in ('Accuracy', 'F1-Score')]
index = pd.MultiIndex.from_tuples(tuples, names=['classifier', 'metric'])


def calculate_performance(output_file):
    def evaluate_classifiers(dst):
        print("[%s] Processing dataset %s" %
              (datetime.now().strftime("%F %T"), dst))

        train_x, train_y = load_from_tsfile_to_dataframe(
            os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(
Example #7
0
import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS

# Parameters: size of the synthetic dataset and number of classes.
n_samples, n_timestamps = 100, 144
n_classes = 2

# Toy dataset: Gaussian noise with random labels, drawn from a seeded
# generator so the example is reproducible. Class 0 samples are replaced by
# their cumulative sum along the time axis; the other samples are left as
# drawn.
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)
X[y == 0] = np.cumsum(X[y == 0], axis=1)

# BOSSVS transformation: fit, then keep the fitted tf-idf matrix (indexed
# by class in the plot below) and the size of the learned vocabulary.
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X, y)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
# Decision function values for the same samples used for fitting.
X_new = bossvs.decision_function(X)

# Visualize the transformation
plt.figure(figsize=(16, 5))
# Bar width; each series is shifted by half a width around the integer
# positions so the bars of the two classes sit side by side.
width = 0.4

# First panel of a 1x2 grid: tf-idf value of every vocabulary entry.
plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],
        width=width, label='Class 0')
plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1],
        width=width, label='Class 1')
plt.xticks(np.arange(vocabulary_length),
Example #8
0
from pyts.classification import BOSSVS, SAXVSM

from deeplookup import datasets, metrics
from deeplookup.ts import Rsamp

# Models under comparison. The two pyts classifiers share the same window
# and word sizes.
rsamp = Rsamp()
bossvs = BOSSVS(window_size=36, word_size=12)
saxvsm = SAXVSM(window_size=36, word_size=12)

# Only the pyts classifiers are fitted in this script; rsamp is scored
# as constructed.
x_train, y_train = datasets.load_train_ts()
bossvs.fit(x_train, y_train)
saxvsm.fit(x_train, y_train)

# (display name, model, keyword arguments forwarded to metrics.ts_score)
scoring_runs = (
    ("rsamp", rsamp, {"to_categorical": False}),
    ("bossvs", bossvs, {"to_categorical": True}),
    ("saxvsm", saxvsm, {"to_categorical": True}),
)
separator = "-" * 80

for name, model, kwargs in scoring_runs:
    res = metrics.ts_score(model, **kwargs)
    print(f"metrics for {name}")
    print(res)
    print(separator)