Example #1
import numpy as np
from pyts.approximation import SymbolicFourierApproximation
from pyts.classification import BOSSVS
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Stand-in module-level data with the shapes this test expects; the original
# fixtures are defined elsewhere in the test module.
rng = np.random.RandomState(42)
X = rng.randn(8, 20)
y = np.asarray([0, 0, 0, 0, 1, 1, 1, 1])


def test_actual_results_no_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    bossvs = BOSSVS(
        word_size=4, n_bins=3, window_size=10, window_step=10,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False,
        use_idf=True, smooth_idf=False, sublinear_tf=True
    )

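    # Rebuild the expected results by hand: split each series into
    # non-overlapping windows, turn each window into an SFA word, then build
    # one bag of words per sample and one document per class.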
    X_windowed = X.reshape(8, 2, 10).reshape(16, 10)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    y_repeated = np.repeat(y, 2)
    X_sfa = sfa.fit_transform(X_windowed, y_repeated)
    X_word = np.asarray([''.join(X_sfa[i])
                         for i in range(16)])
    X_word = X_word.reshape(8, 2)
    X_bow = np.asarray([' '.join(X_word[i]) for i in range(8)])
    X_class = np.array([' '.join(X_bow[y == i]) for i in range(2)])

    tfidf = TfidfVectorizer(
        norm=None, use_idf=True, smooth_idf=False, sublinear_tf=True
    )
    tfidf_desired = tfidf.fit_transform(X_class).toarray()

    # Vocabulary
    vocabulary_desired = {value: key for key, value in
                          tfidf.vocabulary_.items()}

    # Tf-idf
    tfidf_actual = bossvs.fit(X, y).tfidf_

    # Decision function
    decision_function_actual = bossvs.decision_function(X)
    decision_function_desired = cosine_similarity(
        tfidf.transform(X_bow), tfidf_desired)

    # Predictions
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(tfidf_actual, tfidf_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        y_pred_actual, y_pred_desired, atol=1e-5, rtol=0)
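For reference, the pipeline exercised by this test can be condensed into a single helper. This is a minimal sketch, assuming non-overlapping windows (window_step equal to window_size) and the quantile binning strategy; manual_bossvs_predict is a hypothetical name and not part of pyts.

import numpy as np
from pyts.approximation import SymbolicFourierApproximation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def manual_bossvs_predict(X, y, word_size=4, n_bins=3, window_size=10):
    """Reproduce the BOSSVS decision rule step by step."""
    n_samples, n_timestamps = X.shape
    n_windows = n_timestamps // window_size

    # Non-overlapping windows, one SFA word per window
    X_windowed = X[:, :n_windows * window_size].reshape(
        n_samples * n_windows, window_size)
    sfa = SymbolicFourierApproximation(n_coefs=word_size, n_bins=n_bins,
                                       strategy='quantile')
    X_sfa = sfa.fit_transform(X_windowed, np.repeat(y, n_windows))
    X_word = np.asarray([''.join(row) for row in X_sfa])
    X_word = X_word.reshape(n_samples, n_windows)

    # One bag of words per sample, one document per class
    X_bow = np.asarray([' '.join(row) for row in X_word])
    classes = np.unique(y)
    X_class = np.asarray([' '.join(X_bow[y == c]) for c in classes])

    # Tf-idf vector per class, then cosine similarity against each sample
    tfidf = TfidfVectorizer(norm=None, use_idf=True, smooth_idf=False,
                            sublinear_tf=True)
    tfidf_class = tfidf.fit_transform(X_class).toarray()
    similarity = cosine_similarity(tfidf.transform(X_bow), tfidf_class)
    return classes[similarity.argmax(axis=1)]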
Example #2

        # One BOSSVS classifier per (word_size, norm_mean, window_size)
        # combination; the opening of this statement is inferred, since the
        # snippet is truncated at this point.
        bossvs = [BOSSVS(word_size=word_size, norm_mean=norm_mean,
                         drop_sum=norm_mean, window_size=window_size)
                  for (word_size, norm_mean, window_size) in zip(
                      word_size, norm_mean, window_size)]

        voting = VotingClassifier([("bossvs_" + str(i), clf)
                                   for i, clf in enumerate(bossvs)])

        accuracy_BOSSVS = voting.fit(X_train, y_train).score(X_test, y_test)
        print('Accuracy BOSSVS: ', accuracy_BOSSVS)
        print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
        error_BOSSVS_list.append(1 - accuracy_BOSSVS)
        
    else:
        if dataset_list == ["Adiac"]:
            clf_bossvs = BOSSVS(word_size=12, n_bins=4, norm_mean=True,
                                drop_sum=True, window_size=80)
            accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test)
            print('Accuracy BOSSVS: ', accuracy_BOSSVS)
            print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
            error_BOSSVS_list.append(1 - accuracy_BOSSVS)
        else:
            clf_bossvs = BOSSVS(word_size=5, n_bins=4, norm_mean=False,
                                drop_sum=False, window_size=40)
            accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test)
            print('Accuracy BOSSVS: ', accuracy_BOSSVS)
            print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
            error_BOSSVS_list.append(1 - accuracy_BOSSVS)
    print()
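For reference, here is a minimal, self-contained sketch of the same ensembling idea, assuming the GunPoint dataset and an arbitrary grid of window sizes rather than the values used above.

from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint
from sklearn.ensemble import VotingClassifier

X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# One BOSSVS classifier per window size (assumed grid), combined by hard voting
window_sizes = [20, 30, 40]
estimators = [("bossvs_" + str(i), BOSSVS(word_size=4, n_bins=4, window_size=w))
              for i, w in enumerate(window_sizes)]
voting = VotingClassifier(estimators)
accuracy = voting.fit(X_train, y_train).score(X_test, y_test)
print("Accuracy BOSSVS ensemble: {0:.4f}".format(accuracy))
print("Error rate BOSSVS ensemble: {0:.4f}".format(1 - accuracy))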

Example #3
"""
This example shows the BOSSVS algorithm: each class is represented by a tfidf
vector, and an unlabeled time series is assigned the label of the tfidf vector
giving the highest cosine similarity with the tf vector of the unlabeled time
series. The BOSSVS algorithm is implemented as
:class:`pyts.classification.BOSSVS`.
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

# Toy dataset
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
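# Cosine similarity between the tf vector of each test series and the
# tfidf vector of each class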
X_new = bossvs.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
        tfidf[1],
        width=width,
        label='Class 2')
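As a brief follow-up sketch (not part of the original example): the predicted label is the class whose tfidf vector is most similar to the tf vector of the test series, which is what BOSSVS.predict returns.

# Predicted label = class with the highest cosine similarity
y_pred = bossvs.predict(X_test)
accuracy = (y_pred == y_test).mean()
print("Accuracy on the GunPoint test set: {0:.4f}".format(accuracy))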
Example #4
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = BOSSVS(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
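In the test suite this function is presumably driven by @pytest.mark.parametrize with a list of invalid parameter sets. A minimal self-contained sketch of the same idea follows; the invalid value (window_size=0) and the caught exception types are assumptions, and no exact error message is checked.

import numpy as np
import pytest
from pyts.classification import BOSSVS

X = np.random.randn(8, 20)
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])


def test_invalid_window_size_raises():
    # window_size=0 is outside the accepted range, so fit should raise
    clf = BOSSVS(window_size=0)
    with pytest.raises((ValueError, TypeError)):
        clf.fit(X, y)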
Example #5
import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS

# Parameters
n_samples, n_timestamps = 100, 144
n_classes = 2

# Toy dataset
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)
X[y == 0] = np.cumsum(X[y == 0], axis=1)

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X, y)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
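# Cosine similarity between the tf vector of each series and the tfidf
# vector of each class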
X_new = bossvs.decision_function(X)

# Visualize the transformation
plt.figure(figsize=(16, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0],
        width=width, label='Class 0')
plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1],
        width=width, label='Class 1')
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(bossvs.vocabulary_.get)(np.arange(vocabulary_length)),