Beispiel #1
0
def test_actual_results_no_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    bossvs = BOSSVS(
        word_size=4, n_bins=3, window_size=10, window_step=10,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False,
        use_idf=True, smooth_idf=False, sublinear_tf=True
    )

    X_windowed = X.reshape(8, 2, 10).reshape(16, 10)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    y_repeated = np.repeat(y, 2)
    X_sfa = sfa.fit_transform(X_windowed, y_repeated)
    X_word = np.asarray([''.join(X_sfa[i])
                         for i in range(16)])
    X_word = X_word.reshape(8, 2)
    X_bow = np.asarray([' '.join(X_word[i]) for i in range(8)])
    X_class = np.array([' '.join(X_bow[y == i]) for i in range(2)])

    tfidf = TfidfVectorizer(
        norm=None, use_idf=True, smooth_idf=False, sublinear_tf=True
    )
    tfidf_desired = tfidf.fit_transform(X_class).toarray()

    # Vocabulary
    vocabulary_desired = {value: key for key, value in
                          tfidf.vocabulary_.items()}

    # Tf-idf
    tfidf_actual = bossvs.fit(X, y).tfidf_

    # Decision function
    decision_function_actual = bossvs.decision_function(X)
    decision_function_desired = cosine_similarity(
        tfidf.transform(X_bow), tfidf_desired)

    # Predictions
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(tfidf_actual, tfidf_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        y_pred_actual, y_pred_desired, atol=1e-5, rtol=0)
Beispiel #2
0
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.classification import BOSSVS
from pyts.datasets import load_gunpoint

# Toy dataset
X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True)

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
X_new = bossvs.decision_function(X_test)

# Visualize the transformation
plt.figure(figsize=(14, 5))
width = 0.4

plt.subplot(121)
plt.bar(np.arange(vocabulary_length) - width / 2,
        tfidf[0],
        width=width,
        label='Class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
        tfidf[1],
        width=width,
        label='Class 2')
plt.xticks(np.arange(vocabulary_length),
Beispiel #3
0
print('Feature-BOSSVS')
'''
nps = input("Enter the number of segments for PAA: ")
n_paa_segments = int(nps) #number of segments for PAA
nss = input('Enter the number if segments for SAX: ')
n_sax_symbols = int(nss)
w = input('Enter the window size:')
window_size = int(w)
'''

PATH = "G:/Coding/ML/UCRArchive_2018/"  # Change this value if necessary
ds = input('Enter the Time series Dataset: ')
dataset = str(ds)

file_train = PATH + str(dataset) + "/" + str(dataset) + "_TRAIN.tsv"
file_test = PATH + str(dataset) + "/" + str(dataset) + "_TEST.tsv"

fulltrain = np.genfromtxt(fname=file_train, delimiter="\t", skip_header=0)
fulltest = np.genfromtxt(fname=file_test, delimiter="\t", skip_header=0)

X_train, y_train = fulltrain[:, 1:], fulltrain[:, 0]
X_test, y_test = fulltest[:, 1:], fulltest[:, 0]

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)
tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)
X_new = bossvs.decision_function(X_train)  #tfidf vectors