Example #1
def test_accurate_results_without_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    boss = BOSS(
        word_size=4, n_bins=3, window_size=100, window_step=100,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False
    )

    X_windowed = X.reshape(8, 2, 100).reshape(16, 100)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    y_repeated = np.repeat(y, 2)
    X_sfa = sfa.fit_transform(X_windowed, y_repeated)
    X_word = np.asarray([''.join(X_sfa[i]) for i in range(16)])
    X_word = X_word.reshape(8, 2)
    X_bow = np.asarray([' '.join(X_word[i]) for i in range(8)])

    vectorizer = CountVectorizer()
    arr_desired = vectorizer.fit_transform(X_bow).toarray()
    vocabulary_desired = {value: key for key, value in
                          vectorizer.vocabulary_.items()}

    arr_actual = boss.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0)
    assert boss.vocabulary_ == vocabulary_desired

    arr_actual = boss.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0)
    assert boss.vocabulary_ == vocabulary_desired
Example #2
def test_accurate_results():
    """Test that the actual results are the expected ones."""
    X_features = csr_matrix((n_samples, 0), dtype=np.int64)
    vocabulary_ = {}

    weasel = WEASEL(
        word_size=4, n_bins=3, window_sizes=[5, 10],
        window_steps=None, anova=True, drop_sum=True, norm_mean=True,
        norm_std=True, strategy='entropy', chi2_threshold=2, alphabet=None
    )

    for window_size, n_windows in zip([5, 10], [40, 20]):
        X_windowed = X.reshape(n_samples, n_windows, window_size)
        X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=4, drop_sum=True, anova=True, norm_mean=True,
            norm_std=True, n_bins=3, strategy='entropy', alphabet=None
        )
        y_repeated = np.repeat(y, n_windows)
        X_sfa = sfa.fit_transform(X_windowed, y_repeated)
        X_word = np.asarray([''.join(X_sfa[i])
                             for i in range(n_samples * n_windows)])
        X_word = X_word.reshape(n_samples, n_windows)
        X_bow = np.asarray([' '.join(X_word[i]) for i in range(n_samples)])

        vectorizer = CountVectorizer(ngram_range=(1, 2))
        X_counts = vectorizer.fit_transform(X_bow)
        chi2_statistics, _ = chi2(X_counts, y)
        relevant_features = np.where(chi2_statistics > 2)[0]
        X_features = hstack([X_features, X_counts[:, relevant_features]])

        old_length_vocab = len(vocabulary_)
        vocabulary = {value: key
                      for (key, value) in vectorizer.vocabulary_.items()}
        for i, idx in enumerate(relevant_features):
            vocabulary_[i + old_length_vocab] = \
                str(window_size) + " " + vocabulary[idx]

    arr_desired = X_features.toarray()

    # Accurate results for fit_transform
    arr_actual_1 = weasel.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_actual_1, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == vocabulary_

    # Accurate results for fit followed by transform
    arr_actual_2 = weasel.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_actual_2, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == vocabulary_
Example #3
def test_actual_results_no_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    bossvs = BOSSVS(
        word_size=4, n_bins=3, window_size=10, window_step=10,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False,
        use_idf=True, smooth_idf=False, sublinear_tf=True
    )

    X_windowed = X.reshape(8, 2, 10).reshape(16, 10)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    y_repeated = np.repeat(y, 2)
    X_sfa = sfa.fit_transform(X_windowed, y_repeated)
    X_word = np.asarray([''.join(X_sfa[i])
                         for i in range(16)])
    X_word = X_word.reshape(8, 2)
    X_bow = np.asarray([' '.join(X_word[i]) for i in range(8)])
    X_class = np.array([' '.join(X_bow[y == i]) for i in range(2)])

    tfidf = TfidfVectorizer(
        norm=None, use_idf=True, smooth_idf=False, sublinear_tf=True
    )
    tfidf_desired = tfidf.fit_transform(X_class).toarray()

    # Vocabulary
    vocabulary_desired = {value: key for key, value in
                          tfidf.vocabulary_.items()}

    # Tf-idf
    tfidf_actual = bossvs.fit(X, y).tfidf_

    # Decision function
    decision_function_actual = bossvs.decision_function(X)
    decision_function_desired = cosine_similarity(
        tfidf.transform(X_bow), tfidf_desired)

    # Predictions
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(tfidf_actual, tfidf_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        y_pred_actual, y_pred_desired, atol=1e-5, rtol=0)
Example #4
class Data_Transformer:
    SAX = SymbolicAggregateApproximation(strategy='uniform', alphabet='ordinal')
    Xtr_SAX = SAX.fit_transform(Xtr)
    Xte_SAX = SAX.fit_transform(Xte)

    SFA = SymbolicFourierApproximation(alphabet='ordinal')
    Xtr_SFA = SFA.fit_transform(Xtr)
    Xte_SFA = SFA.fit_transform(Xte)
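Because SymbolicFourierApproximation learns its bin edges (and, with anova=True, its coefficient selection) during fit, fitting it separately on the test set as above can yield symbols that are not comparable across the two splits. A minimal alternative sketch, assuming Xtr and Xte are 2D arrays of shape (n_samples, n_timestamps) with the same series length, fits once on the training data and reuses the fitted transformer:

from pyts.approximation import SymbolicFourierApproximation

# Sketch only: fit on the training set and reuse the learned bin edges
# for the test set.
sfa = SymbolicFourierApproximation(alphabet='ordinal')
Xtr_SFA = sfa.fit_transform(Xtr)   # fit + transform on the training series
Xte_SFA = sfa.transform(Xte)       # transform the test series with the same fit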
Example #5
    def sfa(train_vec_samples, test_vec_samples, n_components=100, n_bins=2, alphabet='ordinal'):
        '''
        Apply SFA to reduce the dimensionality of the input vectors.

        :param train_vec_samples: training vectors used to fit the transformer
        :param test_vec_samples: test vectors transformed with the fitted transformer
        :param n_components: number of Fourier coefficients to keep
        :param n_bins: number of bins used to discretize each coefficient
        :param alphabet: alphabet used for the symbols ('ordinal' yields integers)
        :return: the transformed training and test samples
        '''
        sfa = SymbolicFourierApproximation(n_coefs=n_components, n_bins=n_bins, alphabet=alphabet)
        # Fit on the training samples only, then transform both sets.
        sfa.fit(train_vec_samples)

        sfa_train_samples = sfa.transform(train_vec_samples)
        sfa_test_samples = sfa.transform(test_vec_samples)

        return sfa_train_samples, sfa_test_samples
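A hypothetical usage sketch for the helper above, assuming it is accessible as a standalone (or static) function since it takes no self argument; the synthetic arrays stand in for real feature vectors.

import numpy as np

# Synthetic data: 20 training and 10 test vectors, each of length 128.
rng = np.random.RandomState(42)
train = rng.randn(20, 128)
test = rng.randn(10, 128)

# Keep 16 Fourier coefficients, discretized into 2 bins; with
# alphabet='ordinal' each vector becomes 16 integer symbols in {0, 1}.
train_sym, test_sym = sfa(train, test, n_components=16, n_bins=2, alphabet='ordinal')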
Example #6
def test_actual_results(params):
    """Test that the actual results are the expected ones."""
    arr_actual = SymbolicFourierApproximation(**params).fit_transform(X, y)
    arr_desired = _compute_expected_results(X, y, **params)
    np.testing.assert_array_equal(arr_actual, arr_desired)
Example #7
def test_fit_transform(params):
    """Test that fit and transform yield the same results as fit_transform."""
    arr_1 = SymbolicFourierApproximation(**params).fit(X, y).transform(X)
    arr_2 = SymbolicFourierApproximation(**params).fit_transform(X, y)
    np.testing.assert_array_equal(arr_1, arr_2)
Example #8
import numpy as np
import pytest
from sklearn.base import clone

from pyts.classification import SAXVSM
from pyts.datasets import load_gunpoint, load_basic_motions
from pyts.multivariate.image import JointRecurrencePlot
from pyts.multivariate.classification import MultivariateClassifier
from pyts.approximation import SymbolicFourierApproximation

X_uni, _, y_uni, _ = load_gunpoint(return_X_y=True)
X_multi, _, y_multi, _ = load_basic_motions(return_X_y=True)


@pytest.mark.parametrize('estimator, X, y', [
    (SymbolicFourierApproximation(n_bins=2), X_uni, None),
    (SymbolicFourierApproximation(n_bins=2, strategy='entropy'), X_uni, y_uni)
])
def test_univariate_transformer_mixin(estimator, X, y):
    sfa_1 = clone(estimator)
    sfa_2 = clone(estimator)
    np.testing.assert_array_equal(sfa_1.fit_transform(X, y),
                                  sfa_2.fit(X, y).transform(X))


@pytest.mark.parametrize('estimator, X, y',
                         [(JointRecurrencePlot(), X_multi, None),
                          (JointRecurrencePlot(), X_multi, y_multi)])
def test_multivariate_transformer_mixin(estimator, X, y):
    jrp_1 = clone(estimator)
    jrp_2 = clone(estimator)
    np.testing.assert_array_equal(jrp_1.fit_transform(X, y),
                                  jrp_2.fit(X, y).transform(X))
Example #9
from pyts.approximation import SymbolicFourierApproximation
from pyts.datasets import load_gunpoint

X, _, _, _ = load_gunpoint(return_X_y=True)
transformer = SymbolicFourierApproximation(n_coefs=4)
X_new = transformer.fit_transform(X)
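A rough inspection sketch of the result, assuming the gunpoint training split (50 series of length 150); with the default n_bins=4 each kept Fourier coefficient is mapped to one of the letters 'a' to 'd'.

print(X_new.shape)  # expected: (50, 4), one symbol per kept coefficient
print(X_new[0])     # e.g. an array of four letters such as ['a' 'c' 'd' 'b'] (values will vary)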