class ActiveLearner:

    ## KNN classifier for BOSS
    #  probably 1-nearest-neighbour
    #  distance measure: cosine similarity
    class BOSS_NN_classifier(BaseEstimator, ClassifierMixin):
        def __init__(self):
            self.X_train = None
            self.Y_train = None

        ## fit the classifier; for KNN (a lazy learner) this just stores the training data
        def fit(self, X, Y):
            # Check that X and Y have correct shape
            X, Y = check_X_y(X, Y)
            # Store the classes seen during fit
            self.classes_ = unique_labels(Y)
            self.X_train = X
            self.Y_train = Y
            return self  # sklearn convention: fit returns the estimator

        ## predict confidences of every class for X
        def predict_proba(self, X):
            # Check if fit had been called
            check_is_fitted(self)
            # Input validation
            X = check_array(X)
            # 1-NN: for each sample, take the class of the most similar training sample
            similarity = cosine_similarity(X, self.X_train)
            neighborclass = self.Y_train[similarity.argmax(axis=1)]
            # one-hot encode the neighbour's class as a pseudo-probability vector
            probas = to_categorical(neighborclass,
                                    num_classes=int(max(self.Y_train)) +
                                    1)  # +1 for class 0
            return probas

        ## predict class of x_test
        def predict(self, x_test):
            proba_predictedlabels = self.predict_proba(x_test)
            predictedlabels = np.argmax(proba_predictedlabels, axis=1)
            return predictedlabels

    ## ROCKET classifier
    #  probably Random Forest
    class ROCKETClassifier(RandomForestClassifier):
        def __init__(self):
            ## settings for the Random Forest
            super().__init__(n_estimators=100, max_depth=10, n_jobs=-1)

        def __str__(self):
            return super().__str__()

    ## Query strategy
    def imbalance_certainty_sampling(self, classifier, X_pool):
        ## the last class is the "don't care" class and gets cut off
        probas = classifier.predict_proba(X_pool)
        probas = probas[:, :-1]  # cut off the "don't care" class
        # certainty per sample = highest remaining class probability
        probas = np.max(probas, axis=1)
        query_idx = multi_argmax(probas, n_instances=self.queryBagsize)
        return query_idx.reshape(-1), X_pool[query_idx].reshape(
            self.queryBagsize, -1)
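    # Worked example (sketch): with queryBagsize = 2 and
    # probas = [[.1, .8, .1], [.6, .3, .1], [.2, .2, .6]], where the last
    # column is the "don't care" class, the per-row certainties are
    # [.8, .6, .2], so windows 0 and 1 are queried first.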

    ## init function
    def __init__(self, windows_pool, n_classes, algo):
        self.n_windows = windows_pool.shape[0]  # number of windows
        # samplesPerWindow is needed to pre-compile ROCKET with numba for a faster FST
        self.samplesPerWindow = windows_pool.shape[1]

        #super().__init__(n_classes,class_names='', windowLength = 1, samplesPerWindow = samplesPerWindow, n_windows = n_windows,n_initial=0,n_queries=0,algo= algo,query="certainty",fast_mode = False, auto_annotation = False,query_bagsize = 10,timing_run= False, detailResults= False, singleErrorOutput= False)

        ## TSC algorithm
        self.algo = algo

        ## query strategy
        self.query_strategy = self.imbalance_certainty_sampling

        ## bagsize setting
        self.queryBagsize = 10

        ## algorithm management and settings
        if self.algo == "ROCKET":
            ## number of kernels (ROCKET extracts 2 features per kernel)
            self.n_kernels = 10000
            self.fs_transform = self.Rocket_transform
            self.classifier = self.ROCKETClassifier()
            ## warm-up: trigger numba JIT compilation of the ROCKET kernels
            _ = generate_kernels(int(self.samplesPerWindow),
                                 int(self.n_kernels))
            zeros = np.zeros([self.n_windows, self.samplesPerWindow],
                             dtype=float)
            _ = apply_kernels(np.zeros_like(zeros)[:, 1:], _)

            #self.classifier = LogisticRegression()

        if self.algo == "BOSS":
            self.fs_transform = self.BOSS_transform
            # Boss Settings
            self.Boss = BOSS(word_size=2,
                             n_bins=4,
                             window_size=12,
                             sparse=False)
            #self.Boss = BOSS(word_size=4, n_bins=2, window_size=10, sparse=False)
            self.classifier = self.BOSS_NN_classifier()
            #self.classifier = RandomForestClassifier()
        else:
            self.Boss = None

        # execute Feature Space Transformation
        self.x_pool = self.fs_transform(windows_pool)

        # Give IDs to windows
        self.x_pool_ID = np.arange(self.x_pool.shape[0])

    ## Feature Space Transformations

    ## BOSS FST
    def BOSS_transform(self, windows):
        X_boss = self.Boss.fit_transform(windows)
        return X_boss

    ## ROCKET FST
    def Rocket_transform(self, windows):
        # optional: normalize each window to zero mean and unit standard deviation
        #windows = (windows - windows.mean(axis=1, keepdims=True)) / (windows.std(axis=1, keepdims=True) + 1e-8)
        kernels = generate_kernels(windows.shape[1], self.n_kernels)

        ## numba JIT warm-up
        # normally adds no compilation cost, since the code was already compiled in the constructor
        _ = generate_kernels(int(self.samplesPerWindow), int(self.n_kernels))
        zeros = np.zeros([self.n_windows, self.samplesPerWindow], dtype=float)
        _ = apply_kernels(np.zeros_like(zeros)[:, 1:], _)
        # apply the compiled kernels
        features = apply_kernels(windows, kernels)
        ## optional scaling for Logistic Regression
        #for feature in features:
        #    feature = preprocessing.scale(feature)
        return features

    ## Public interface (the functions actually used)

    ## search for new interesting samples
    def query(self):
        # modAL query
        query_idx, query_inst = self.learner.query(self.x_pool)
        ## queried samples
        self.current_queries = query_inst
        ## and their positions in feature space
        self.current_idx = query_idx
        ## window IDs = positions of the windows in storage
        windowsIDs = self.x_pool_ID[query_idx]

        # predicted labels of the queried samples
        proba_predictedlabels = self.learner.estimator.predict_proba(
            query_inst)
        predictedlabels = np.argmax(proba_predictedlabels, axis=1)

        return windowsIDs, predictedlabels

    ## initial training
    def initialTraining(self, window_IDs, labels):
        # poolIDs == window_IDs because this is the initial training
        x_initial, y_initial = self.x_pool[window_IDs], labels
        self.learner = modAL_ActiveLearner(estimator=self.classifier,
                                           query_strategy=self.query_strategy,
                                           X_training=x_initial,
                                           y_training=y_initial)
        ## remove the seen samples from the pool
        self.x_pool = np.delete(self.x_pool, window_IDs, axis=0)
        self.x_pool_ID = np.delete(self.x_pool_ID, window_IDs, axis=0)

    ## one whole iteration, as described in the software architecture ("Realization")
    def ActiveLearningIteration(self, new_Labels):
        # learn the new samples with their new labels
        self.learner.teach(self.current_queries, new_Labels, only_new=False)
        # remove the learned samples from the pool
        self.x_pool = np.delete(self.x_pool, self.current_idx, axis=0)
        self.x_pool_ID = np.delete(self.x_pool_ID, self.current_idx, axis=0)
        # query new ones
        return self.query()
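
A minimal sketch of how the class might be driven; `windows_pool` (an array of shape (n_windows, samples_per_window)) and the annotation callback `ask_oracle` are assumptions, not names from the code above:

# Sketch: driving the active learning loop (ask_oracle is a hypothetical
# stand-in for whatever supplies ground-truth labels).
al = ActiveLearner(windows_pool, n_classes=3, algo="BOSS")
initial_IDs = np.arange(10)                  # label the first ten windows
al.initialTraining(initial_IDs, ask_oracle(initial_IDs))
window_IDs, predicted = al.query()           # first batch of queries
for _ in range(5):
    new_labels = ask_oracle(window_IDs)      # annotate the queried windows
    window_IDs, predicted = al.ActiveLearningIteration(new_labels)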
Example #2
import pytest
import re
import numpy as np
from pyts.classification import SAXVSM, BOSSVS
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({
        'estimator': [SAXVSM(), SAXVSM(), BOSS()]
    }, ValueError, "Estimator 2 must be a classifier."),
     ({
         'estimator': [SAXVSM()]
     }, ValueError, "If 'estimator' is a list, its length must be equal to "
      "the number of features (1 != 3)"),
     ({
         'estimator': None
     }, TypeError, "'estimator' must be a classifier that inherits from "
      "sklearn.base.BaseEstimator or a list thereof.")])
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = MultivariateClassifier(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
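
For contrast, a sketch of a configuration the checks above accept: one classifier per feature. The default hyperparameters are assumed to suit the toy data.

# Sketch: a valid per-feature estimator list (one classifier per feature).
clf = MultivariateClassifier([SAXVSM(), SAXVSM(), BOSSVS()])
clf.fit(X, y)
print(clf.score(X, y))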
Example #3
"""
It is implemented as :class:`pyts.transformation.BOSS`.
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.transformation import BOSS

# Parameters
n_samples, n_timestamps = 100, 144

# Toy dataset
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)

# BOSS transformation
boss = BOSS(word_size=2, n_bins=4, window_size=12)
X_boss = boss.fit_transform(X).toarray()

# Visualize the transformation for the first two time series
plt.figure(figsize=(12, 8))
vocabulary_length = len(boss.vocabulary_)
width = 0.3
plt.bar(np.arange(vocabulary_length) - width / 2,
        X_boss[0],
        width=width,
        label='First time series')
plt.bar(np.arange(vocabulary_length) + width / 2,
        X_boss[1],
        width=width,
        label='Second time series')
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(boss.vocabulary_.get)(np.arange(vocabulary_length)),
           fontsize=12, rotation=60)  # label each bar with its learned word
plt.xlabel("Words", fontsize=14)
plt.ylabel("Frequencies", fontsize=14)
plt.legend(loc='best', fontsize=10)
plt.show()
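
BOSS returns a scipy sparse matrix by default, hence the .toarray() call above. A quick check (sketch) that sparse=False yields the same counts as a dense array directly:

# Sketch: sparse=False returns the same word counts without .toarray().
X_dense = BOSS(word_size=2, n_bins=4, window_size=12, sparse=False).fit_transform(X)
assert np.array_equal(X_dense, X_boss)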
Example #4
"""
It is implemented as :class:`pyts.transformation.BOSS`.
"""

# Author: Johann Faouzi <*****@*****.**>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
from pyts.datasets import load_gunpoint
from pyts.transformation import BOSS

# Toy dataset
X_train, _, y_train, _ = load_gunpoint(return_X_y=True)

# BOSS transformation
boss = BOSS(word_size=2, n_bins=4, window_size=12, sparse=False)
X_boss = boss.fit_transform(X_train)

# Visualize the transformation for the first time series of each class
plt.figure(figsize=(6, 4))
vocabulary_length = len(boss.vocabulary_)
width = 0.3
plt.bar(np.arange(vocabulary_length) - width / 2,
        X_boss[y_train == 1][0],
        width=width,
        label='First time series in class 1')
plt.bar(np.arange(vocabulary_length) + width / 2,
        X_boss[y_train == 2][0],
        width=width,
        label='First time series in class 2')
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(boss.vocabulary_.get)(np.arange(vocabulary_length)),
           fontsize=12)  # label each bar with its learned word
plt.xlabel("Words", fontsize=14)
plt.ylabel("Frequencies", fontsize=14)
plt.legend(loc='best', fontsize=10)
plt.show()
Example #5
    # Dynamic Time Warping with a learned warping window
    error_dtw_w = 1 - clf_dtw_w.fit(X_train, y_train).score(X_test, y_test)
    print('Accuracy DTW_W: ', 1 - error_dtw_w)
    print("Error rate with Dynamic Time Warping with a learned warping "
          "window: {0:.4f}".format(error_dtw_w))
    error_dtw_w_list.append(error_dtw_w)

    # BOSS ensemble: one pipeline per (word_size, norm_mean, window_size) combination
    if dataset_list == ["Adiac"] or dataset_list == ["Herring"]:
        window_sizes = np.repeat(np.arange(60, 110, 20), 3)
        norm_means = np.full(window_sizes.size, True)
        word_sizes = np.tile(np.arange(10, 16, 2), 3)
        boss_list = [
            BOSS(word_size=word_size,
                 n_bins=4,
                 norm_mean=norm_mean,
                 drop_sum=norm_mean,
                 window_size=window_size)
            for (word_size, norm_mean,
                 window_size) in zip(word_sizes, norm_means, window_sizes)
        ]
        pipelines = [
            Pipeline([("boss", boss), ('to_dense', transformer), ("knn", knn)])
            for boss in boss_list
        ]
        voting = VotingClassifier([("pipeline_" + str(i), pipeline)
                                   for i, pipeline in enumerate(pipelines)])
        error_boss = 1 - voting.fit(X_train, y_train).score(X_test, y_test)
        print('Accuracy BOSS: ', 1 - error_boss)
        print("Error rate with BOSS: {0:.4f}".format(error_boss))
        error_boss_list.append(error_boss)
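
The excerpt references transformer and knn defined earlier in the script. Plausible stand-ins (assumptions, not the original definitions) could look like this:

# Hypothetical stand-ins for names the excerpt assumes (not from the original script):
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import FunctionTransformer

knn = KNeighborsClassifier(n_neighbors=1)                # 1-NN, as in the BOSS paper
transformer = FunctionTransformer(lambda X: X.toarray(), # densify the sparse BOSS output
                                  accept_sparse=True, validate=False)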
Example #6
import pytest
import re
import numpy as np
from scipy.sparse import csr_matrix
from pyts.classification import SAXVSM
from pyts.image import RecurrencePlot
from pyts.multivariate.transformation import MultivariateTransformer
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps = 40, 3, 30
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({
        'estimator': [BOSS(), RecurrencePlot(),
                      SAXVSM()]
    }, ValueError, "Estimator 2 must be a transformer."),
     ({
         'estimator': [BOSS()]
     }, ValueError, "If 'estimator' is a list, its length must be equal to "
      "the number of features (1 != 3)"),
     ({
         'estimator': None
     }, TypeError, "'estimator' must be a transformer that inherits from "
      "sklearn.base.BaseEstimator or a list thereof.")])
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    transformer = MultivariateTransformer(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        transformer.fit_transform(X)
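
For contrast, a sketch of a configuration the checks above accept: a single transformer broadcast to every feature (the BOSS parameters are assumptions chosen to fit the 30-timestamp toy data).

# Sketch: one BOSS transformer applied to all three features.
transformer = MultivariateTransformer(BOSS(word_size=2, n_bins=2,
                                           window_size=10, sparse=False))
X_new = transformer.fit_transform(X)
print(X_new.shape)  # (n_samples, total number of extracted word-count features)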