## Active learning wrapper around modAL with BOSS and ROCKET feature space
## transformations (FSTs).
# Imports reconstructed from usage; `generate_kernels` / `apply_kernels` are
# assumed to come from the numba-based ROCKET reference implementation
# (rocket_functions.py).
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from tensorflow.keras.utils import to_categorical
from modAL.models import ActiveLearner as modAL_ActiveLearner
from modAL.utils.selection import multi_argmax
from pyts.transformation import BOSS
from rocket_functions import generate_kernels, apply_kernels


class ActiveLearner:

    ## KNN classifier for BOSS:
    # 1-nearest neighbour with cosine similarity as the distance measure
    class BOSS_NN_classifier(BaseEstimator, ClassifierMixin):

        def __init__(self):
            self.X_train = None
            self.Y_train = None

        ## fit classifier; a KNN (= lazy learner) only stores the training data
        def fit(self, X, Y):
            # check that X and Y have the correct shape
            X, Y = check_X_y(X, Y)
            # store the classes seen during fit
            self.classes_ = unique_labels(Y)
            self.X_train = X
            self.Y_train = Y

        ## predict confidences of every class for X
        def predict_proba(self, X):
            # check that fit has been called
            check_is_fitted(self)
            # input validation
            X = check_array(X)
            similarity = cosine_similarity(X, self.X_train)
            neighborclass = self.Y_train[similarity.argmax(axis=1)]
            # one-hot encode the nearest neighbour's class; +1 for class zero
            probas = to_categorical(neighborclass,
                                    num_classes=int(max(self.Y_train)) + 1)
            return probas

        ## predict class of x_test
        def predict(self, x_test):
            proba_predictedlabels = self.predict_proba(x_test)
            predictedlabels = np.argmax(proba_predictedlabels, axis=1)
            return predictedlabels

    ## ROCKET classifier: a random forest
    class ROCKETClassifier(RandomForestClassifier):

        def __init__(self):
            ## settings for the random forest
            super().__init__(n_estimators=100, max_depth=10, n_jobs=-1)

        def __str__(self):
            return super().__str__()

    ## query strategy
    def imbalance_certainty_sampling(self, classifier, X_pool):
        # modAL passes the learner as `classifier`; the stored estimator is
        # used directly here
        probas = self.classifier.predict_proba(X_pool)
        ## the last class is the "don't care" class and gets cut off
        probas = probas[:, 0:-1]
        probas = np.max(probas, axis=1)
        query_idx = multi_argmax(probas, n_instances=self.queryBagsize)
        return query_idx.reshape(-1), X_pool[query_idx].reshape(
            self.queryBagsize, -1)

    ## init function
    def __init__(self, windows_pool, n_classes, algo):
        self.n_windows = windows_pool.shape[0]  # number of windows
        # samples per window is needed to accelerate ROCKET (faster FST with numba)
        self.samplesPerWindow = windows_pool.shape[1]
        #super().__init__(n_classes, class_names='', windowLength=1,
        #                 samplesPerWindow=samplesPerWindow, n_windows=n_windows,
        #                 n_initial=0, n_queries=0, algo=algo, query="certainty",
        #                 fast_mode=False, auto_annotation=False, query_bagsize=10,
        #                 timing_run=False, detailResults=False,
        #                 singleErrorOutput=False)
        ## TSC algorithm
        self.algo = algo
        ## query strategy
        self.query_strategy = self.imbalance_certainty_sampling
        ## bagsize setting
        self.queryBagsize = 10
        ## algorithm management and settings
        if self.algo == "ROCKET":
            # number of kernels (2 features per kernel)
            self.n_kernels = 10000
            self.fs_transform = self.Rocket_transform
            self.classifier = self.ROCKETClassifier()
            ## C code generation with numba (warm-up compile)
            kernels = generate_kernels(int(self.samplesPerWindow),
                                       int(self.n_kernels))
            zeros = np.zeros([self.n_windows, self.samplesPerWindow],
                             dtype=float)
            _ = apply_kernels(np.zeros_like(zeros)[:, 1:], kernels)
            #self.classifier = LogisticRegression()
        if self.algo == "BOSS":
            self.fs_transform = self.BOSS_transform
            # BOSS settings
            self.Boss = BOSS(word_size=2, n_bins=4, window_size=12,
                             sparse=False)
            #self.Boss = BOSS(word_size=4, n_bins=2, window_size=10, sparse=False)
            self.classifier = self.BOSS_NN_classifier()
            #self.classifier = RandomForestClassifier()
        else:
            self.Boss = None
        # execute the feature space transformation
        self.x_pool = self.fs_transform(windows_pool)
        # give IDs to the windows
        self.x_pool_ID = np.arange(self.x_pool.shape[0])

    ## Feature space transformations
    ## BOSS FST
    def BOSS_transform(self, windows):
        X_boss = self.Boss.fit_transform(windows)
        return X_boss

    ## ROCKET FST
    def Rocket_transform(self, windows):
        # optionally normalize to zero mean and unit standard deviation
        #windows = (windows - windows.mean(axis=1, keepdims=True)) / (windows.std(axis=1, keepdims=True) + 1e-8)
        kernels = generate_kernels(windows.shape[1], self.n_kernels)
        ## C code generation with numba
        # normally skipped since the C code was already generated in the constructor
        warmup_kernels = generate_kernels(int(self.samplesPerWindow),
                                          int(self.n_kernels))
        zeros = np.zeros([self.n_windows, self.samplesPerWindow], dtype=float)
        _ = apply_kernels(np.zeros_like(zeros)[:, 1:], warmup_kernels)
        # apply the compiled C code
        features = apply_kernels(windows, kernels)
        ## scaling for logistic regression
        #for feature in features:
        #    feature = preprocessing.scale(feature)
        return features

    ## Actually used functions
    ## query new interesting samples
    def query(self):
        # modAL query
        query_idx, query_inst = self.learner.query(self.x_pool)
        ## queried samples ...
        self.current_queries = query_inst
        ## ... and their position in feature space
        self.current_idx = query_idx
        ## window IDs = position in the window storage
        windowsIDs = self.x_pool_ID[query_idx]
        proba_predictedlabels = self.learner.estimator.predict_proba(query_inst)
        predictedlabels = np.argmax(proba_predictedlabels, axis=1)
        return windowsIDs, predictedlabels

    ## initial training
    def initialTraining(self, window_IDs, labels):
        # pool IDs == window IDs because this is the initial training
        x_initial, y_initial = self.x_pool[window_IDs], labels
        self.learner = modAL_ActiveLearner(estimator=self.classifier,
                                           query_strategy=self.query_strategy,
                                           X_training=x_initial,
                                           y_training=y_initial)
        ## remove seen samples from the pool
        self.x_pool = np.delete(self.x_pool, window_IDs, axis=0)
        self.x_pool_ID = np.delete(self.x_pool_ID, window_IDs, axis=0)

    ## whole iteration as described in the software architecture ("Realization")
    def ActiveLearningIteration(self, new_Labels):
        # learn the new samples with their new labels
        self.learner.teach(self.current_queries, new_Labels, only_new=False)
        # remove the learned samples from the pool
        self.x_pool = np.delete(self.x_pool, self.current_idx, axis=0)
        self.x_pool_ID = np.delete(self.x_pool_ID, self.current_idx, axis=0)
        # query new ones
        return self.query()
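
# ---------------------------------------------------------------------------
# A minimal usage sketch of the class above (not part of the original source).
# It assumes a hypothetical `windows` array of segmented time-series windows;
# `initial_ids`, `initial_labels`, and `oracle_labels` are illustrative names
# standing in for a human labeling oracle.
if __name__ == "__main__":
    windows = np.random.randn(500, 128)  # 500 windows, 128 samples each
    learner = ActiveLearner(windows, n_classes=3, algo="BOSS")

    # seed the learner with a handful of labeled windows
    initial_ids = np.arange(5)
    initial_labels = np.array([0, 1, 2, 0, 1])
    learner.initialTraining(initial_ids, initial_labels)

    # first query, then iterate: label the proposed windows and teach them back
    window_ids, predicted = learner.query()
    for _ in range(3):
        oracle_labels = predicted  # placeholder: replace with human labels
        window_ids, predicted = learner.ActiveLearningIteration(oracle_labels)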
import numpy as np
import pytest
import re

from pyts.classification import SAXVSM, BOSSVS
from pyts.multivariate.classification import MultivariateClassifier
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps, n_classes = 40, 3, 30, 2
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)
y = rng.randint(n_classes, size=n_samples)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({'estimator': [SAXVSM(), SAXVSM(), BOSS()]},
      ValueError, "Estimator 2 must be a classifier."),

     ({'estimator': [SAXVSM()]},
      ValueError, "If 'estimator' is a list, its length must be equal to "
                  "the number of features (1 != 3)"),

     ({'estimator': None},
      TypeError, "'estimator' must be a classifier that inherits from "
                 "sklearn.base.BaseEstimator or a list thereof.")]
)
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    clf = MultivariateClassifier(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        clf.fit(X, y)
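
# A minimal companion sketch (an assumption, not in the original test file)
# showing a configuration that passes validation: one classifier per feature,
# each inheriting from sklearn.base.BaseEstimator.
def test_valid_estimator_list():
    """Fitting succeeds when every estimator is a classifier."""
    clf = MultivariateClassifier(estimator=[SAXVSM(), SAXVSM(), BOSSVS()])
    clf.fit(X, y)
    assert clf.predict(X).shape == (n_samples,)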
"""
Bag-of-SFA Symbols (BOSS) transformation of a toy dataset. It is implemented
as :class:`pyts.transformation.BOSS`.
"""
import numpy as np
import matplotlib.pyplot as plt
from pyts.transformation import BOSS

# Parameters
n_samples, n_timestamps = 100, 144

# Toy dataset
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)

# BOSS transformation
boss = BOSS(word_size=2, n_bins=4, window_size=12)
X_boss = boss.fit_transform(X).toarray()

# Visualize the transformation for the first two time series
plt.figure(figsize=(12, 8))
vocabulary_length = len(boss.vocabulary_)
width = 0.3
plt.bar(np.arange(vocabulary_length) - width / 2, X_boss[0],
        width=width, label='First time series')
plt.bar(np.arange(vocabulary_length) + width / 2, X_boss[1],
        width=width, label='Second time series')
# label the x-axis with the learned SFA words
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(boss.vocabulary_.get)(np.arange(vocabulary_length)))
plt.legend(loc='best')
plt.show()
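
# Note (an addition for clarity): BOSS returns a scipy.sparse CSR matrix by
# default, hence the ``.toarray()`` call above. Passing ``sparse=False`` makes
# ``fit_transform`` return a dense ndarray directly, as the next example does:
X_dense = BOSS(word_size=2, n_bins=4, window_size=12,
               sparse=False).fit_transform(X)
assert isinstance(X_dense, np.ndarray)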
"""
Bag-of-SFA Symbols (BOSS) transformation of the GunPoint dataset. It is
implemented as :class:`pyts.transformation.BOSS`.
"""
# Author: Johann Faouzi <*****@*****.**>
# License: BSD-3-Clause

import numpy as np
import matplotlib.pyplot as plt
from pyts.datasets import load_gunpoint
from pyts.transformation import BOSS

# Toy dataset
X_train, _, y_train, _ = load_gunpoint(return_X_y=True)

# BOSS transformation
boss = BOSS(word_size=2, n_bins=4, window_size=12, sparse=False)
X_boss = boss.fit_transform(X_train)

# Visualize the transformation for the first time series of each class
plt.figure(figsize=(6, 4))
vocabulary_length = len(boss.vocabulary_)
width = 0.3
plt.bar(np.arange(vocabulary_length) - width / 2, X_boss[y_train == 1][0],
        width=width, label='First time series in class 1')
plt.bar(np.arange(vocabulary_length) + width / 2, X_boss[y_train == 2][0],
        width=width, label='First time series in class 2')
# label the x-axis with the learned SFA words
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(boss.vocabulary_.get)(np.arange(vocabulary_length)))
plt.legend(loc='best')
plt.show()
# Dynamic Time Warping with a learned warping window
error_dtw_w = 1 - clf_dtw_w.fit(X_train, y_train).score(X_test, y_test)
print('Accuracy DTW_W: ', 1 - error_dtw_w)
print("Error rate with Dynamic Time Warping with a learned warping "
      "window: {0:.4f}".format(error_dtw_w))
error_dtw_w_list.append(error_dtw_w)

# BOSS (ensemble of BOSS + 1-NN pipelines combined by majority voting)
if dataset_list == ["Adiac"] or dataset_list == ["Herring"]:
    window_sizes = np.repeat(np.arange(60, 110, 20), 3)
    norm_means = np.full(window_sizes.size, True)
    word_sizes = np.tile(np.arange(10, 16, 2), 3)
    boss_list = [
        BOSS(word_size=word_size, n_bins=4, norm_mean=norm_mean,
             drop_sum=norm_mean, window_size=window_size)
        for (word_size, norm_mean, window_size)
        in zip(word_sizes, norm_means, window_sizes)
    ]
    pipelines = [
        Pipeline([("boss", boss), ("to_dense", transformer), ("knn", knn)])
        for boss in boss_list
    ]
    voting = VotingClassifier([("pipeline_" + str(i), pipeline)
                               for i, pipeline in enumerate(pipelines)])
    error_boss = 1 - voting.fit(X_train, y_train).score(X_test, y_test)
    print('Accuracy BOSS: ', 1 - error_boss)
    print("Error rate with BOSS: {0:.4f}".format(error_boss))
    error_boss_list.append(error_boss)
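
# The `transformer` and `knn` objects used above are defined earlier in the
# full script; a plausible definition (an assumption mirroring the pyts BOSS
# examples) densifies the sparse BOSS output and uses a 1-NN classifier with
# the BOSS distance from pyts.metrics:
from sklearn.preprocessing import FunctionTransformer
from sklearn.neighbors import KNeighborsClassifier
from pyts.metrics import boss as boss_dist

transformer = FunctionTransformer(lambda x: x.toarray(), accept_sparse=True)
knn = KNeighborsClassifier(n_neighbors=1, metric=boss_dist)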
import numpy as np
import pytest
import re

from scipy.sparse import csr_matrix
from pyts.classification import SAXVSM
from pyts.image import RecurrencePlot
from pyts.multivariate.transformation import MultivariateTransformer
from pyts.transformation import BOSS

n_samples, n_features, n_timestamps = 40, 3, 30
rng = np.random.RandomState(42)
X = rng.randn(n_samples, n_features, n_timestamps)


@pytest.mark.parametrize(
    'params, error, err_msg',
    [({'estimator': [BOSS(), RecurrencePlot(), SAXVSM()]},
      ValueError, "Estimator 2 must be a transformer."),

     ({'estimator': [BOSS()]},
      ValueError, "If 'estimator' is a list, its length must be equal to "
                  "the number of features (1 != 3)"),

     ({'estimator': None},
      TypeError, "'estimator' must be a transformer that inherits from "
                 "sklearn.base.BaseEstimator or a list thereof.")]
)
def test_parameter_check(params, error, err_msg):
    """Test parameter validation."""
    transformer = MultivariateTransformer(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        transformer.fit_transform(X)
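
# A minimal companion sketch (an assumption, not in the original test file)
# showing a configuration that passes validation: one transformer per feature.
def test_valid_estimator_list():
    """fit_transform succeeds when every estimator is a transformer."""
    transformer = MultivariateTransformer(estimator=[BOSS(), BOSS(), BOSS()])
    X_new = transformer.fit_transform(X)
    assert X_new.shape[0] == n_samples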