class RocketRegressor(sklearn.pipeline.Pipeline):
    """Time series regression using ROCKET features and a linear regressor"""

    def __init__(self, num_kernels=10_000, normalize_input=True, random_state=None,
                 alphas=np.logspace(-3, 3, 7), normalize_features=True, memory=None,
                 verbose=False, scoring=None, **kwargs):
        """Build the two-step pipeline: ROCKET feature extraction + RidgeCV.

        RocketRegressor is recommended for up to 10k time series. For a larger
        dataset, you can use ROCKET (in Pytorch). scoring = None --> defaults to r2.

        Args:
            num_kernels     : int, number of random convolutional kernels (default 10,000)
            normalize_input : boolean, whether or not to normalise the input time
                              series per instance (default True)
            random_state    : int (ignored unless int due to compatability with Numba),
                              random seed (optional, default None)
        """
        # Name the two stages before wiring them into the sklearn Pipeline.
        feature_step = Rocket(
            num_kernels=num_kernels,
            normalise=normalize_input,
            random_state=random_state,
        )
        # NOTE(review): RidgeCV's `normalize` kwarg was removed in
        # scikit-learn 1.2 — confirm the pinned sklearn version.
        regressor_step = RidgeCV(
            alphas=alphas,
            normalize=normalize_features,
            scoring=scoring,
            **kwargs,
        )
        self.steps = [('rocket', feature_step), ('ridgecv', regressor_step)]
        store_attr()
        self._validate_steps()
class RocketRegressor(sklearn.pipeline.Pipeline):
    """Time series regression using ROCKET features and a linear regressor"""

    def __init__(self, num_kernels=10_000, normalize_input=True, random_state=None,
                 alphas=np.logspace(-3, 3, 7), normalize_features=True, memory=None,
                 verbose=False, scoring=None, **kwargs):
        """Build the two-step pipeline: ROCKET feature extraction + RidgeCV.

        RocketRegressor is recommended for up to 10k time series. For a larger
        dataset, you can use ROCKET (in Pytorch). scoring = None --> defaults to r2.

        Args:
            num_kernels     : int, number of random convolutional kernels (default 10,000)
            normalize_input : boolean, whether or not to normalise the input time
                              series per instance (default True)
            random_state    : Optional random seed (default None)

        Raises:
            ImportError: if sktime (which provides Rocket) is not installed.
        """
        try:
            from sktime.transformations.panel.rocket import Rocket
        except ImportError as e:
            # Fail fast with a clear message. The original code only printed
            # and then fell through to reference Rocket, which crashed with a
            # confusing NameError instead.
            raise ImportError(
                "You need to install sktime to be able to use RocketRegressor"
            ) from e
        self.steps = [
            ('rocket', Rocket(num_kernels=num_kernels,
                              normalise=normalize_input,
                              random_state=random_state)),
            ('ridgecv', RidgeCV(alphas=alphas,
                                normalize=normalize_features,
                                scoring=scoring,
                                **kwargs)),
        ]
        store_attr()
        self._validate_steps()
def _fit(self, X, y):
    """Build and fit the ROCKET + RidgeClassifierCV pipeline.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    transformer = Rocket(
        num_kernels=self.num_kernels,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    self._pipeline = rocket_pipeline = make_pipeline(transformer, classifier)
    rocket_pipeline.fit(X, y)
    return self
def fit(self, X, y):
    """Build a pipeline containing the ROCKET transformer and RidgeClassifierCV.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances] The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y)

    # Record the label metadata: class count, ordered labels, and a
    # label -> column-index mapping used elsewhere for probabilities.
    labels = np.asarray(y)
    self.n_classes = np.unique(labels).shape[0]
    self.classes_ = class_distribution(labels.reshape(-1, 1))[0][0]
    for position, label in enumerate(self.classes_):
        self.class_dictionary[label] = position

    # ROCKET features feeding a cross-validated ridge classifier.
    self.classifier = pipeline = make_pipeline(
        Rocket(
            num_kernels=self.num_kernels,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        ),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
    )
    pipeline.fit(X, y)

    self._is_fitted = True
    return self
def fit(self, X, y):
    """
    Build a single or ensemble of pipelines containing the ROCKET transformer and
    RidgeClassifierCV classifier.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances] The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y)
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for index, classVal in enumerate(self.classes_):
        self.class_dictionary[classVal] = index

    def _build_pipeline():
        # One ROCKET transform feeding a cross-validated ridge classifier.
        # Factored out so the ensemble and single-model branches cannot drift.
        return make_pipeline(
            Rocket(num_kernels=self.num_kernels, random_state=self.random_state),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )

    if self.ensemble:
        for _ in range(self.ensemble_size):
            rocket_pipeline = _build_pipeline()
            rocket_pipeline.fit(X, y)
            self.classifiers.append(rocket_pipeline)
            # Weight each member by its ridge CV score. Accumulate the
            # value directly instead of re-indexing self.weights with the
            # loop counter, which was wrong whenever self.weights was not
            # empty at the start of fit().
            weight = rocket_pipeline.steps[1][1].best_score_
            self.weights.append(weight)
            self.weight_sum = self.weight_sum + weight
    else:
        rocket_pipeline = _build_pipeline()
        rocket_pipeline.fit(X, y)
        self.classifiers.append(rocket_pipeline)

    self._is_fitted = True
    return self
def test_rocket_on_gunpoint():
    """Test of Rocket on gun point."""
    # 'fit' ROCKET on the training split: infers data dimensions and
    # generates the random kernels.
    train_X, train_y = load_gunpoint(split="train", return_X_y=True)
    transform = Rocket(num_kernels=10_000)
    transform.fit(train_X)

    # Transformed shape must be (n_examples, num_kernels * 2).
    train_features = transform.transform(train_X)
    np.testing.assert_equal(train_features.shape, (len(train_X), 20_000))

    # Fit the classifier on the extracted features.
    model = make_pipeline(
        StandardScaler(with_mean=False),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
    )
    model.fit(train_features, train_y)

    # Transform the test split and check its shape the same way.
    test_X, test_y = load_gunpoint(split="test", return_X_y=True)
    test_features = transform.transform(test_X)
    np.testing.assert_equal(test_features.shape, (len(test_X), 20_000))

    # predict (alternatively: 'model.score(test_features, test_y)');
    # on Gunpoint this should be 100% accurate.
    predictions = model.predict(test_features)
    assert accuracy_score(predictions, test_y) == 1.0
def _fit(self, X, y):
    """Fit Arsenal to training data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self.n_instances_, self.n_dims_, self.series_length_ = X.shape

    # Contract budget in seconds; a value of 0 selects the fixed-size
    # (non-contracted) branch below.
    time_limit = self.time_limit_in_minutes * 60
    start_time = time.time()
    train_time = 0

    # Choose the base ROCKET variant; the *Multivariate flavour is used
    # whenever the input has more than one channel.
    if self.rocket_transform == "rocket":
        base_rocket = Rocket(num_kernels=self.num_kernels)
    elif self.rocket_transform == "minirocket":
        if self.n_dims_ > 1:
            base_rocket = MiniRocketMultivariate(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
            )
        else:
            base_rocket = MiniRocket(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
            )
    elif self.rocket_transform == "multirocket":
        if self.n_dims_ > 1:
            base_rocket = MultiRocketMultivariate(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
                n_features_per_kernel=self.n_features_per_kernel,
            )
        else:
            base_rocket = MultiRocket(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
                n_features_per_kernel=self.n_features_per_kernel,
            )
    else:
        raise ValueError(f"Invalid Rocket transformer: {self.rocket_transform}")

    if time_limit > 0:
        # Contracted mode: keep fitting batches of estimators (one batch
        # per pass, _threads_to_use members at a time) until the time
        # budget or the estimator cap is exhausted.
        self.n_estimators = 0
        self.estimators_ = []
        self.transformed_data_ = []

        while (
            train_time < time_limit
            and self.n_estimators < self.contract_max_n_estimators
        ):
            fit = Parallel(n_jobs=self._threads_to_use)(
                delayed(self._fit_estimator)(
                    _clone_estimator(
                        base_rocket,
                        # Derive a distinct deterministic seed per member
                        # from random_state (0 is remapped to 255 so the
                        # product is never zero).
                        None
                        if self.random_state is None
                        else (255 if self.random_state == 0 else self.random_state)
                        * 37
                        * (i + 1),
                    ),
                    X,
                    y,
                )
                for i in range(self._threads_to_use)
            )

            estimators, transformed_data = zip(*fit)
            self.estimators_ += estimators
            self.transformed_data_ += transformed_data

            self.n_estimators += self._threads_to_use
            train_time = time.time() - start_time
    else:
        # Fixed-size mode: fit exactly n_estimators members in parallel,
        # using the same per-member seed derivation as above.
        fit = Parallel(n_jobs=self._threads_to_use)(
            delayed(self._fit_estimator)(
                _clone_estimator(
                    base_rocket,
                    None
                    if self.random_state is None
                    else (255 if self.random_state == 0 else self.random_state)
                    * 37
                    * (i + 1),
                ),
                X,
                y,
            )
            for i in range(self.n_estimators)
        )

        self.estimators_, self.transformed_data_ = zip(*fit)

    # Weight each ensemble member by its ridge classifier's best CV score;
    # _weight_sum is the normalising constant for weighted voting.
    self.weights_ = []
    self._weight_sum = 0
    for rocket_pipeline in self.estimators_:
        weight = rocket_pipeline.steps[1][1].best_score_
        self.weights_.append(weight)
        self._weight_sum += weight

    return self
def _fit(self, X, y):
    """Fit an ensemble of ROCKET pipelines to training data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.
    """
    self._n_jobs = check_n_jobs(self.n_jobs)

    self.n_instances, self.n_dims, self.series_length = X.shape

    # Label metadata: class count, ordered labels, label -> index mapping.
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for index, classVal in enumerate(self.classes_):
        self._class_dictionary[classVal] = index

    # Contract budget in seconds; 0 selects the fixed-size branch below.
    time_limit = self.time_limit_in_minutes * 60
    start_time = time.time()
    train_time = 0

    base_rocket = Rocket(num_kernels=self.num_kernels)

    if time_limit > 0:
        # Contracted mode: fit _n_jobs estimators per pass until the time
        # budget or the estimator cap is exhausted.
        self.n_estimators = 0
        self.estimators_ = []
        self.transformed_data = []

        while (
            train_time < time_limit
            and self.n_estimators < self.contract_max_n_estimators
        ):
            fit = Parallel(n_jobs=self._n_jobs)(
                delayed(self._fit_estimator)(
                    _clone_estimator(
                        base_rocket,
                        # Distinct deterministic seed per member derived
                        # from random_state (0 is remapped to 255 so the
                        # product is never zero).
                        None
                        if self.random_state is None
                        else (255 if self.random_state == 0 else self.random_state)
                        * 37
                        * (i + 1),
                    ),
                    X,
                    y,
                )
                for i in range(self._n_jobs)
            )

            estimators, transformed_data = zip(*fit)
            self.estimators_ += estimators
            self.transformed_data += transformed_data

            self.n_estimators += self._n_jobs
            train_time = time.time() - start_time
    else:
        # Fixed-size mode: fit exactly n_estimators members in parallel.
        fit = Parallel(n_jobs=self._n_jobs)(
            delayed(self._fit_estimator)(
                _clone_estimator(
                    base_rocket,
                    None
                    if self.random_state is None
                    else (255 if self.random_state == 0 else self.random_state)
                    * 37
                    * (i + 1),
                ),
                X,
                y,
            )
            for i in range(self.n_estimators)
        )

        self.estimators_, self.transformed_data = zip(*fit)

    # Weight each member by its ridge classifier's best CV score;
    # _weight_sum normalises the weighted vote.
    self.weights = []
    self._weight_sum = 0
    for rocket_pipeline in self.estimators_:
        weight = rocket_pipeline.steps[1][1].best_score_
        self.weights.append(weight)
        self._weight_sum += weight
import sys

sys.path.append('./')
from datasets.ucr_uWaveGes import load_data
from rocket_functions import generate_kernels, apply_kernels
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
import numpy as np

# Load the uWaveGesture splits and cast the series to float64 for ROCKET.
(X_train, y_train, pic_train), (X_test, y_test, pic_test) = load_data()
X_train, X_test = X_train.astype(np.float64), X_test.astype(np.float64)
print(X_train.dtype)

# Fit the ROCKET transform on the training series and extract features.
transformer = Rocket()
transformer.fit(X_train)
train_features = transformer.transform(X_train)

# Cross-validated ridge classifier on the ROCKET features.
model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
model.fit(train_features, y_train)

# Evaluate on the held-out split.
test_features = transformer.transform(X_test)
print(model.score(test_features, y_test))
def _make_estimator(num_kernels, random_state):
    """Return a fresh ROCKET + RidgeClassifierCV pipeline."""
    transform = Rocket(num_kernels=num_kernels, random_state=random_state)
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    return make_pipeline(transform, classifier)
def _fit(self, X, y):
    """Build a pipeline containing the Rocket transformer and RidgeClassifierCV.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    _, n_dims, _ = X.shape
    multivariate = n_dims > 1

    # Arguments shared by every ROCKET variant.
    common = dict(
        num_kernels=self.num_kernels,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )

    if self.rocket_transform == "rocket":
        rocket = Rocket(**common)
    elif self.rocket_transform == "minirocket":
        variant = MiniRocketMultivariate if multivariate else MiniRocket
        rocket = variant(
            max_dilations_per_kernel=self.max_dilations_per_kernel,
            **common,
        )
    elif self.rocket_transform == "multirocket":
        variant = MultiRocketMultivariate if multivariate else MultiRocket
        rocket = variant(
            max_dilations_per_kernel=self.max_dilations_per_kernel,
            n_features_per_kernel=self.n_features_per_kernel,
            **common,
        )
    else:
        raise ValueError(
            f"Invalid Rocket transformer: {self.rocket_transform}")

    self._pipeline = rocket_pipeline = make_pipeline(
        rocket,
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
    )
    rocket_pipeline.fit(X, y)

    return self
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build.

    To differentiate settings just add another elif. So, for example, if you
    wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in
    the elif. This may well get superseded, it is just how we have always done
    it.

    :param cls: String indicating which classifier you want
    :param resampleId: random seed passed to classifiers that accept one
    :return: A classifier.
    :raises ValueError: if cls does not name a known classifier
    """
    # Hoisted: cls.lower() was previously recomputed in every branch.
    name = cls.lower()
    if name == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif name == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif name == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif name == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif name == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif name == "cif":
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "boss":
        return BOSSEnsemble(random_state=resampleId)
    elif name == "cboss":
        return ContractableBOSS(random_state=resampleId)
    elif name == "tde":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif name == "dtwcv":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif name in ("ee", "elasticensemble"):
        return dist.ElasticEnsemble()
    elif name == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif name == "risecomposite":
        # RISE-style composite: segment, extract ACF + power-spectrum
        # features, tabularise, then a decision tree as the base learner.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs, validate=False)
                        ),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum, validate=False)
                        ),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    elif name == "rocket":
        # ROCKET features + cross-validated ridge classifier.
        return make_pipeline(
            Rocket(random_state=resampleId),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
    else:
        # ValueError (a subclass of Exception, so existing handlers still
        # match) and the offending name included, instead of the previous
        # bare Exception("UNKNOWN CLASSIFIER").
        raise ValueError(f"Unknown classifier: {cls}")