import numpy as np
from numpy import testing
from sklearn.metrics import accuracy_score

from sktime.classification.kernel_based import Arsenal
from sktime.datasets import load_basic_motions, load_gunpoint, load_unit_test


def test_arsenal_on_gunpoint():
    """Test of Arsenal on gunpoint data."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train Arsenal
    arsenal = Arsenal(num_kernels=1000, n_estimators=10, random_state=0)
    arsenal.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = arsenal.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, arsenal_gunpoint_probas)
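
# The fixture comparison above pins down behaviour for a fixed random_state. Below is a
# minimal complementary sketch of the same seeding property, without stored fixtures.
# The helper name is hypothetical and not part of the test suite; it assumes only the
# imports at the top of this file.
def _check_arsenal_seeding():
    """Two Arsenal fits with the same random_state should give identical probabilities."""
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # build two classifiers with identical parameters and seed
    first = Arsenal(num_kernels=100, n_estimators=3, random_state=0)
    second = Arsenal(num_kernels=100, n_estimators=3, random_state=0)
    first.fit(X_train.iloc[indices], y_train[indices])
    second.fit(X_train.iloc[indices], y_train[indices])

    # the predicted probabilities must match exactly
    testing.assert_array_equal(
        first.predict_proba(X_train.iloc[indices]),
        second.predict_proba(X_train.iloc[indices]),
    )
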
def test_arsenal_on_basic_motions():
    """Test of Arsenal on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 10, replace=False)

    # train Arsenal
    arsenal = Arsenal(num_kernels=500, n_estimators=5, random_state=0)
    arsenal.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = arsenal.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, arsenal_basic_motions_probas)
def test_arsenal_on_unit_test_data():
    """Test of Arsenal on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train Arsenal
    arsenal = Arsenal(
        num_kernels=500, n_estimators=5, random_state=0, save_transformed_data=True
    )
    arsenal.fit(X_train, y_train)

    # assert probabilities are the same
    probas = arsenal.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, arsenal_unit_test_probas)

    # test train estimate
    train_probas = arsenal._get_train_probs(X_train, y_train)
    train_preds = arsenal.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.85
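
# The arsenal_gunpoint_probas, arsenal_basic_motions_probas and arsenal_unit_test_probas
# arrays compared against above are stored fixtures that are not reproduced here. Below
# is a minimal, hypothetical sketch of how such a fixture could be regenerated; the
# helper names are illustrative and not part of the test suite, and the printed output
# would be pasted into the fixture definitions by hand.
def _print_array(array):
    """Print a 2D probability array in a copy-pasteable literal form."""
    print("[")
    for sub_array in array:
        print("    [" + ", ".join(str(value) for value in sub_array) + "],")
    print("]")


def _regenerate_unit_test_fixture():
    """Refit Arsenal exactly as in test_arsenal_on_unit_test_data and print probas."""
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, _ = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    arsenal = Arsenal(num_kernels=500, n_estimators=5, random_state=0)
    arsenal.fit(X_train, y_train)
    _print_array(arsenal.predict_proba(X_test.iloc[indices]))
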
from datetime import datetime

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.utils import check_random_state

from sktime.classification.base import BaseClassifier
from sktime.classification.dictionary_based import TemporalDictionaryEnsemble
from sktime.classification.interval_based import DrCIF
from sktime.classification.kernel_based import Arsenal
from sktime.classification.shapelet_based import ShapeletTransformClassifier


class HIVECOTEV2(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V2.

    An ensemble of the STC, DrCIF, Arsenal and TDE classifiers from different feature
    representations, combined using the CAWPE structure as described in [1]_.

    Parameters
    ----------
    stc_params : dict or None, default=None
        Parameters for the ShapeletTransformClassifier module. If None, uses the
        default parameters with a 2 hour transform contract.
    drcif_params : dict or None, default=None
        Parameters for the DrCIF module. If None, uses the default parameters with
        n_estimators set to 500.
    arsenal_params : dict or None, default=None
        Parameters for the Arsenal module. If None, uses the default parameters.
    tde_params : dict or None, default=None
        Parameters for the TemporalDictionaryEnsemble module. If None, uses the
        default parameters.
    time_limit_in_minutes : int, default=0
        Time contract to limit build time in minutes, overriding
        n_estimators/n_parameter_samples for each component. Default of 0 means
        n_estimators/n_parameter_samples for each component is used.
    save_component_probas : bool, default=False
        When predict/predict_proba is called, save each HIVE-COTEV2 component's
        probability predictions in component_probas.
    verbose : int, default=0
        Level of output printed to the console (for information only).
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, default=None
        Seed for random number generation.

    Attributes
    ----------
    n_classes_ : int
        The number of classes.
    classes_ : list
        The unique class labels.
    stc_weight_ : float
        The weight for STC probabilities.
    drcif_weight_ : float
        The weight for DrCIF probabilities.
    arsenal_weight_ : float
        The weight for Arsenal probabilities.
    tde_weight_ : float
        The weight for TDE probabilities.
    component_probas : dict
        Only used if save_component_probas is true. Saved probability predictions for
        each HIVE-COTEV2 component.

    See Also
    --------
    HIVECOTEV1, ShapeletTransformClassifier, DrCIF, Arsenal, TemporalDictionaryEnsemble

    Notes
    -----
    For the Java version, see
    `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java`_.

    References
    ----------
    .. [1] Middlehurst, Matthew, James Large, Michael Flynn, Jason Lines, Aaron
       Bostrom, and Anthony Bagnall. "HIVE-COTE 2.0: a new meta ensemble for time
       series classification." Machine Learning (2021).

    Examples
    --------
    >>> from sktime.classification.hybrid import HIVECOTEV2
    >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
    >>> clf = HIVECOTEV2(
    ...     stc_params={
    ...         "estimator": RotationForest(n_estimators=3),
    ...         "n_shapelet_samples": 500,
    ...         "max_shapelets": 20,
    ...         "batch_size": 100,
    ...     },
    ...     drcif_params={"n_estimators": 10},
    ...     arsenal_params={"num_kernels": 100, "n_estimators": 5},
    ...     tde_params={
    ...         "n_parameter_samples": 25,
    ...         "max_ensemble_size": 5,
    ...         "randomly_selected_params": 10,
    ...     },
    ... )
    >>> clf.fit(X_train, y_train)
    HIVECOTEV2(...)
    >>> y_pred = clf.predict(X_test)
    """

    _tags = {
        "capability:multivariate": True,
        "capability:contractable": True,
        "capability:multithreading": True,
    }

    def __init__(
        self,
        stc_params=None,
        drcif_params=None,
        arsenal_params=None,
        tde_params=None,
        time_limit_in_minutes=0,
        save_component_probas=False,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        self.stc_params = stc_params
        self.drcif_params = drcif_params
        self.arsenal_params = arsenal_params
        self.tde_params = tde_params

        self.time_limit_in_minutes = time_limit_in_minutes

        self.save_component_probas = save_component_probas
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.stc_weight_ = 0
        self.drcif_weight_ = 0
        self.arsenal_weight_ = 0
        self.tde_weight_ = 0
        self.component_probas = {}

        self._stc_params = stc_params
        self._drcif_params = drcif_params
        self._arsenal_params = arsenal_params
        self._tde_params = tde_params
        self._stc = None
        self._drcif = None
        self._arsenal = None
        self._tde = None

        super(HIVECOTEV2, self).__init__()

    def _fit(self, X, y):
        """Fit HIVE-COTE 2.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes ending
        in "_" and sets is_fitted flag to True.
        """
        # Default values from the HC2 paper
        if self.stc_params is None:
            self._stc_params = {"transform_limit_in_minutes": 120}
        if self.drcif_params is None:
            self._drcif_params = {"n_estimators": 500}
        if self.arsenal_params is None:
            self._arsenal_params = {}
        if self.tde_params is None:
            self._tde_params = {}

        # If we are contracting, split the contract time between each algorithm
        if self.time_limit_in_minutes > 0:
            # Leave 1/3 for train estimates
            ct = self.time_limit_in_minutes / 6
            self._stc_params["time_limit_in_minutes"] = ct
            self._drcif_params["time_limit_in_minutes"] = ct
            self._arsenal_params["time_limit_in_minutes"] = ct
            self._tde_params["time_limit_in_minutes"] = ct

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build DrCIF
        self._drcif = DrCIF(
            **self._drcif_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._drcif.fit(X, y)

        if self.verbose > 0:
            print("DrCIF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find DrCIF weight using train set estimate
        train_probs = self._drcif._get_train_probs(X, y)
        train_preds = self._drcif.classes_[np.argmax(train_probs, axis=1)]
        self.drcif_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "DrCIF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("DrCIF weight = " + str(self.drcif_weight_))  # noqa

        # Build Arsenal
        self._arsenal = Arsenal(
            **self._arsenal_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._arsenal.fit(X, y)

        if self.verbose > 0:
print("Arsenal ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find Arsenal weight using train set estimate train_probs = self._arsenal._get_train_probs(X, y) train_preds = self._arsenal.classes_[np.argmax(train_probs, axis=1)] self.arsenal_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "Arsenal train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("Arsenal weight = " + str(self.arsenal_weight_)) # noqa # Build TDE self._tde = TemporalDictionaryEnsemble( **self._tde_params, save_train_predictions=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._tde.fit(X, y) if self.verbose > 0: print("TDE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find TDE weight using train set estimate train_probs = self._tde._get_train_probs(X, y, train_estimate_method="loocv") train_preds = self._tde.classes_[np.argmax(train_probs, axis=1)] self.tde_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "TDE train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("TDE weight = " + str(self.tde_weight_)) # noqa return self def _predict(self, X): """Predicts labels for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predictions for. Returns ------- y : array-like, shape = [n_instances] Predicted class labels. """ rng = check_random_state(self.random_state) return np.array( [ self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))] for prob in self.predict_proba(X) ] ) def _predict_proba(self, X, return_component_probas=False): """Predicts labels probabilities for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predict probabilities for. Returns ------- y : array-like, shape = [n_instances, n_classes_] Predicted probabilities using the ordering in classes_. """ dists = np.zeros((X.shape[0], self.n_classes_)) # Call predict proba on each classifier, multiply the probabilities by the # classifiers weight then add them to the current HC2 probabilities stc_probas = self._stc.predict_proba(X) dists = np.add( dists, stc_probas * (np.ones(self.n_classes_) * self.stc_weight_), ) drcif_probas = self._drcif.predict_proba(X) dists = np.add( dists, drcif_probas * (np.ones(self.n_classes_) * self.drcif_weight_), ) arsenal_probas = self._arsenal.predict_proba(X) dists = np.add( dists, arsenal_probas * (np.ones(self.n_classes_) * self.arsenal_weight_), ) tde_probas = self._tde.predict_proba(X) dists = np.add( dists, tde_probas * (np.ones(self.n_classes_) * self.tde_weight_), ) if self.save_component_probas: self.component_probas = { "STC": stc_probas, "DrCIF": drcif_probas, "Arsenal": arsenal_probas, "TDE": tde_probas, } # Make each instances probability array sum to 1 and return return dists / dists.sum(axis=1, keepdims=True)