def test_stc_on_unit_test_data():
    """Test of ShapeletTransformClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    stc.fit(X_train, y_train)

    # assert probabilities are the same
    probas = stc.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, stc_unit_test_probas)

    # test train estimate
    train_probas = stc._get_train_probs(X_train, y_train)
    train_preds = stc.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.75


def test_stc_on_basic_motions():
    """Test of ShapeletTransformClassifier on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 15, replace=False)

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
    )
    stc.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = stc.predict_proba(X_test.iloc[indices[:10]])
    testing.assert_array_equal(probas, stc_basic_motions_probas)
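# The two tests above compare STC output against hard-coded probability arrays
# (stc_unit_test_probas and stc_basic_motions_probas, defined elsewhere in this
# module). Below is a minimal, hedged sketch of how the unit-test array could be
# regenerated when the shapelet transform changes; the import paths are assumptions
# taken from the docstring examples further down and standard sktime locations, and
# this helper is not part of the original test suite.
def _reproduce_stc_unit_test_probas():
    import numpy as np

    from sktime.classification.shapelet_based import ShapeletTransformClassifier
    from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    from sktime.datasets import load_unit_test

    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, _ = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # same settings as test_stc_on_unit_test_data above
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    stc.fit(X_train, y_train)

    # round for readability before pasting back into the expected array
    return np.around(stc.predict_proba(X_test.iloc[indices]), 4)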
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure as described in [1].

    Parameters
    ----------
    stc_params : dict or None, optional (default=None)
        Parameters for the ShapeletTransformClassifier module. If None,
        {"n_estimators": 500} is used.
    tsf_params : dict or None, optional (default=None)
        Parameters for the TimeSeriesForestClassifier module. If None,
        {"n_estimators": 500} is used.
    rise_params : dict or None, optional (default=None)
        Parameters for the RandomIntervalSpectralForest module. If None,
        {"n_estimators": 500} is used.
    cboss_params : dict or None, optional (default=None)
        Parameters for the ContractableBOSS module. If None, the default parameters
        are used.
    verbose : int, optional (default=0)
        Level of output printed to the console (for information only).
    n_jobs : int, optional (default=1)
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, optional (default=None)
        Seed for random number generation.

    Attributes
    ----------
    n_classes : int
        The number of classes, extracted from the data.

    Notes
    -----
    .. [1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and Matthew
       Middlehurst. "On the usage and performance of the Hierarchical Vote Collective
       of Transformation-based Ensembles version 1.0 (hive-cote v1.0)" International
       Workshop on Advanced Analytics and Learning on Temporal Data 2020

    Java version
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java
    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
        "train_estimate": False,
        "contractable": False,
    }

    def __init__(
        self,
        stc_params=None,
        tsf_params=None,
        rise_params=None,
        cboss_params=None,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        if stc_params is None:
            stc_params = {"n_estimators": 500}
        if tsf_params is None:
            tsf_params = {"n_estimators": 500}
        if rise_params is None:
            rise_params = {"n_estimators": 500}
        if cboss_params is None:
            cboss_params = {}

        self.stc_params = stc_params
        self.tsf_params = tsf_params
        self.rise_params = rise_params
        self.cboss_params = cboss_params

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.stc = None
        self.tsf = None
        self.rise = None
        self.cboss = None
        self.stc_weight = 0
        self.tsf_weight = 0
        self.rise_weight = 0
        self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        """Fit a HIVE-COTEv1.0 classifier.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        self.stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                **self.stc_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.stc_weight = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight))  # noqa

        self.tsf = TimeSeriesForestClassifier(
            **self.tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(
                **self.tsf_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.tsf_weight = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight))  # noqa

        self.rise = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self.rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.rise_weight = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight))  # noqa

        self.cboss = ContractableBOSS(
            **self.cboss_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.cboss.fit(X, y)

        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included) ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight))  # noqa

        self._is_fitted = True
        return self

    def predict(self, X):
        """Make predictions for all cases in X.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances, 1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        rng = check_random_state(self.random_state)
        return np.array(
            [
                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
                for prob in self.predict_proba(X)
            ]
        )

    def predict_proba(self, X):
        """Make class probability estimates on each case in X.

        Parameters
        ----------
        X : pandas dataframe of testing data of shape [n_instances, 1].

        Returns
        -------
        output : numpy array of shape = [n_instances, num_classes] of probabilities
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        dists = np.add(
            dists,
            self.stc.predict_proba(X) * (np.ones(self.n_classes) * self.stc_weight),
        )
        dists = np.add(
            dists,
            self.tsf.predict_proba(X) * (np.ones(self.n_classes) * self.tsf_weight),
        )
        dists = np.add(
            dists,
            self.rise.predict_proba(X) * (np.ones(self.n_classes) * self.rise_weight),
        )
        dists = np.add(
            dists,
            self.cboss.predict_proba(X) * (np.ones(self.n_classes) * self.cboss_weight),
        )

        return dists / dists.sum(axis=1, keepdims=True)
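# The CAWPE-style combination in predict_proba above reduces to: multiply each
# component's probability matrix by that component's (train accuracy ** 4) weight,
# sum the weighted matrices, then renormalise each row. A minimal self-contained
# sketch with toy numbers (not taken from any real dataset) showing how the
# exponent sharpens the influence of the stronger component:
def _cawpe_combine_example():
    import numpy as np

    # toy probabilities from two hypothetical components, 2 instances x 2 classes
    probas_a = np.array([[0.6, 0.4], [0.2, 0.8]])
    probas_b = np.array([[0.9, 0.1], [0.1, 0.9]])

    # train-estimate accuracies raised to the 4th power, as in fit above
    weight_a = 0.70 ** 4  # ~0.24
    weight_b = 0.90 ** 4  # ~0.66

    dists = probas_a * weight_a + probas_b * weight_b
    # make each instance's probability array sum to 1, as in predict_proba above
    return dists / dists.sum(axis=1, keepdims=True)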
class HIVECOTEV2(BaseClassifier): """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V2. An ensemble of the STC, DrCIF, Arsenal and TDE classifiers from different feature representations using the CAWPE structure as described in [1]. Parameters ---------- stc_params : dict or None, default=None Parameters for the ShapeletTransformClassifier module. If None, uses the default parameters with a 2 hour transform contract. drcif_params : dict or None, default=None Parameters for the DrCIF module. If None, uses the default parameters with n_estimators set to 500. arsenal_params : dict or None, default=None Parameters for the Arsenal module. If None, uses the default parameters. tde_params : dict or None, default=None Parameters for the TemporalDictionaryEnsemble module. If None, uses the default parameters. time_limit_in_minutes : int, default=0 Time contract to limit build time in minutes, overriding n_estimators/n_parameter_samples for each component. Default of 0 means n_estimators/n_parameter_samples for each component is used. save_component_probas : bool, default=False When predict/predict_proba is called, save each HIVE-COTEV2 component probability predictions in component_probas. verbose : int, default=0 Level of output printed to the console (for information only). n_jobs : int, default=1 The number of jobs to run in parallel for both `fit` and `predict`. ``-1`` means using all processors. random_state : int or None, default=None Seed for random number generation. Attributes ---------- n_classes_ : int The number of classes. classes_ : list The unique class labels. stc_weight_ : float The weight for STC probabilities. drcif_weight_ : float The weight for DrCIF probabilities. arsenal_weight_ : float The weight for Arsenal probabilities. tde_weight_ : float The weight for TDE probabilities. component_probas : dict Only used if save_component_probas is true. Saved probability predictions for each HIVE-COTEV2 component. See Also -------- HIVECOTEV1, ShapeletTransformClassifier, DrCIF, Arsenal, TemporalDictionaryEnsemble Notes ----- For the Java version, see `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/ tsml/classifiers/hybrids/HIVE_COTE.java`_. References ---------- .. [1] Middlehurst, Matthew, James Large, Michael Flynn, Jason Lines, Aaron Bostrom, and Anthony Bagnall. "HIVE-COTE 2.0: a new meta ensemble for time series classification." Machine Learning (2021). Examples -------- >>> from sktime.classification.hybrid import HIVECOTEV2 >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest >>> from sktime.datasets import load_unit_test >>> X_train, y_train = load_unit_test(split="train", return_X_y=True) >>> X_test, y_test = load_unit_test(split="test", return_X_y=True) >>> clf = HIVECOTEV2( ... stc_params={ ... "estimator": RotationForest(n_estimators=3), ... "n_shapelet_samples": 500, ... "max_shapelets": 20, ... "batch_size": 100, ... }, ... drcif_params={"n_estimators": 10}, ... arsenal_params={"num_kernels": 100, "n_estimators": 5}, ... tde_params={ ... "n_parameter_samples": 25, ... "max_ensemble_size": 5, ... "randomly_selected_params": 10, ... }, ... ) >>> clf.fit(X_train, y_train) HIVECOTEV2(...) 
>>> y_pred = clf.predict(X_test) """ _tags = { "capability:multivariate": True, "capability:contractable": True, "capability:multithreading": True, } def __init__( self, stc_params=None, drcif_params=None, arsenal_params=None, tde_params=None, time_limit_in_minutes=0, save_component_probas=False, verbose=0, n_jobs=1, random_state=None, ): self.stc_params = stc_params self.drcif_params = drcif_params self.arsenal_params = arsenal_params self.tde_params = tde_params self.time_limit_in_minutes = time_limit_in_minutes self.save_component_probas = save_component_probas self.verbose = verbose self.n_jobs = n_jobs self.random_state = random_state self.stc_weight_ = 0 self.drcif_weight_ = 0 self.arsenal_weight_ = 0 self.tde_weight_ = 0 self.component_probas = {} self._stc_params = stc_params self._drcif_params = drcif_params self._arsenal_params = arsenal_params self._tde_params = tde_params self._stc = None self._drcif = None self._arsenal = None self._tde = None super(HIVECOTEV2, self).__init__() def _fit(self, X, y): """Fit HIVE-COTE 2.0 to training data. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The training data. y : array-like, shape = [n_instances] The class labels. Returns ------- self : Reference to self. Notes ----- Changes state by creating a fitted model that updates attributes ending in "_" and sets is_fitted flag to True. """ # Default values from HC2 paper if self.stc_params is None: self._stc_params = {"transform_limit_in_minutes": 120} if self.drcif_params is None: self._drcif_params = {"n_estimators": 500} if self.arsenal_params is None: self._arsenal_params = {} if self.tde_params is None: self._tde_params = {} # If we are contracting split the contract time between each algorithm if self.time_limit_in_minutes > 0: # Leave 1/3 for train estimates ct = self.time_limit_in_minutes / 6 self._stc_params["time_limit_in_minutes"] = ct self._drcif_params["time_limit_in_minutes"] = ct self._arsenal_params["time_limit_in_minutes"] = ct self._tde_params["time_limit_in_minutes"] = ct # Build STC self._stc = ShapeletTransformClassifier( **self._stc_params, save_transformed_data=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._stc.fit(X, y) if self.verbose > 0: print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find STC weight using train set estimate train_probs = self._stc._get_train_probs(X, y) train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)] self.stc_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "STC train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("STC weight = " + str(self.stc_weight_)) # noqa # Build DrCIF self._drcif = DrCIF( **self._drcif_params, save_transformed_data=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._drcif.fit(X, y) if self.verbose > 0: print("DrCIF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find DrCIF weight using train set estimate train_probs = self._drcif._get_train_probs(X, y) train_preds = self._drcif.classes_[np.argmax(train_probs, axis=1)] self.drcif_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "DrCIF train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("DrCIF weight = " + str(self.drcif_weight_)) # noqa # Build Arsenal self._arsenal = Arsenal( **self._arsenal_params, save_transformed_data=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._arsenal.fit(X, y) if self.verbose > 0: 
print("Arsenal ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find Arsenal weight using train set estimate train_probs = self._arsenal._get_train_probs(X, y) train_preds = self._arsenal.classes_[np.argmax(train_probs, axis=1)] self.arsenal_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "Arsenal train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("Arsenal weight = " + str(self.arsenal_weight_)) # noqa # Build TDE self._tde = TemporalDictionaryEnsemble( **self._tde_params, save_train_predictions=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._tde.fit(X, y) if self.verbose > 0: print("TDE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find TDE weight using train set estimate train_probs = self._tde._get_train_probs(X, y, train_estimate_method="loocv") train_preds = self._tde.classes_[np.argmax(train_probs, axis=1)] self.tde_weight_ = accuracy_score(y, train_preds) ** 4 if self.verbose > 0: print( # noqa "TDE train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("TDE weight = " + str(self.tde_weight_)) # noqa return self def _predict(self, X): """Predicts labels for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predictions for. Returns ------- y : array-like, shape = [n_instances] Predicted class labels. """ rng = check_random_state(self.random_state) return np.array( [ self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))] for prob in self.predict_proba(X) ] ) def _predict_proba(self, X, return_component_probas=False): """Predicts labels probabilities for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predict probabilities for. Returns ------- y : array-like, shape = [n_instances, n_classes_] Predicted probabilities using the ordering in classes_. """ dists = np.zeros((X.shape[0], self.n_classes_)) # Call predict proba on each classifier, multiply the probabilities by the # classifiers weight then add them to the current HC2 probabilities stc_probas = self._stc.predict_proba(X) dists = np.add( dists, stc_probas * (np.ones(self.n_classes_) * self.stc_weight_), ) drcif_probas = self._drcif.predict_proba(X) dists = np.add( dists, drcif_probas * (np.ones(self.n_classes_) * self.drcif_weight_), ) arsenal_probas = self._arsenal.predict_proba(X) dists = np.add( dists, arsenal_probas * (np.ones(self.n_classes_) * self.arsenal_weight_), ) tde_probas = self._tde.predict_proba(X) dists = np.add( dists, tde_probas * (np.ones(self.n_classes_) * self.tde_weight_), ) if self.save_component_probas: self.component_probas = { "STC": stc_probas, "DrCIF": drcif_probas, "Arsenal": arsenal_probas, "TDE": tde_probas, } # Make each instances probability array sum to 1 and return return dists / dists.sum(axis=1, keepdims=True)
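# A hedged usage sketch for HIVECOTEV2, mirroring the docstring example above and
# showing how save_component_probas exposes the per-module probabilities alongside
# the CAWPE weights. The parameter values are the small test settings from that
# example, not recommended defaults, and the import paths are those given in the
# docstring.
def _hc2_component_probas_example():
    from sktime.classification.hybrid import HIVECOTEV2
    from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    from sktime.datasets import load_unit_test

    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, _ = load_unit_test(split="test", return_X_y=True)

    hc2 = HIVECOTEV2(
        stc_params={
            "estimator": RotationForest(n_estimators=3),
            "n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        drcif_params={"n_estimators": 10},
        arsenal_params={"num_kernels": 100, "n_estimators": 5},
        tde_params={
            "n_parameter_samples": 25,
            "max_ensemble_size": 5,
            "randomly_selected_params": 10,
        },
        save_component_probas=True,
        random_state=0,
    )
    hc2.fit(X_train, y_train)

    probas = hc2.predict_proba(X_test)
    # component_probas holds the STC/DrCIF/Arsenal/TDE probabilities combined above
    return probas, hc2.component_probas, hc2.stc_weight_, hc2.tde_weight_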
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure, as described in [1].

    Parameters
    ----------
    random_state : int or None, optional (default=None)
        Seed for random number generation.

    Attributes
    ----------
    n_classes : int
        The number of classes, extracted from the data.

    References
    ----------
    .. [1] Bagnall, Anthony, Michael Flynn, James Large, Jason Lines and Matthew
       Middlehurst. "On the Usage and Performance of The Hierarchical Vote Collective
       of Transformation-based Ensembles version 1.0 (HIVE-COTE 1.0)", arXiv preprint
       arXiv:2004.06069, 2020.

    Notes
    -----
    Java version
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java
    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
    }

    def __init__(
        self,
        random_state=None,
    ):
        self.random_state = random_state

        self.stc = None
        self.tsf = None
        self.rise = None
        self.cboss = None
        self.stc_weight = 0
        self.tsf_weight = 0
        self.rise_weight = 0
        self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        )
        self.stc.fit(X, y)

        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                random_state=self.random_state,
                time_contract_in_mins=60,
            ),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.stc_weight = accuracy_score(y, train_preds) ** 4

        self.tsf = TimeSeriesForest(random_state=self.random_state)
        self.tsf.fit(X, y)

        train_preds = cross_val_predict(
            TimeSeriesForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.tsf_weight = accuracy_score(y, train_preds) ** 4

        self.rise = RandomIntervalSpectralForest(random_state=self.random_state)
        self.rise.fit(X, y)

        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.rise_weight = accuracy_score(y, train_preds) ** 4

        self.cboss = ContractableBOSS(random_state=self.random_state)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds) ** 4

        self._is_fitted = True
        return self

    def predict(self, X):
        rng = check_random_state(self.random_state)
        return np.array(
            [
                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
                for prob in self.predict_proba(X)
            ]
        )

    def predict_proba(self, X):
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        dists = np.add(
            dists,
            self.stc.predict_proba(X) * (np.ones(self.n_classes) * self.stc_weight),
        )
        dists = np.add(
            dists,
            self.tsf.predict_proba(X) * (np.ones(self.n_classes) * self.tsf_weight),
        )
        dists = np.add(
            dists,
            self.rise.predict_proba(X) * (np.ones(self.n_classes) * self.rise_weight),
        )
        dists = np.add(
            dists,
            self.cboss.predict_proba(X) * (np.ones(self.n_classes) * self.cboss_weight),
        )

        return dists / dists.sum(axis=1, keepdims=True)
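# The component weights in the fit method above come from a cross-validated train
# estimate: the accuracy of cross_val_predict, raised to the 4th power, with the
# fold count capped at the smallest class size. A minimal, self-contained sketch of
# just that step, using a plain sklearn classifier and random toy data as hedged
# stand-ins for the time-series components:
def _cv_weight_example():
    import numpy as np
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import cross_val_predict
    from sklearn.tree import DecisionTreeClassifier

    rng = np.random.RandomState(0)
    X = rng.random_sample((30, 8))
    y = np.array([0] * 20 + [1] * 10)

    # cap the number of folds at the size of the smallest class, as in fit above
    cv_size = 10
    _, counts = np.unique(y, return_counts=True)
    cv_size = min(cv_size, int(np.min(counts)))

    train_preds = cross_val_predict(
        DecisionTreeClassifier(random_state=0), X=X, y=y, cv=cv_size
    )
    # accuracy ** 4 is the CAWPE-style weight used for the component
    return accuracy_score(y, train_preds) ** 4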
class HIVECOTEV1(BaseClassifier): """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1. An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature representations using the CAWPE structure as described in [1]_. Parameters ---------- stc_params : dict or None, default=None Parameters for the ShapeletTransformClassifier module. If None, uses the default parameters with a 2 hour transform contract. tsf_params : dict or None, default=None Parameters for the TimeSeriesForestClassifier module. If None, uses the default parameters with n_estimators set to 500. rise_params : dict or None, default=None Parameters for the RandomIntervalSpectralForest module. If None, uses the default parameters with n_estimators set to 500. cboss_params : dict or None, default=None Parameters for the ContractableBOSS module. If None, uses the default parameters. verbose : int, default=0 Level of output printed to the console (for information only). n_jobs : int, default=1 The number of jobs to run in parallel for both `fit` and `predict`. ``-1`` means using all processors. random_state : int or None, default=None Seed for random number generation. Attributes ---------- n_classes_ : int The number of classes. classes_ : list The unique class labels. stc_weight_ : float The weight for STC probabilities. tsf_weight_ : float The weight for TSF probabilities. rise_weight_ : float The weight for RISE probabilities. cboss_weight_ : float The weight for cBOSS probabilities. See Also -------- HIVECOTEV2, ShapeletTransformClassifier, TimeSeriesForestClassifier, RandomIntervalSpectralForest, ContractableBOSS Notes ----- For the Java version, see `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/ tsml/classifiers/hybrids/HIVE_COTE.java`_. References ---------- .. [1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and Matthew Middlehurst. "On the usage and performance of the Hierarchical Vote Collective of Transformation-based Ensembles version 1.0 (hive-cote v1.0)" International Workshop on Advanced Analytics and Learning on Temporal Data 2020 Examples -------- >>> from sktime.classification.hybrid import HIVECOTEV1 >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest >>> from sktime.datasets import load_unit_test >>> X_train, y_train = load_unit_test(split="train", return_X_y=True) >>> X_test, y_test = load_unit_test(split="test", return_X_y=True) >>> clf = HIVECOTEV1( ... stc_params={ ... "estimator": RotationForest(n_estimators=3), ... "n_shapelet_samples": 500, ... "max_shapelets": 20, ... "batch_size": 100, ... }, ... tsf_params={"n_estimators": 10}, ... rise_params={"n_estimators": 10}, ... cboss_params={"n_parameter_samples": 25, "max_ensemble_size": 5}, ... ) >>> clf.fit(X_train, y_train) HIVECOTEV1(...) 
>>> y_pred = clf.predict(X_test) """ _tags = { "capability:multithreading": True, } def __init__( self, stc_params=None, tsf_params=None, rise_params=None, cboss_params=None, verbose=0, n_jobs=1, random_state=None, ): self.stc_params = stc_params self.tsf_params = tsf_params self.rise_params = rise_params self.cboss_params = cboss_params self.verbose = verbose self.n_jobs = n_jobs self.random_state = random_state self.stc_weight_ = 0 self.tsf_weight_ = 0 self.rise_weight_ = 0 self.cboss_weight_ = 0 self._stc_params = stc_params self._tsf_params = tsf_params self._rise_params = rise_params self._cboss_params = cboss_params self._stc = None self._tsf = None self._rise = None self._cboss = None super(HIVECOTEV1, self).__init__() def _fit(self, X, y): """Fit HIVE-COTE 1.0 to training data. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The training data. y : array-like, shape = [n_instances] The class labels. Returns ------- self : Reference to self. Notes ----- Changes state by creating a fitted model that updates attributes ending in "_" and sets is_fitted flag to True. """ # Default values from HC1 paper if self.stc_params is None: self._stc_params = {"transform_limit_in_minutes": 120} if self.tsf_params is None: self._tsf_params = {"n_estimators": 500} if self.rise_params is None: self._rise_params = {"n_estimators": 500} if self.cboss_params is None: self._cboss_params = {} # Cross-validation size for TSF and RISE cv_size = 10 _, counts = np.unique(y, return_counts=True) min_class = np.min(counts) if min_class < cv_size: cv_size = min_class # Build STC self._stc = ShapeletTransformClassifier( **self._stc_params, save_transformed_data=True, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._stc.fit(X, y) if self.verbose > 0: print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find STC weight using train set estimate train_probs = self._stc._get_train_probs(X, y) train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)] self.stc_weight_ = accuracy_score(y, train_preds)**4 if self.verbose > 0: print( # noqa "STC train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("STC weight = " + str(self.stc_weight_)) # noqa # Build TSF self._tsf = TimeSeriesForestClassifier( **self._tsf_params, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._tsf.fit(X, y) if self.verbose > 0: print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find TSF weight using train set estimate found through CV train_preds = cross_val_predict( TimeSeriesForestClassifier(**self._tsf_params, random_state=self.random_state), X=X, y=y, cv=cv_size, n_jobs=self._threads_to_use, ) self.tsf_weight_ = accuracy_score(y, train_preds)**4 if self.verbose > 0: print( # noqa "TSF train estimate ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("TSF weight = " + str(self.tsf_weight_)) # noqa # Build RISE self._rise = RandomIntervalSpectralEnsemble( **self._rise_params, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._rise.fit(X, y) if self.verbose > 0: print("RISE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y")) # noqa # Find RISE weight using train set estimate found through CV train_preds = cross_val_predict( RandomIntervalSpectralEnsemble( **self._rise_params, random_state=self.random_state, ), X=X, y=y, cv=cv_size, n_jobs=self._threads_to_use, ) self.rise_weight_ = accuracy_score(y, train_preds)**4 if self.verbose > 0: print( # noqa "RISE train estimate ", 
datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("RISE weight = " + str(self.rise_weight_)) # noqa # Build cBOSS self._cboss = ContractableBOSS( **self._cboss_params, random_state=self.random_state, n_jobs=self._threads_to_use, ) self._cboss.fit(X, y) # Find cBOSS weight using train set estimate train_probs = self._cboss._get_train_probs(X, y) train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)] self.cboss_weight_ = accuracy_score(y, train_preds)**4 if self.verbose > 0: print( # noqa "cBOSS (estimate included)", datetime.now().strftime("%H:%M:%S %d/%m/%Y"), ) print("cBOSS weight = " + str(self.cboss_weight_)) # noqa return self def _predict(self, X): """Predicts labels for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predictions for. Returns ------- y : array-like, shape = [n_instances] Predicted class labels. """ rng = check_random_state(self.random_state) return np.array([ self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))] for prob in self.predict_proba(X) ]) def _predict_proba(self, X): """Predicts labels probabilities for sequences in X. Parameters ---------- X : 3D np.array of shape = [n_instances, n_dimensions, series_length] The data to make predict probabilities for. Returns ------- y : array-like, shape = [n_instances, n_classes_] Predicted probabilities using the ordering in classes_. """ dists = np.zeros((X.shape[0], self.n_classes_)) # Call predict proba on each classifier, multiply the probabilities by the # classifiers weight then add them to the current HC1 probabilities dists = np.add( dists, self._stc.predict_proba(X) * (np.ones(self.n_classes_) * self.stc_weight_), ) dists = np.add( dists, self._tsf.predict_proba(X) * (np.ones(self.n_classes_) * self.tsf_weight_), ) dists = np.add( dists, self._rise.predict_proba(X) * (np.ones(self.n_classes_) * self.rise_weight_), ) dists = np.add( dists, self._cboss.predict_proba(X) * (np.ones(self.n_classes_) * self.cboss_weight_), ) # Make each instances probability array sum to 1 and return return dists / dists.sum(axis=1, keepdims=True)
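# The _predict method above breaks ties between equal class probabilities at random
# rather than always taking the first argmax. A minimal sketch of that idiom on its
# own, with toy values:
def _random_tie_break_example():
    import numpy as np
    from sklearn.utils import check_random_state

    classes = np.array(["a", "b", "c"])
    probs = np.array([[0.4, 0.4, 0.2], [0.1, 0.2, 0.7]])

    rng = check_random_state(0)
    # np.flatnonzero(prob == prob.max()) lists every index achieving the maximum;
    # rng.choice then picks one of them uniformly at random
    return np.array(
        [classes[int(rng.choice(np.flatnonzero(prob == prob.max())))] for prob in probs]
    )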