def _predict_proba_for_estimator(
    self, X, X_p, X_d, classifier, intervals, dims, atts
):
    """Predict class probabilities with a single fitted ensemble member.

    Parameters
    ----------
    X, X_p, X_d : array-like
        The three series representations the interval features are extracted
        from, consumed in this order. ``X.shape[0]`` sizes the second axis of
        the feature buffer (presumably the number of cases - confirm with
        caller).
    classifier : estimator
        The fitted estimator belonging to this ensemble member.
    intervals, dims, atts : sequences
        Interval bounds and dimension (indexed per interval) and the attribute
        subsample (indexed per attribute), forwarded to ``_drcif_feature``.

    Returns
    -------
    The result of ``classifier._predict_proba_drcif`` when
    ``self._base_estimator`` is a ``ContinuousIntervalTree``; otherwise the
    result of ``classifier.predict_proba`` on the extracted feature matrix.
    """
    c22 = Catch22(outlier_norm=True)

    # The interval tree extracts its own features, so delegate entirely.
    if isinstance(self._base_estimator, ContinuousIntervalTree):
        return classifier._predict_proba_drcif(
            X, X_p, X_d, c22, self._n_intervals, intervals, dims, atts
        )

    T = [X, X_p, X_d]
    transformed_x = np.empty(
        shape=(self._att_subsample_size * self.total_intervals, X.shape[0]),
        dtype=np.float32,
    )

    p = 0  # row of transformed_x being filled
    j = 0  # running interval index across all representations
    for r, series in enumerate(T):
        for _ in range(0, self._n_intervals[r]):
            for a in range(0, self._att_subsample_size):
                transformed_x[p] = _drcif_feature(
                    series, intervals[j], dims[j], atts[a], c22
                )
                p += 1
            j += 1

    transformed_x = transformed_x.T
    # ndarray.round returns a new array; the result must be assigned back or
    # the rounding applied at fit time is silently skipped at predict time.
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)

    return classifier.predict_proba(transformed_x)
def _predict_proba_for_estimator(self, X, classifier, intervals, dims, atts):
    """Predict class probabilities with a single fitted ensemble member.

    Parameters
    ----------
    X : array-like
        Series to predict on. ``X.shape[0]`` sizes the second axis of the
        feature buffer (presumably the number of cases - confirm with caller).
    classifier : estimator
        The fitted estimator belonging to this ensemble member.
    intervals, dims, atts : sequences
        Interval bounds and dimension (indexed per interval) and the attribute
        subsample (indexed per attribute), forwarded to the feature helper.

    Returns
    -------
    The result of ``classifier._predict_proba_cif`` when
    ``self._base_estimator`` is a ``ContinuousIntervalTree``; otherwise the
    result of ``classifier.predict_proba`` on the extracted feature matrix.
    """
    c22 = Catch22(outlier_norm=True)

    # The interval tree extracts its own features, so delegate entirely.
    if isinstance(self._base_estimator, ContinuousIntervalTree):
        return classifier._predict_proba_cif(X, c22, intervals, dims, atts)

    transformed_x = np.empty(
        shape=(self._att_subsample_size * self._n_intervals, X.shape[0]),
        dtype=np.float32,
    )

    for j in range(0, self._n_intervals):
        for a in range(0, self._att_subsample_size):
            transformed_x[self._att_subsample_size * j + a] = _drcif_feature(
                X, intervals[j], dims[j], atts[a], c22, case_id=j
            )

    transformed_x = transformed_x.T
    # ndarray.round returns a new array; the result must be assigned back or
    # the rounding applied at fit time is silently skipped at predict time.
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)

    return classifier.predict_proba(transformed_x)
def _fit(self, X, y):
    """Transform the training data with Catch22 and fit the estimator on it.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, n_dims]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    label_column = np.asarray(y).reshape(-1, 1)
    self.classes_ = class_distribution(label_column)[0][0]
    self.n_classes = np.unique(y).shape[0]

    self._transformer = Catch22(outlier_norm=self.outlier_norm)

    # Fall back to a 200-tree random forest when no estimator was supplied.
    if self.estimator is None:
        base = RandomForestClassifier(n_estimators=200)
    else:
        base = self.estimator
    self._estimator = _clone_estimator(base, self.random_state)

    # Only override n_jobs on estimators that expose a non-None value for it.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self.n_jobs

    features = self._transformer.fit_transform(X, y)
    features = np.nan_to_num(features, copy=False, nan=0, posinf=0, neginf=0)
    self._estimator.fit(features, y)

    return self
def fit(self, X, y):
    """Fit a random forest on Catch22 features extracted from X.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

    label_column = np.asarray(y).reshape(-1, 1)
    self.classes_ = class_distribution(label_column)[0][0]

    transformer = Catch22(outlier_norm=self.outlier_norm)
    raw_features = transformer.fit_transform(X)

    self.classifier = RandomForestClassifier(
        n_jobs=self.n_jobs,
        n_estimators=self.n_estimators,
        random_state=self.random_state,
    )

    # Cast to float32, then zero out NaN and +/-inf before fitting.
    features = np.array(raw_features, dtype=np.float32)
    features = np.nan_to_num(features, copy=False, nan=0, posinf=0, neginf=0)
    self.classifier.fit(features, y)

    self._is_fitted = True
    return self
def _fit_estimator(self, X, y, idx):
    """Fit one ensemble member on features from randomly drawn intervals.

    Draws an attribute subsample, one dimension per interval and random
    interval bounds from a generator seeded from ``self.random_state`` and
    ``idx``, fills a feature matrix via ``_cif_feature`` and fits a clone of
    ``self._base_estimator`` on it.

    Parameters
    ----------
    X : array-like
        Training series, forwarded to ``_cif_feature``.
    y : array-like
        Targets handed to the cloned estimator's ``fit``.
    idx : int
        Index of this ensemble member; used to derive its seed.

    Returns
    -------
    list
        ``[tree, intervals, dims, atts]``: the fitted estimator, the
        ``(n_intervals, 2)`` integer array of interval bounds, the dimension
        drawn for each interval and the attribute subsample.
    """
    c22 = Catch22(outlier_norm=True)

    # A seed of 0 would zero the product below, so it is replaced by 255.
    rs = 255 if self.random_state == 0 else self.random_state
    # Wrap into the range accepted by legacy NumPy seeding (0 .. 2**32 - 1);
    # products already inside that range are unchanged.
    rs = None if self.random_state is None else (rs * 37 * (idx + 1)) % 2**32
    rng = check_random_state(rs)

    transformed_x = np.empty(
        shape=(self._att_subsample_size * self._n_intervals, self.n_instances),
        dtype=np.float32,
    )

    atts = rng.choice(25, self._att_subsample_size, replace=False)
    dims = rng.choice(self.n_dims, self._n_intervals, replace=True)
    intervals = np.zeros((self._n_intervals, 2), dtype=int)

    # Find the random intervals for this classifier and concatenate features.
    for j in range(0, self._n_intervals):
        if rng.random() < 0.5:
            # Draw the start first, then grow the interval forwards.
            intervals[j][0] = rng.randint(
                0, self.series_length - self._min_interval
            )
            len_range = min(
                self.series_length - intervals[j][0],
                self._max_interval,
            )
            # randint(0, 0) raises, so fall back to the minimum length when
            # there is no room to vary it (mirrors the branch below).
            length = (
                rng.randint(0, len_range - self._min_interval)
                + self._min_interval
                if len_range - self._min_interval > 0
                else self._min_interval
            )
            intervals[j][1] = intervals[j][0] + length
        else:
            # Draw the end first, then grow the interval backwards.
            intervals[j][1] = (
                rng.randint(0, self.series_length - self._min_interval)
                + self._min_interval
            )
            len_range = min(intervals[j][1], self._max_interval)
            length = (
                rng.randint(0, len_range - self._min_interval)
                + self._min_interval
                if len_range - self._min_interval > 0
                else self._min_interval
            )
            intervals[j][0] = intervals[j][1] - length

        for a in range(0, self._att_subsample_size):
            transformed_x[self._att_subsample_size * j + a] = _cif_feature(
                X, intervals[j], dims[j], atts[a], c22
            )

    tree = _clone_estimator(self._base_estimator, random_state=rs)

    transformed_x = transformed_x.T
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
    tree.fit(transformed_x, y)

    return [tree, intervals, dims, atts]
def test_catch22_on_gunpoint():
    """Compare Catch22 output on ten GunPoint training cases to stored data."""
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)

    # Fixed permutation of the first ten case indices.
    rng = np.random.RandomState(0)
    indices = rng.permutation(10)

    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    transformed = c22.transform(X_train.iloc[indices])
    testing.assert_array_almost_equal(transformed, catch22_gunpoint_data)
def test_catch22_on_unit_test():
    """Compare outlier-normalised Catch22 output on unit test data to stored data."""
    X_train, y_train = load_unit_test(split="train")

    # Five distinct training cases chosen with a fixed seed.
    rng = np.random.RandomState(0)
    indices = rng.choice(len(y_train), 5, replace=False)

    c22 = Catch22(outlier_norm=True)
    c22.fit(X_train.iloc[indices], y_train[indices])

    # NaN and +/-inf are zeroed before the comparison.
    transformed = c22.transform(X_train.iloc[indices])
    data = np.nan_to_num(transformed, copy=False, nan=0, posinf=0, neginf=0)
    testing.assert_array_almost_equal(data, catch22_unit_test_data)
def test_catch22_on_basic_motions():
    """Compare Catch22 output on basic motions data to stored data."""
    X_train, y_train = load_basic_motions(split="train")

    # Five distinct training cases chosen with a fixed seed.
    rng = np.random.RandomState(4)
    indices = rng.choice(len(y_train), 5, replace=False)

    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    # NaN and +/-inf are zeroed before the comparison.
    transformed = c22.transform(X_train.iloc[indices])
    data = np.nan_to_num(transformed, copy=False, nan=0, posinf=0, neginf=0)
    testing.assert_array_almost_equal(data, catch22_basic_motions_data)
def test_catch22_single_feature_on_gunpoint():
    """Compare each of the 22 single-feature transforms on GunPoint to stored data."""
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)

    # Fixed permutation of the first ten case indices.
    rng = np.random.RandomState(0)
    indices = rng.permutation(10)

    c22 = Catch22()
    c22.fit(X_train.iloc[indices], y_train[indices])

    # One entry per feature index, in order 0..21.
    data = [
        c22._transform_single_feature(X_train.iloc[indices], feature)
        for feature in range(22)
    ]
    testing.assert_array_almost_equal(data, catch22_single_feature_gunpoint_data)
def test_catch22_single_feature_on_unit_test():
    """Compare each single-feature transform on unit test data to stored data."""
    X_train, y_train = load_unit_test(split="train")

    # Two distinct training cases chosen with a fixed seed.
    rng = np.random.RandomState(0)
    indices = rng.choice(len(y_train), 2, replace=False)

    c22 = Catch22(outlier_norm=True)
    c22.fit(X_train.iloc[indices], y_train[indices])

    # Transposed so that row i holds the stored values of feature i.
    expected_by_feature = catch22_unit_test_data.transpose()
    for feature in range(22):
        column = c22.transform_single_feature(X_train.iloc[indices], feature)
        column = np.nan_to_num(column, copy=False, nan=0, posinf=0, neginf=0)
        # Only the first two stored values are compared with the two cases.
        testing.assert_array_almost_equal(column, expected_by_feature[feature][:2])
def _fit(self, X, y):
    """Build the random-interval transformer and estimator, then fit both.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Sets ``self._transformer`` and ``self._estimator``.
    """
    # Default to outlier-normalised, NaN-replacing Catch22 features.
    if self.interval_transformers is None:
        interval_transformers = Catch22(outlier_norm=True, replace_nans=True)
    else:
        interval_transformers = self.interval_transformers

    self._transformer = RandomIntervals(
        n_intervals=self.n_intervals,
        transformers=interval_transformers,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )

    # Default to a rotation forest when no estimator was supplied.
    if self.estimator is None:
        base = RotationForest()
    else:
        base = self.estimator
    self._estimator = _clone_estimator(base, self.random_state)

    # Only override n_jobs on estimators that expose a non-None value for it.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    features = self._transformer.fit_transform(X, y)
    self._estimator.fit(features, y)

    return self
def predict(self, X):
    """Predict a class label for every case in X.

    Parameters
    ----------
    X : The testing input samples of shape [n_instances,1].

    Returns
    -------
    output : numpy array of shape = [n_instances]
    """
    self.check_is_fitted()
    X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

    transformer = Catch22(outlier_norm=self.outlier_norm)
    raw_features = transformer.fit_transform(X)

    # Cast to float32, then zero out NaN and +/-inf before predicting.
    features = np.array(raw_features, dtype=np.float32)
    features = np.nan_to_num(features, copy=False, nan=0, posinf=0, neginf=0)

    return self.classifier.predict(features)
def predict_proba(self, X):
    """Estimate class probabilities for every case in X.

    Parameters
    ----------
    X : pandas dataframe of testing data of shape [n_instances,1].

    Returns
    -------
    output : numpy array of shape = [n_instances, num_classes] of probabilities
    """
    self.check_is_fitted()
    X = check_X(X, enforce_univariate=False, coerce_to_numpy=True)

    transformer = Catch22(outlier_norm=self.outlier_norm)
    raw_features = transformer.fit_transform(X)

    # Cast to float32, then zero out NaN and +/-inf before predicting.
    features = np.array(raw_features, dtype=np.float32)
    features = np.nan_to_num(features, copy=False, nan=0, posinf=0, neginf=0)

    return self.classifier.predict_proba(features)
def _fit(self, X, y):
    """Build the Catch22 transformer and estimator, then fit both.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Sets ``self._transformer`` and ``self._estimator``.
    """
    self._transformer = Catch22(outlier_norm=self.outlier_norm)

    # Fall back to a 200-tree random forest when no estimator was supplied.
    if self.estimator is None:
        base = RandomForestClassifier(n_estimators=200)
    else:
        base = self.estimator
    self._estimator = _clone_estimator(base, self.random_state)

    # Only override n_jobs on estimators that expose a non-None value for it.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    features = self._transformer.fit_transform(X, y)
    features = np.nan_to_num(features, copy=False, nan=0, posinf=0, neginf=0)
    self._estimator.fit(features, y)

    return self
def _fit_estimator(self, X, X_p, X_d, y, idx):
    """Fit one ensemble member on features from randomly drawn intervals.

    Draws an attribute subsample, one dimension per interval and random
    interval bounds for each of the three representations from a generator
    seeded from ``self.random_state`` and ``idx``, fills a feature matrix via
    ``_drcif_feature`` and fits a clone of ``self._base_estimator`` on it.

    Parameters
    ----------
    X, X_p, X_d : array-like
        The three series representations, processed in this order. Axis 2 of
        each (``shape[2]``) is used as the length intervals are drawn within.
    y : array-like
        Targets handed to the cloned estimator's ``fit``.
    idx : int
        Index of this ensemble member; used to derive its seed.

    Returns
    -------
    list
        ``[tree, intervals, dims, atts, transformed]`` where ``transformed``
        is the fitted feature matrix when ``self.save_transformed_data`` is
        truthy and ``None`` otherwise.
    """
    c22 = Catch22(outlier_norm=True)
    T = [X, X_p, X_d]

    # A seed of 0 would zero the product below, so it is replaced by 255.
    rs = 255 if self.random_state == 0 else self.random_state
    # Wrap into the range accepted by legacy NumPy seeding (0 .. 2**32 - 1);
    # products already inside that range are unchanged.
    rs = None if self.random_state is None else (rs * 37 * (idx + 1)) % 2**32
    rng = check_random_state(rs)

    transformed_x = np.empty(
        shape=(self._att_subsample_size * self.total_intervals, self.n_instances),
        dtype=np.float32,
    )

    atts = rng.choice(29, self._att_subsample_size, replace=False)
    dims = rng.choice(self.n_dims, self.total_intervals, replace=True)
    intervals = np.zeros((self.total_intervals, 2), dtype=int)

    p = 0  # row of transformed_x being filled
    j = 0  # running interval index across all representations
    for r in range(0, len(T)):
        transform_length = T[r].shape[2]

        # Find the random intervals for this classifier and representation r,
        # and concatenate features.
        for _ in range(0, self._n_intervals[r]):
            if rng.random() < 0.5:
                # Draw the start first, then grow the interval forwards.
                intervals[j][0] = rng.randint(
                    0, transform_length - self._min_interval[r]
                )
                len_range = min(
                    transform_length - intervals[j][0],
                    self._max_interval[r],
                )
                # randint(0, 0) raises, so fall back to the minimum length
                # when there is no room to vary it (mirrors the branch below).
                length = (
                    rng.randint(0, len_range - self._min_interval[r])
                    + self._min_interval[r]
                    if len_range - self._min_interval[r] > 0
                    else self._min_interval[r]
                )
                intervals[j][1] = intervals[j][0] + length
            else:
                # Draw the end first, then grow the interval backwards.
                intervals[j][1] = (
                    rng.randint(0, transform_length - self._min_interval[r])
                    + self._min_interval[r]
                )
                len_range = min(intervals[j][1], self._max_interval[r])
                length = (
                    rng.randint(0, len_range - self._min_interval[r])
                    + self._min_interval[r]
                    if len_range - self._min_interval[r] > 0
                    else self._min_interval[r]
                )
                intervals[j][0] = intervals[j][1] - length

            for a in range(0, self._att_subsample_size):
                transformed_x[p] = _drcif_feature(
                    T[r], intervals[j], dims[j], atts[a], c22
                )
                p += 1

            j += 1

    tree = _clone_estimator(self._base_estimator, random_state=rs)

    transformed_x = transformed_x.T
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
    tree.fit(transformed_x, y)

    return [
        tree,
        intervals,
        dims,
        atts,
        transformed_x if self.save_transformed_data else None,
    ]
# Each statement below passes a label and the result of a `_reproduce_*`
# helper, run on a freshly constructed estimator or transformer, to
# `_print_array` (presumably to emit reference arrays for the tests - confirm
# against the helper definitions, which are outside this view).

# Shapelet transform classifier with a 3-estimator rotation forest, seeded.
_print_array(
    "ShapeletTransformClassifier - BasicMotions",
    _reproduce_classification_basic_motions(
        ShapeletTransformClassifier(
            estimator=RotationForest(n_estimators=3),
            max_shapelets=20,
            n_shapelet_samples=500,
            batch_size=100,
            random_state=0,
        )
    ),
)
# Catch22 with outlier normalisation enabled.
_print_array(
    "Catch22 - UnitTest",
    _reproduce_transform_unit_test(Catch22(outlier_norm=True)),
)
# Catch22 with default arguments.
_print_array(
    "Catch22 - BasicMotions",
    _reproduce_transform_basic_motions(Catch22()),
)
# Random intervals with three intervals and a fixed seed.
_print_array(
    "RandomIntervals - UnitTest",
    _reproduce_transform_unit_test(RandomIntervals(random_state=0, n_intervals=3)),
)
_print_array(
    "RandomIntervals - BasicMotions",
    _reproduce_transform_basic_motions(
        RandomIntervals(random_state=0, n_intervals=3)
    ),
)