def test_output_format_dim(n_instances, n_timepoints, n_intervals, features):
    """Check the container type, row count and values of the transform output.

    The input is a nested panel built from a constant series of ones with a
    single column. The test asserts that the transformed result is a
    ``pd.DataFrame`` with one row per input row and that every extracted
    value equals one.
    """
    constant_series = np.ones(n_timepoints)
    X = _make_nested_from_array(
        constant_series, n_instances=n_instances, n_columns=1
    )
    # Only the number of rows is needed for the shape check below.
    expected_rows, _ = X.shape

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=features
    )
    Xt = extractor.fit_transform(X)

    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == expected_rows
    all_ones = np.ones(Xt.shape)
    assert np.array_equal(Xt.values, all_ones)
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check an explicitly composed forest matches the default forest.

    Two ``ComposableTimeSeriesForestClassifier`` instances are fitted on the
    gunpoint training split with the same random state: one receives an
    explicit pipeline (interval feature extraction followed by a decision
    tree), the other relies on its default estimator. Their predicted
    probabilities on the test split must be exactly equal.

    NOTE(review): ``n_intervals`` is accepted but not used in the body.
    """
    seed = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)

    # Explicitly composed base estimator.
    interval_features = [np.mean, np.std, _slope]
    extractor = RandomIntervalFeatureExtractor(
        random_state=seed, features=interval_features
    )
    base_pipeline = Pipeline(
        [("transform", extractor), ("clf", DecisionTreeClassifier())]
    )
    explicit_forest = ComposableTimeSeriesForestClassifier(
        estimator=base_pipeline, random_state=seed, n_estimators=n_estimators
    )
    explicit_forest.fit(X_train, y_train)
    proba_explicit = explicit_forest.predict_proba(X_test)

    # Default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally.
    default_forest = ComposableTimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(X_train, y_train)
    proba_default = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(proba_explicit, proba_default)
def _validate_estimator(self):
    """Validate ``n_estimators`` and set up the base estimator.

    Sets ``self.estimator_`` to either the user-supplied pipeline or a
    default pipeline (random interval feature extraction followed by a
    decision tree regressor). It then builds ``self.estimator_params``, whose
    keys are the tree parameters prefixed with the name of the pipeline's
    final step, and mirrors each of those entries as an attribute on ``self``.

    Raises
    ------
    ValueError
        If ``n_estimators`` is not an integer or is not greater than zero,
        if ``estimator`` is given but is not a ``Pipeline``, or if the last
        step of the given pipeline is not a ``DecisionTreeRegressor``.
    """
    n_estimators = self.n_estimators
    if not isinstance(n_estimators, numbers.Integral):
        raise ValueError(
            "n_estimators must be an integer, "
            "got {0}.".format(type(n_estimators))
        )
    if n_estimators <= 0:
        raise ValueError(
            "n_estimators must be greater than zero, "
            "got {0}.".format(n_estimators)
        )

    # Resolve the base estimator: default pipeline or validated user input.
    user_estimator = self.estimator
    if user_estimator is None:
        # Default time series forest pipeline.
        extractor = RandomIntervalFeatureExtractor(
            n_intervals="sqrt",
            features=[np.mean, np.std, _slope],
            random_state=self.random_state,
        )
        tree = DecisionTreeRegressor(random_state=self.random_state)
        self.estimator_ = Pipeline([("transform", extractor), ("clf", tree)])
    elif not isinstance(user_estimator, Pipeline):
        raise ValueError("`estimator` must be pipeline with transforms.")
    elif not isinstance(user_estimator.steps[-1][1], DecisionTreeRegressor):
        raise ValueError(
            "Last step in `estimator` must be DecisionTreeRegressor."
        )
    else:
        self.estimator_ = user_estimator

    # Tree parameters forwarded to the final step of the pipeline.
    tree_param_names = (
        "criterion",
        "max_depth",
        "min_samples_split",
        "min_samples_leaf",
        "min_weight_fraction_leaf",
        "max_features",
        "max_leaf_nodes",
        "min_impurity_decrease",
        "min_impurity_split",
    )
    tree_params = {name: getattr(self, name) for name in tree_param_names}

    # Prefix each parameter with the final step's name ("<step>__<param>").
    final_estimator = self.estimator_.steps[-1][0]
    self.estimator_params = {
        f"{final_estimator}__{pname}": pval
        for pname, pval in tree_params.items()
    }

    # Expose the renamed parameters as attributes on the instance as well.
    for renamed, value in self.estimator_params.items():
        setattr(self, renamed, value)
def test_different_implementations():
    """Compare the feature extractor with chained transformations.

    A single random interval is segmented and then reduced with ``np.mean``
    row by row; the result must be almost equal to what
    ``RandomIntervalFeatureExtractor`` produces with the same random state,
    one interval and ``np.mean`` as its only feature.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    # Reference result: segment first, then apply the mean to each row.
    segmenter = RandomIntervalSegmenter(n_intervals=1, random_state=seed)
    mean_func = FunctionTransformer(func=np.mean, validate=False)
    row_mean = SeriesToPrimitivesRowTransformer(
        mean_func, check_transformer=False
    )
    segments = segmenter.fit_transform(X_train)
    expected = row_mean.fit_transform(segments)

    # Single-step equivalent.
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1, features=[np.mean], random_state=seed
    )
    actual = extractor.fit_transform(X_train)

    np.testing.assert_array_almost_equal(expected, actual)
def test_different_pipelines():
    """Compare with a segmenter + FeatureUnion transformer pipeline.

    The pipeline segments one random interval and computes mean, std and
    slope through a ``FeatureUnion``. Both its output and its fitted
    intervals must equal those of ``RandomIntervalFeatureExtractor``
    configured with the same features and random state.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    def _row_transformer(func):
        # Wrap a plain function so it is applied to each series in a row.
        return SeriesToPrimitivesRowTransformer(
            FunctionTransformer(func=func, validate=False),
            check_transformer=False,
        )

    segmenter = RandomIntervalSegmenter(n_intervals=1, random_state=seed)
    union = FeatureUnion(
        [
            ("mean", _row_transformer(np.mean)),
            ("std", _row_transformer(np.std)),
            ("slope", _row_transformer(_slope)),
        ]
    )
    pipe = Pipeline([("segment", segmenter), ("transform", union)])
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, _slope],
        random_state=seed,
    )
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    fitted_segmenter = pipe.steps[0][1]
    np.testing.assert_array_equal(
        fitted_segmenter.intervals_, extractor.intervals_
    )
def test_results(n_instances, n_timepoints, n_intervals):
    """Check extracted means and stds against direct numpy computations.

    For every fitted interval, the ``<start>_<end>_mean`` and
    ``<start>_<end>_std`` columns of the transformed output are compared
    with ``np.mean`` / ``np.std`` evaluated on the matching slice of the
    first variable of the input array.
    """
    X, _ = make_classification_problem(
        n_instances=n_instances,
        n_timepoints=n_timepoints,
        return_numpy=True,
    )
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std]
    )
    Xt = extractor.fit_transform(X)

    # Keep only the first occurrence of each column label so that the label
    # lookups below select a single column.
    keep = ~Xt.columns.duplicated()
    Xt = Xt.loc[:, keep]

    # Check results
    for start, end in extractor.intervals_:
        window = X[:, 0, start:end]
        expected_mean = np.mean(window, axis=-1)
        expected_std = np.std(window, axis=-1)

        mean_column = Xt.loc[:, f"{start}_{end}_mean"]
        std_column = Xt.loc[:, f"{start}_{end}_std"]
        actual_means = mean_column.to_numpy().ravel()
        actual_stds = std_column.to_numpy().ravel()

        np.testing.assert_array_equal(actual_means, expected_mean)
        np.testing.assert_array_equal(actual_stds, expected_std)
def test_equivalent_model_specifications(n_intervals, n_estimators):
    """Test composable TSF vs an equivalent model.

    Two pipelines are fitted outside of any ensemble with the same random
    state: one chains a random interval segmenter with a ``FeatureUnion`` of
    mean and std transformers, the other uses
    ``RandomIntervalFeatureExtractor`` directly. Both end in a
    ``DecisionTreeClassifier`` and their test-set predictions must be equal.

    NOTE(review): ``n_estimators`` is accepted but not used in the body.
    """
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # Due to tie-breaking/floating point rounding in the final decision tree
    # classifier, the results depend on the
    # exact column order of the input data

    # Compare pipeline predictions outside of ensemble.
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(
                n_intervals=n_intervals, random_state=random_state
            ),
        ),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer), ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf1 = Pipeline(steps)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std],
                random_state=random_state,
            ),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)

    # The comparison must actually be asserted: a bare ``np.array_equal``
    # call only returns a bool, which would be discarded and leave the test
    # unable to fail.
    np.testing.assert_array_equal(a, b)
def test_bad_features(bad_features):
    """Check that an exception is raised for invalid ``features`` arguments.

    The invalid value is passed through the ``features`` keyword (not
    ``n_intervals``), so that it is the feature validation that is being
    exercised.
    """
    X, _ = make_classification_problem()
    with pytest.raises(ValueError):
        # fit_transform is used so the check triggers whether the features
        # are validated at fit time or at transform time.
        RandomIntervalFeatureExtractor(features=bad_features).fit_transform(X)
def test_bad_n_intervals(bad_n_intervals):
    """Check that fitting with an invalid ``n_intervals`` raises ValueError."""
    X, _ = make_classification_problem()
    with pytest.raises(ValueError):
        # Construction stays inside the context manager so an error raised
        # by either the constructor or ``fit`` satisfies the expectation.
        extractor = RandomIntervalFeatureExtractor(n_intervals=bad_n_intervals)
        extractor.fit(X)