def _make_fit_args(estimator, **kwargs):
    """Return positional arguments for fitting, keyed on the estimator's scitype.

    Forecasters get ``(y, X, fh)``; other estimator families get the data
    tuple their ``fit`` signature expects. Raises ValueError for unknown types.
    """
    if isinstance(estimator, BaseForecaster):
        # we need to handle the TransformedTargetForecaster separately:
        # it is also a series-to-series transformer and needs a plain series
        if isinstance(estimator, _SeriesToSeriesTransformer):
            y = _make_series(**kwargs)
        else:
            y = make_forecasting_problem(**kwargs)
        return y, None, 1  # y, X, fh
    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    series_transformers = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_transformers):
        return (_make_series(**kwargs),)
    panel_transformers = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_transformers):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)
    raise ValueError(_get_err_msg(estimator))
def _make_fit_args(estimator, **kwargs):
    """Build the tuple of arguments needed to fit the given estimator.

    Dispatches on the estimator's base class; unknown types raise ValueError.
    """
    if isinstance(estimator, BaseForecaster):
        # forecasters take (y, X, fh); X is unused here and fh is one step ahead
        return make_forecasting_problem(**kwargs), None, 1
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    if isinstance(
        estimator, (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    ):
        return (_make_series(**kwargs),)
    if isinstance(estimator, (_PanelToTabularTransformer, _PanelToPanelTransformer)):
        return make_classification_problem(**kwargs)
    raise ValueError(_get_err_msg(estimator))
def _make_fit_args(estimator, **kwargs):
    """Construct the arguments passed to ``fit`` for the given estimator type.

    Forecasters get ``(y, X, fh)``; transformers, classifiers, regressors,
    clusterers, pairwise transformers and aligners each get the data tuple
    their ``fit`` expects. Raises ValueError for unrecognized estimator types.
    The branch order matters: subclasses must be matched before their bases.
    """
    if isinstance(estimator, BaseForecaster):
        # we need to handle the TransformedTargetForecaster separately:
        # it is also a series-to-series transformer
        if isinstance(estimator, _SeriesToSeriesTransformer):
            y = _make_series(**kwargs)
        elif "n_columns" in kwargs.keys():
            y = make_forecasting_problem(**kwargs)
        else:
            # create matching n_columns input, if n_columns not passed
            # e.g., to give bivariate y to strictly multivariate forecaster
            scitype_y = estimator.get_tag(tag_name="scitype:y", raise_error=False)
            n_columns = _get_n_columns(scitype_y)[0]
            y = make_forecasting_problem(n_columns=n_columns, **kwargs)
        return y, None, 1  # y, X, fh
    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    if isinstance(
        estimator, (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    ):
        return (_make_series(**kwargs),)
    if isinstance(estimator, (_PanelToTabularTransformer, _PanelToPanelTransformer)):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseTransformer):
        # transformers that require y get a classification problem,
        # the rest get a plain series
        if estimator.get_tag("requires_y"):
            return make_classification_problem(**kwargs)
        return (_make_series(**kwargs),)
    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)
    if isinstance(estimator, (BasePairwiseTransformer, BasePairwiseTransformerPanel)):
        return None, None
    if isinstance(estimator, BaseAligner):
        # aligners expect a list of (here: bivariate) series
        X = [_make_series(n_columns=2, **kwargs), _make_series(n_columns=2, **kwargs)]
        return (X,)
    raise ValueError(_get_err_msg(estimator))
def test_from_nested_to_2d_array(n_instances, n_columns, n_timepoints):
    """Nested-to-2d conversion flattens columns and preserves the index."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    flat = from_nested_to_2d_array(nested)
    assert flat.shape == (n_instances, n_columns * n_timepoints)
    assert flat.index.equals(nested.index)
def test_check_X_enforce_min_columns():
    """check_X / check_X_y must reject input with fewer columns than required."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"columns"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_min_columns=3)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_min_columns=3)
def test_check_X_enforce_univariate():
    """check_X / check_X_y must reject multivariate input when univariate is enforced."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"univariate"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_univariate=True)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_univariate=True)
def test_from_nested_to_3d_numpy(n_instances, n_columns, n_timepoints):
    """Nested-to-3d conversion returns an ndarray with the expected shape/values."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    arr = from_nested_to_3d_numpy(nested)
    # check types and shapes
    assert isinstance(arr, np.ndarray)
    assert arr.shape == (n_instances, n_columns, n_timepoints)
    # spot-check values of one series against the source cell
    np.testing.assert_array_equal(nested.iloc[1, 0], arr[1, 0, :])
def test_check_enforce_min_instances():
    """check_X / check_X_y / check_y must reject data with too few instances."""
    X, y = make_classification_problem(n_instances=3)
    expected_msg = r"instance"
    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_min_instances=4)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_min_instances=4)
    with pytest.raises(ValueError, match=expected_msg):
        check_y(y, enforce_min_instances=4)
def test_from_nested_to_multi_index(n_instances, n_columns, n_timepoints):
    """Nested-to-multiindex conversion yields the expected shape and index names."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    converted = from_nested_to_multi_index(
        nested, instance_index="case_id", time_index="reading_id"
    )
    assert isinstance(converted, pd.DataFrame)
    # one row per (instance, timepoint) pair, one column per dimension
    assert converted.shape == (n_instances * n_timepoints, n_columns)
    assert converted.index.names == ["case_id", "reading_id"]
def test_from_nested_to_long(n_instances, n_columns, n_timepoints):
    """Long-format conversion has one row per (instance, timepoint, dimension)."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    long_df = from_nested_to_long(
        nested,
        instance_column_name="case_id",
        time_column_name="reading_id",
        dimension_column_name="dim_id",
    )
    assert isinstance(long_df, pd.DataFrame)
    assert long_df.shape == (n_instances * n_timepoints * n_columns, 4)
    assert (long_df.columns == ["case_id", "reading_id", "dim_id", "value"]).all()
def test_are_columns_nested(n_instances, n_columns, n_timepoints):
    """are_columns_nested flags nested vs primitive columns correctly."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    zeros = pd.DataFrame(np.zeros_like(nested))
    # primitive columns first, nested columns after
    mixed_prefix = pd.concat([zeros, nested], axis=1)
    # nested columns first, one primitive column appended
    mixed_suffix = nested.copy()
    mixed_suffix["primitive_col"] = 1.0
    assert list(are_columns_nested(nested)) == [True] * n_columns
    assert (
        list(are_columns_nested(mixed_prefix))
        == [False] * n_columns + [True] * n_columns
    )
    assert list(are_columns_nested(mixed_suffix)) == [True] * n_columns + [False]
def test_tsfresh_extractor(default_fc_parameters):
    """Test that mean feature of TSFreshFeatureExtract is identical with sample mean."""
    X, _ = make_classification_problem()
    extractor = TSFreshFeatureExtractor(
        default_fc_parameters=default_fc_parameters, disable_progressbar=True
    )
    Xt = extractor.fit_transform(X)
    actual = Xt.filter(like="__mean", axis=1).values.ravel()
    # compute the expected per-series means via the wide representation
    wide = convert(X, from_type="nested_univ", to_type="pd-wide")
    expected = wide.mean(axis=1).values
    # sanity check: first expected value equals the first series' sample mean
    assert expected[0] == X.iloc[0, 0].mean()
    np.testing.assert_allclose(actual, expected)
def test_tsfresh_extractor(default_fc_parameters):
    """TSFresh "__mean" feature equals the per-series sample mean on the train split."""
    X, y = make_classification_problem()
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    extractor = TSFreshFeatureExtractor(
        default_fc_parameters=default_fc_parameters, disable_progressbar=True
    )
    Xt = extractor.fit_transform(X_train, y_train)
    actual = Xt.filter(like="__mean", axis=1).values.ravel()
    expected = from_nested_to_2d_array(X_train).mean(axis=1).values
    # sanity check: first expected value equals the first series' sample mean
    assert expected[0] == X_train.iloc[0, 0].mean()
    np.testing.assert_allclose(actual, expected)
def test_is_nested_dataframe(n_instances, n_columns, n_timepoints):
    """is_nested_dataframe accepts nested frames and rejects other containers."""
    arr = np.random.normal(size=(n_instances, n_columns, n_timepoints))
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    zeros = pd.DataFrame(np.zeros_like(nested))
    heterogenous = pd.concat([zeros, nested], axis=1)
    mi_df = make_multi_index_dataframe(
        n_instances=n_instances, n_timepoints=n_timepoints, n_columns=n_columns
    )
    # plain ndarrays and multi-index frames are not nested
    assert not is_nested_dataframe(arr)
    assert not is_nested_dataframe(mi_df)
    # fully and partially nested frames both count as nested
    assert is_nested_dataframe(nested)
    assert is_nested_dataframe(heterogenous)
def test_make_classification_problem(
    n_instances, n_columns, n_timepoints, n_classes, return_numpy
):
    """Generated classification problems have the requested dimensions and classes."""
    X, y = make_classification_problem(
        n_instances=n_instances,
        n_classes=n_classes,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        return_numpy=return_numpy,
    )
    # check dimensions of generated data
    _check_X_y(X, y, n_instances, n_columns, n_timepoints, check_numpy=return_numpy)
    # check number of classes
    assert len(np.unique(y)) == n_classes
def test_different_pipelines():
    """Compare with transformer pipeline using TSFeatureUnion."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    def _row_transformer(func):
        # wrap a numpy reduction as a series-to-primitives row transformer
        return SeriesToPrimitivesRowTransformer(
            FunctionTransformer(func=func, validate=False),
            check_transformer=False,
        )

    union = FeatureUnion(
        [
            ("mean", _row_transformer(np.mean)),
            ("std", _row_transformer(np.std)),
            ("slope", _row_transformer(_slope)),
        ]
    )
    pipe = Pipeline(
        [
            (
                "segment",
                RandomIntervalSegmenter(n_intervals=1, random_state=random_state),
            ),
            ("transform", union),
        ]
    )
    a = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, _slope],
        random_state=random_state,
    )
    b = extractor.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    # the same random_state must produce the same intervals in both paths
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, extractor.intervals_)
def test_different_implementations():
    """Chained segment + row-transform equals RandomIntervalFeatureExtractor."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals=1, random_state=random_state)
    row_mean = SeriesToPrimitivesRowTransformer(
        FunctionTransformer(func=np.mean, validate=False), check_transformer=False
    )
    chained = row_mean.fit_transform(segmenter.fit_transform(X_train))

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1, features=[np.mean], random_state=random_state
    )
    direct = extractor.fit_transform(X_train)
    np.testing.assert_array_almost_equal(chained, direct)
def test_results(n_instances, n_timepoints, n_intervals):
    """Extracted interval features match directly computed means and stds."""
    X, _ = make_classification_problem(
        n_instances=n_instances, n_timepoints=n_timepoints, return_numpy=True
    )
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std]
    )
    Xt = extractor.fit_transform(X)
    # intervals may repeat; keep only the first occurrence of each column
    Xt = Xt.loc[:, ~Xt.columns.duplicated()]
    # Check results against per-interval reductions computed directly on X
    for start, end in extractor.intervals_:
        segment = X[:, 0, start:end]
        actual_means = Xt.loc[:, f"{start}_{end}_mean"].to_numpy().ravel()
        actual_stds = Xt.loc[:, f"{start}_{end}_std"].to_numpy().ravel()
        np.testing.assert_array_equal(actual_means, np.mean(segment, axis=-1))
        np.testing.assert_array_equal(actual_stds, np.std(segment, axis=-1))
from sklearn.pipeline import FeatureUnion from sklearn.pipeline import Pipeline from sklearn.preprocessing import FunctionTransformer from sklearn.tree import DecisionTreeClassifier from sktime.classification.compose._ensemble import TimeSeriesForestClassifier from sktime.transformations.panel.compose import ( SeriesToPrimitivesRowTransformer, ) from sktime.transformations.panel.segment import IntervalSegmenter from sktime.transformations.panel.summarize._extract import ( RandomIntervalFeatureExtractor, ) from sktime.utils._testing.panel import make_classification_problem X_train, y_train = make_classification_problem() # Check results of a simple case of single estimator, single feature and # single interval from different but equivalent implementations def test_feature_importances_single_feature_interval_and_estimator(): random_state = 1234 # Compute using default method features = [np.mean] steps = [ ( "transform", RandomIntervalFeatureExtractor( n_intervals=1, features=features, random_state=random_state ),
def test_bad_n_intervals(bad_n_intervals):
    """Check that exception is raised for bad input args."""
    # y is not needed for fitting the transformer, so discard it
    X, _ = make_classification_problem()
    with pytest.raises(ValueError):
        RandomIntervalFeatureExtractor(n_intervals=bad_n_intervals).fit(X)
from sklearn.preprocessing import FunctionTransformer from sklearn.tree import DecisionTreeClassifier from sktime.classification.compose import ComposableTimeSeriesForestClassifier from sktime.datasets import load_gunpoint from sktime.transformations.panel.compose import ( SeriesToPrimitivesRowTransformer, ) from sktime.transformations.panel.segment import RandomIntervalSegmenter from sktime.transformations.panel.summarize import ( RandomIntervalFeatureExtractor, ) from sktime.utils._testing.panel import make_classification_problem from sktime.utils.slope_and_trend import _slope X, y = make_classification_problem() n_classes = len(np.unique(y)) mean_transformer = SeriesToPrimitivesRowTransformer( FunctionTransformer(func=np.mean, validate=False, kw_args={"axis": 0}), check_transformer=False, ) std_transformer = SeriesToPrimitivesRowTransformer( FunctionTransformer(func=np.std, validate=False, kw_args={"axis": 0}), check_transformer=False, ) # Check simple cases. def test_predict_proba(): clf = ComposableTimeSeriesForestClassifier(n_estimators=2)
def test_bad_features(bad_features):
    """Check that exception is raised for bad ``features`` input args.

    Bug fix: the parametrized ``bad_features`` value was previously passed
    as ``n_intervals``, so the ``features`` validation was never exercised.
    """
    # y is not needed for fitting the transformer, so discard it
    X, _ = make_classification_problem()
    with pytest.raises(ValueError):
        RandomIntervalFeatureExtractor(features=bad_features).fit(X)