def test_persistence_via_pickle(estimator_instance):
    """Check that we can pickle all estimators."""
    estimator = estimator_instance
    set_random_state(estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Generate results before pickling
    results = dict()
    args = dict()
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args[method] = _make_args(estimator, method)
            results[method] = getattr(estimator, method)(*args[method])

    # Pickle and unpickle
    pickled_estimator = pickle.dumps(estimator)
    unpickled_estimator = pickle.loads(pickled_estimator)

    # Compare results from before pickling against results after unpickling
    for method in results:
        unpickled_result = getattr(unpickled_estimator, method)(*args[method])
        _assert_array_almost_equal(
            results[method],
            unpickled_result,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )

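# The comparison above relies on the `_assert_array_almost_equal` helper. For
# orientation, here is a minimal sketch of what such a helper could look like.
# This is a hypothetical stand-in, not the actual sktime implementation; it
# assumes results are numpy arrays, pandas objects, or nested containers thereof.
def _assert_almost_equal_sketch(x, y, decimal=6, err_msg=""):
    if isinstance(x, (pd.Series, pd.DataFrame)):
        # compare the underlying arrays of pandas objects
        x, y = np.asarray(x), np.asarray(y)
    if isinstance(x, (list, tuple)):
        # recurse element-wise into nested containers
        assert len(x) == len(y), err_msg
        for xi, yi in zip(x, y):
            _assert_almost_equal_sketch(xi, yi, decimal=decimal, err_msg=err_msg)
    else:
        np.testing.assert_array_almost_equal(x, y, decimal=decimal, err_msg=err_msg)
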
def test_fit_idempotent(estimator_instance):
    """Check that calling fit twice is equivalent to calling it once."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Fit for the first time
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    results = dict()
    args = dict()
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args[method] = _make_args(estimator, method)
            results[method] = getattr(estimator, method)(*args[method])

    # Fit again
    set_random_state(estimator)
    estimator.fit(*fit_args)

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_result = getattr(estimator, method)(*args[method])
            _assert_array_almost_equal(
                results[method],
                new_result,
                err_msg=f"Idempotency check failed for method {method}",
            )

def test_methods_do_not_change_state(estimator_instance):
    """Check that non-state-changing methods do not change state.

    Check that methods that are not supposed to change attributes of the
    estimator do not change anything (including hyper-parameters and
    fitted parameters).
    """
    estimator = estimator_instance
    set_random_state(estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    dict_before = estimator.__dict__.copy()

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args = _make_args(estimator, method)
            getattr(estimator, method)(*args)

            if method == "transform" and estimator.get_class_tag("fit-in-transform"):
                # Some transformers fit during transform, as they apply
                # some transformation to each series passed to transform,
                # so transform will actually change the state of these estimators.
                continue

            if method == "predict" and estimator.get_class_tag("fit-in-predict"):
                # Some annotators fit during predict, as they apply
                # some annotation to each series passed to predict,
                # so predict will actually change the state of these annotators.
                continue

            assert (
                estimator.__dict__ == dict_before
            ), f"Estimator: {estimator} changes __dict__ during {method}"

def test_output_type(Estimator):
    """Test annotator output type."""
    estimator = _construct_instance(Estimator)
    args = _make_args(estimator, "fit")
    estimator.fit(*args)
    args = _make_args(estimator, "predict")
    y_pred = estimator.predict(*args)
    assert isinstance(y_pred, pd.Series)

def test_output_type(Estimator): """Test annotator output type.""" estimator = Estimator.create_test_instance() args = _make_args(estimator, "fit") estimator.fit(*args) args = _make_args(estimator, "predict") y_pred = estimator.predict(*args) assert isinstance(y_pred, pd.Series)
def test_sklearn_composite_classifiers(data_args, composite_classifier):
    """Test if sklearn composite classifiers can handle sktime data and classifiers."""
    base_clf = CanonicalIntervalForest()
    fit_args = _make_args(base_clf, "fit", **data_args)
    composite_classifier.fit(*fit_args)
    predict_args = _make_args(base_clf, "predict", **data_args)
    preds = composite_classifier.predict(*predict_args)
    assert isinstance(preds, np.ndarray)

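# Hedged example of a composite that the `composite_classifier` fixture above
# might supply; VotingClassifier is a real sklearn class, but this particular
# pairing is an illustrative assumption:
#
#     from sklearn.ensemble import VotingClassifier
#     composite_classifier = VotingClassifier(
#         estimators=[("cif", CanonicalIntervalForest())]
#     )
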
def check_transform_returns_same_time_index(Estimator):
    """Check that (inverse_)transform preserves the time index of the input."""
    assert issubclass(Estimator, _SeriesToSeriesTransformer)
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    for method in ["transform", "inverse_transform"]:
        if hasattr(estimator, method):
            X = _make_args(estimator, method)[0]
            Xt = getattr(estimator, method)(X)
            np.testing.assert_array_equal(X.index, Xt.index)

def test_regressor_output(Estimator):
    """Test regressor outputs the correct data types and values."""
    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit")
    estimator.fit(X_train, y_train)
    X_new = _make_args(estimator, "predict")[0]

    # check predict
    y_pred = estimator.predict(X_new)
    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    assert y_pred.shape == (X_new.shape[0],)
    assert np.issubdtype(y_pred.dtype, np.floating)

def check_transform_returns_same_time_index(Estimator):
    """Check that (inverse_)transform preserves the time index of the input."""
    estimator = _construct_instance(Estimator)
    if estimator.get_tag("transform-returns-same-time-index"):
        assert issubclass(Estimator, (_SeriesToSeriesTransformer, BaseTransformer))
        fit_args = _make_args(estimator, "fit")
        estimator.fit(*fit_args)
        for method in ["transform", "inverse_transform"]:
            if _has_capability(estimator, method):
                X = _make_args(estimator, method)[0]
                Xt = getattr(estimator, method)(X)
                np.testing.assert_array_equal(X.index, Xt.index)

def test_sklearn_cross_validation(data_args):
    """Test sklearn cross-validation works with sktime panel data and classifiers."""
    clf = CanonicalIntervalForest.create_test_instance()
    fit_args = _make_args(clf, "fit", **data_args)
    scores = cross_val_score(clf, *fit_args, cv=KFold(n_splits=3))
    assert isinstance(scores, np.ndarray)

def test_fit_returns_self(estimator_instance):
    """Check that fit returns self."""
    estimator = estimator_instance
    fit_args = _make_args(estimator, "fit")
    assert (
        estimator.fit(*fit_args) is estimator
    ), f"Estimator: {estimator} does not return self when calling fit"

def test_fit_updates_state(estimator_instance):
    """Check fit/update state change."""
    # Check that fit updates the is-fitted states
    attrs = ["_is_fitted", "is_fitted"]

    estimator = estimator_instance

    assert hasattr(
        estimator, "_is_fitted"
    ), f"Estimator: {estimator.__class__.__name__} does not set _is_fitted in construction"

    # Check it's not fitted before calling fit
    for attr in attrs:
        assert not getattr(
            estimator, attr
        ), f"Estimator: {estimator} does not initialize attribute: {attr} to False"

    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Check states are updated after calling fit
    for attr in attrs:
        assert getattr(
            estimator, attr
        ), f"Estimator: {estimator} does not update attribute: {attr} during fit"

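# For context, a minimal sketch of the is-fitted convention that
# test_fit_updates_state and test_fit_returns_self assume. This is a
# hypothetical base class, not the actual sktime one:
class _BaseEstimatorSketch:
    def __init__(self):
        # set in construction, flipped by fit
        self._is_fitted = False

    @property
    def is_fitted(self):
        # read-only public view of the private flag
        return self._is_fitted

    def fit(self, *args):
        # ... fitting logic would go here ...
        self._is_fitted = True
        # fit returns self, as test_fit_returns_self requires
        return self
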
def test_fit_does_not_overwrite_hyper_params(estimator_instance):
    """Check that we do not overwrite hyper-parameters in fit."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Make a physical copy of the original estimator parameters before fitting.
    params = estimator.get_params()
    original_params = deepcopy(params)

    # Fit the model
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Compare the state of the model parameters with the original parameters
    new_params = estimator.get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash function
        # that introspects recursively any subobjects to compute a checksum.
        # The only exception to this rule of immutable constructor parameters
        # is a possible RandomState instance, but in this check we explicitly
        # fixed the random_state params recursively to be integer seeds.
        assert joblib.hash(new_value) == joblib.hash(original_value), (
            f"Estimator {estimator.__class__.__name__} should not change or mutate "
            f"the parameter {param_name} from {original_value} to {new_value} "
            f"during fit."
        )

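# The comment above refers to fixing random_state params recursively; a hedged
# sketch of what the `set_random_state` helper used throughout could do
# (hypothetical, assuming sklearn-style get_params/set_params):
def _set_random_state_sketch(estimator, seed=0):
    for key in estimator.get_params(deep=True):
        # match both the top-level `random_state` and nested `<component>__random_state`
        if key == "random_state" or key.endswith("__random_state"):
            estimator.set_params(**{key: seed})
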
def test_sklearn_cross_validation_iterators(data_args, cross_validation_method):
    """Test if sklearn cross-validation iterators can handle sktime panel data."""
    clf = CanonicalIntervalForest.create_test_instance()
    fit_args = _make_args(clf, "fit", **data_args)
    groups = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10]
    for train, test in cross_validation_method.split(*fit_args, groups=groups):
        assert isinstance(train, np.ndarray) and isinstance(test, np.ndarray)

def _construct_fit_transform(Estimator, **kwargs):
    """Construct an instance and return the result of fit_transform."""
    estimator = _construct_instance(Estimator)

    # For forecasters which are also transformers (e.g. pipelines), we cannot pass
    # the forecasting horizon to transform, so we only return the first two
    # arguments here. Note that this will fail for forecasters which require the
    # forecasting horizon in fit.
    args = _make_args(estimator, "fit", **kwargs)[:2]
    return estimator.fit_transform(*args)

def check_transform_inverse_transform_equivalent(Estimator): estimator = _construct_instance(Estimator) X = _make_args(estimator, "fit")[0] Xt = estimator.fit_transform(X) Xit = estimator.inverse_transform(Xt) if estimator.get_tag("transform-returns-same-time-index"): _assert_array_almost_equal(X, Xit) else: _assert_array_almost_equal(X.loc[Xit.index], Xit)
def test_multiprocessing_idempotent(estimator_class):
    """Test that single- and multi-process runs give identical results.

    Check that running an estimator on a single process is no different from
    running it on multiple processes. We also check that we can set n_jobs=-1
    to make use of all CPUs. The test is not strictly necessary, as we rely on
    joblib for parallelization and can trust that it works as expected.
    """
    estimator = estimator_class.create_test_instance()
    params = estimator.get_params()

    if "n_jobs" in params:
        results = dict()
        args = dict()

        # run on a single process
        estimator = estimator_class.create_test_instance()
        estimator.set_params(n_jobs=1)
        set_random_state(estimator)
        args["fit"] = _make_args(estimator, "fit")
        estimator.fit(*args["fit"])

        # compute and store results
        for method in NON_STATE_CHANGING_METHODS:
            if hasattr(estimator, method):
                args[method] = _make_args(estimator, method)
                results[method] = getattr(estimator, method)(*args[method])

        # run on multiple processes, reusing the same input arguments
        estimator = estimator_class.create_test_instance()
        estimator.set_params(n_jobs=-1)
        set_random_state(estimator)
        estimator.fit(*args["fit"])

        # compute and compare results
        for method in results:
            if hasattr(estimator, method):
                result = getattr(estimator, method)(*args[method])
                _assert_array_equal(
                    results[method],
                    result,
                    err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
                )

def test_raises_not_fitted_error(estimator_instance):
    """Check that we raise an appropriate error for unfitted estimators."""
    estimator = estimator_instance

    # call methods without prior fitting and check that they raise our
    # NotFittedError
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args = _make_args(estimator, method)
            with pytest.raises(NotFittedError, match=r"has not been fitted"):
                getattr(estimator, method)(*args)

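# A hedged sketch of how an estimator method could raise the NotFittedError
# matched above; the message wording is an assumption, chosen to satisfy the
# `match=r"has not been fitted"` pattern:
def _check_is_fitted_sketch(estimator):
    if not estimator._is_fitted:
        raise NotFittedError(
            f"This instance of {type(estimator).__name__} has not been fitted yet; "
            "please call `fit` first."
        )
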
def test_classifier_output(Estimator):
    """Test classifier outputs the correct data types and values."""
    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit", n_classes=N_CLASSES)
    estimator.fit(X_train, y_train)
    X_new = _make_args(estimator, "predict")[0]

    # check predict
    y_pred = estimator.predict(X_new)
    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    assert y_pred.shape == (X_new.shape[0],)
    assert np.all(np.isin(np.unique(y_pred), np.unique(y_train)))

    # check predict_proba
    if hasattr(estimator, "predict_proba"):
        y_proba = estimator.predict_proba(X_new)
        assert isinstance(y_proba, ACCEPTED_OUTPUT_TYPES)
        assert y_proba.shape == (X_new.shape[0], N_CLASSES)
        np.testing.assert_allclose(y_proba.sum(axis=1), 1)

def test_3d_numpy_input(Estimator):
    """Test estimators handle 3d numpy input correctly."""
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit", return_numpy=True)
    estimator.fit(*fit_args)

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            # check whether the method can handle 3d numpy input data
            try:
                args = _make_args(estimator, method, return_numpy=True)
                getattr(estimator, method)(*args)

            # if not, check if it raises the appropriate error message
            except ValueError as e:
                error_msg = "This method requires X to be a nested pd.DataFrame"
                assert error_msg in str(e), (
                    f"{estimator.__class__.__name__} does "
                    f"not handle 3d numpy input data correctly"
                )

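# For context: a hedged sketch of converting the 3d numpy panel format this
# check exercises into the nested pd.DataFrame format the error message refers
# to. The helper and its column names are illustrative assumptions:
def _nested_from_3d_sketch(X):
    # X has shape (n_instances, n_columns, n_timepoints); each DataFrame cell
    # becomes a pd.Series holding one instance's series for one variable
    n_instances, n_columns, _ = X.shape
    return pd.DataFrame(
        {
            f"var_{j}": [pd.Series(X[i, j]) for i in range(n_instances)]
            for j in range(n_columns)
        }
    )
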
def test_sklearn_parameter_tuning(data_args, parameter_tuning_method):
    """Test if sklearn parameter tuners can handle sktime panel data and classifiers."""
    clf = CanonicalIntervalForest.create_test_instance()
    param_grid = {"n_intervals": [2, 3], "att_subsample_size": [2, 3]}
    fit_args = _make_args(clf, "fit", **data_args)

    parameter_tuning_method = parameter_tuning_method(
        clf, param_grid, cv=KFold(n_splits=3)
    )
    parameter_tuning_method.fit(*fit_args)
    assert isinstance(parameter_tuning_method.best_estimator_, CanonicalIntervalForest)

def test_multivariate_input(Estimator):
    """Test classifiers handle multivariate pd.DataFrame input correctly."""
    # check if multivariate input is correctly handled
    n_columns = 2
    error_msg = "X must be univariate"

    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit", n_columns=n_columns)

    # check if estimator can handle multivariate data
    try:
        estimator.fit(X_train, y_train)
        for method in ("predict", "predict_proba"):
            X = _make_args(estimator, method, n_columns=n_columns)[0]
            getattr(estimator, method)(X)

    # if not, check if it raises an error with an appropriate message
    except ValueError as e:
        assert error_msg in str(e), (
            f"{estimator.__class__.__name__} does not handle multivariate "
            f"data and does not raise an appropriate error when multivariate "
            f"data is passed"
        )

def test_methods_have_no_side_effects(estimator_instance):
    """Check that calling methods has no side effects on their arguments."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Fit the estimator, keeping a copy of the original arguments
    fit_args = _make_args(estimator, "fit")
    old_fit_args = deepcopy(fit_args)
    estimator.fit(*fit_args)

    assert deep_equals(
        old_fit_args, fit_args
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_args = _make_args(estimator, method)
            old_args = deepcopy(new_args)
            getattr(estimator, method)(*new_args)

            assert deep_equals(
                old_args, new_args
            ), f"Estimator: {estimator} has side effects on arguments of {method}"

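# The check above relies on `deep_equals`. A minimal sketch of such a helper
# (hypothetical, not the sktime implementation), assuming arguments are numpy
# arrays, pandas objects, or nested containers thereof:
def _deep_equals_sketch(x, y):
    if isinstance(x, np.ndarray):
        # plain `==` on arrays is element-wise, so use array_equal instead
        return isinstance(y, np.ndarray) and np.array_equal(x, y)
    if isinstance(x, (pd.Series, pd.DataFrame)):
        return type(x) is type(y) and x.equals(y)
    if isinstance(x, (list, tuple)):
        return len(x) == len(y) and all(
            _deep_equals_sketch(xi, yi) for xi, yi in zip(x, y)
        )
    return x == y
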
def test_classifier_output(Estimator): """Test classifier outputs the correct data types and values. Test predict produces a np.array or pd.Series with only values seen in the train data, and that predict_proba probability estimates add up to one. """ estimator = _construct_instance(Estimator) X_train, y_train = _make_args(estimator, "fit", n_classes=N_CLASSES) estimator.fit(X_train, y_train) X_new = _make_args(estimator, "predict")[0] # check predict y_pred = estimator.predict(X_new) assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES) assert y_pred.shape == (X_new.shape[0], ) assert np.all(np.isin(np.unique(y_pred), np.unique(y_train))) # check predict proba if hasattr(estimator, "predict_proba"): y_proba = estimator.predict_proba(X_new) assert isinstance(y_proba, ACCEPTED_OUTPUT_TYPES) assert y_proba.shape == (X_new.shape[0], N_CLASSES) np.testing.assert_allclose(y_proba.sum(axis=1), 1)
def _construct_fit(Estimator, **kwargs):
    """Construct an instance and return the fitted estimator."""
    estimator = _construct_instance(Estimator)
    # as in _construct_fit_transform, only pass the first two arguments
    args = _make_args(estimator, "fit", **kwargs)[:2]
    return estimator.fit(*args)

def check_transform_inverse_transform_equivalent(Estimator): estimator = _construct_instance(Estimator) X = _make_args(estimator, "fit")[0] Xt = estimator.fit_transform(X) Xit = estimator.inverse_transform(Xt) _assert_array_almost_equal(X, Xit)
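# Hedged example of invoking the round-trip check directly on a transformer;
# the import path is an assumption and may differ between sktime versions:
#
#     from sktime.transformations.series.exponent import ExponentTransformer
#     check_transform_inverse_transform_equivalent(ExponentTransformer)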