def test_persistence_via_pickle(estimator_instance):
    """Check that estimator results are unchanged by a pickle round-trip."""
    estimator = estimator_instance
    set_random_state(estimator)
    estimator.fit(*_make_args(estimator, "fit"))

    # Record the output of every available non-state-changing method,
    # keeping the exact arguments so they can be replayed after unpickling.
    saved_args = {}
    expected = {}
    for method_name in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method_name):
            saved_args[method_name] = _make_args(estimator, method_name)
            expected[method_name] = getattr(estimator, method_name)(
                *saved_args[method_name]
            )

    # Round-trip the fitted estimator through pickle.
    restored = pickle.loads(pickle.dumps(estimator))

    # The restored estimator must reproduce the recorded outputs.
    for method_name, before in expected.items():
        after = getattr(restored, method_name)(*saved_args[method_name])
        _assert_array_almost_equal(
            before,
            after,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )
def test_fit_idempotent(estimator_instance):
    """Check that calling fit twice is equivalent to calling it once."""
    estimator = estimator_instance

    set_random_state(estimator)

    # Fit for the first time
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Record outputs of all non-state-changing methods after the first fit.
    results = dict()
    args = dict()
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args[method] = _make_args(estimator, method)
            results[method] = getattr(estimator, method)(*args[method])

    # Fit again with the same random state and data.
    set_random_state(estimator)
    estimator.fit(*fit_args)

    # Outputs after the second fit must match those from the first fit.
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            new_result = getattr(estimator, method)(*args[method])
            _assert_array_almost_equal(
                results[method],
                new_result,
                # Fix: the informative failure message was commented out,
                # leaving failures without context on which method diverged.
                err_msg=f"Idempotency check failed for method {method}",
            )
def test_methods_do_not_change_state(estimator_instance):
    """Check that non-state-changing methods do not change state.

    Check that methods that are not supposed to change attributes of the
    estimators do not change anything (including hyper-parameters and
    fitted parameters)
    """
    estimator = estimator_instance
    set_random_state(estimator)

    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    dict_before = estimator.__dict__.copy()

    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            args = _make_args(estimator, method)
            getattr(estimator, method)(*args)

            if method == "transform" and estimator.get_class_tag("fit-in-transform"):
                # Some transformations fit during transform, as they apply
                # some transformation to each series passed to transform,
                # so transform will actually change the state of these estimator.
                # Bug fix: re-snapshot state here; otherwise methods checked
                # after this one are compared against a stale snapshot and
                # fail spuriously.
                dict_before = estimator.__dict__.copy()
                continue

            if method == "predict" and estimator.get_class_tag("fit-in-predict"):
                # Some annotators fit during predict, as they apply
                # some apply annotation to each series passed to predict,
                # so predict will actually change the state of these annotators.
                # Bug fix: re-snapshot for the same reason as above.
                dict_before = estimator.__dict__.copy()
                continue

            assert (
                estimator.__dict__ == dict_before
            ), f"Estimator: {estimator} changes __dict__ during {method}"
def test_output_type(Estimator):
    """Check that predict returns a pd.Series."""
    estimator = _construct_instance(Estimator)

    estimator.fit(*_make_args(estimator, "fit"))
    predict_args = _make_args(estimator, "predict")
    prediction = estimator.predict(*predict_args)
    assert isinstance(prediction, pd.Series)
def test_output_type(Estimator):
    """Test annotator output type."""
    estimator = Estimator.create_test_instance()

    # Fit on generated data, then confirm predictions come back as a Series.
    estimator.fit(*_make_args(estimator, "fit"))
    predict_args = _make_args(estimator, "predict")
    output = estimator.predict(*predict_args)
    assert isinstance(output, pd.Series)
# Example #6
def test_sklearn_composite_classifiers(data_args, composite_classifier):
    """Test if sklearn composite classifiers can handle sktime data and classifiers."""
    # A base sktime classifier is only used here to generate suitable data.
    base_clf = CanonicalIntervalForest()

    composite_classifier.fit(*_make_args(base_clf, "fit", **data_args))
    predictions = composite_classifier.predict(
        *_make_args(base_clf, "predict", **data_args)
    )
    assert isinstance(predictions, np.ndarray)
def check_transform_returns_same_time_index(Estimator):
    """Check that transform and inverse_transform preserve the input time index."""
    assert issubclass(Estimator, _SeriesToSeriesTransformer)
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    for method in ["transform", "inverse_transform"]:
        if hasattr(estimator, method):
            X = _make_args(estimator, method)[0]
            # Bug fix: call the method under test; the original always called
            # `transform`, so `inverse_transform` was never actually exercised.
            Xt = getattr(estimator, method)(X)
            np.testing.assert_array_equal(X.index, Xt.index)
# Example #8
def test_regressor_output(Estimator):
    """Check that a regressor predicts a float vector of the right shape."""
    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit")
    estimator.fit(X_train, y_train)

    X_new = _make_args(estimator, "predict")[0]
    y_pred = estimator.predict(X_new)

    # Predictions: accepted container type, one value per instance, floating dtype.
    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    assert y_pred.shape == (X_new.shape[0],)
    assert np.issubdtype(y_pred.dtype, np.floating)
def check_transform_returns_same_time_index(Estimator):
    """Check that transform and inverse_transform preserve the input time index.

    Only applies to estimators with the "transform-returns-same-time-index" tag.
    """
    estimator = _construct_instance(Estimator)
    if estimator.get_tag("transform-returns-same-time-index"):
        assert issubclass(Estimator,
                          (_SeriesToSeriesTransformer, BaseTransformer))
        # Fix: reuse the instance built above instead of constructing a second,
        # identical one.
        fit_args = _make_args(estimator, "fit")
        estimator.fit(*fit_args)
        for method in ["transform", "inverse_transform"]:
            if _has_capability(estimator, method):
                X = _make_args(estimator, method)[0]
                # Bug fix: call the method under test; the original always
                # called `transform`, so `inverse_transform` was never exercised.
                Xt = getattr(estimator, method)(X)
                np.testing.assert_array_equal(X.index, Xt.index)
# Example #10
def test_sklearn_cross_validation(data_args):
    """Test sklearn cross-validation works with sktime panel data and classifiers."""
    classifier = CanonicalIntervalForest.create_test_instance()
    data = _make_args(classifier, "fit", **data_args)

    cv_scores = cross_val_score(classifier, *data, cv=KFold(n_splits=3))
    assert isinstance(cv_scores, np.ndarray)
def test_fit_returns_self(estimator_instance):
    """Check that fit returns self."""
    estimator = estimator_instance
    fitted = estimator.fit(*_make_args(estimator, "fit"))
    # fit must return the very same object to support method chaining.
    assert (
        fitted is estimator
    ), f"Estimator: {estimator} does not return self when calling fit"
# Example #12
def test_fit_updates_state(estimator_instance):
    """Check fit/update state change."""
    # Check that fit updates the is-fitted states
    attrs = ["_is_fitted", "is_fitted"]

    estimator = estimator_instance

    # Bug fix: `estimator` is an instance, which generally has no `__name__`;
    # building the original message would itself raise AttributeError.
    assert hasattr(
        estimator, "_is_fitted"
    ), f"Estimator: {type(estimator).__name__} does not set_is_fitted in construction"

    # Check it's not fitted before calling fit
    for attr in attrs:
        assert not getattr(
            estimator, attr
        ), f"Estimator: {estimator} does not initiate attribute: {attr} to False"

    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)

    # Check states are updated after calling fit
    for attr in attrs:
        assert getattr(
            estimator, attr
        ), f"Estimator: {estimator} does not update attribute: {attr} during fit"
# Example #13
def test_fit_does_not_overwrite_hyper_params(estimator_instance):
    """Check that we do not overwrite hyper-parameters in fit."""
    estimator = estimator_instance
    set_random_state(estimator)

    # Snapshot a deep copy of the constructor parameters before fitting.
    original_params = deepcopy(estimator.get_params())

    # Fit the model
    estimator.fit(*_make_args(estimator, "fit"))

    # Parameters must be unchanged after fit. joblib.hash recursively
    # checksums sub-objects, so mutation anywhere inside a parameter is
    # detected. RandomState instances would be the one exception, but
    # random_state params were fixed to integer seeds above.
    fitted_params = estimator.get_params()
    for name, before in original_params.items():
        after = fitted_params[name]
        assert joblib.hash(after) == joblib.hash(before), (
            "Estimator %s should not change or mutate "
            " the parameter %s from %s to %s during fit." %
            (estimator.__class__.__name__, name, before, after))
# Example #14
def test_sklearn_cross_validation_iterators(data_args, cross_validation_method):
    """Test if sklearn cross-validation iterators can handle sktime panel data."""
    classifier = CanonicalIntervalForest.create_test_instance()
    data = _make_args(classifier, "fit", **data_args)
    # Two instances per group, ten groups, matching the generated panel size.
    groups = [g for g in range(1, 11) for _ in range(2)]

    for train_idx, test_idx in cross_validation_method.split(*data, groups=groups):
        assert isinstance(train_idx, np.ndarray) and isinstance(test_idx, np.ndarray)
def _construct_fit_transform(Estimator, **kwargs):
    """Construct an instance of Estimator and return its fit_transform output."""
    estimator = _construct_instance(Estimator)

    # For forecasters which are also transformations (e.g. pipelines), we
    # cannot pass the forecasting horizon to transform, so only the first two
    # fit arguments are used. Note that this will fail for forecasters which
    # require the forecasting horizon in fit.
    X_y = _make_args(estimator, "fit", **kwargs)[:2]
    return estimator.fit_transform(*X_y)
# Example #16
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform undoes transform on the fitted data."""
    transformer = _construct_instance(Estimator)
    X = _make_args(transformer, "fit")[0]
    X_round_trip = transformer.inverse_transform(transformer.fit_transform(X))
    if not transformer.get_tag("transform-returns-same-time-index"):
        # The transformer may drop time points; compare only the surviving index.
        _assert_array_almost_equal(X.loc[X_round_trip.index], X_round_trip)
    else:
        _assert_array_almost_equal(X, X_round_trip)
# Example #17
def test_multiprocessing_idempotent(estimator_class):
    """Test that single and multi-process run results are identical.

    Check that running an estimator on a single process is no different to running
    it on multiple processes. We also check that we can set n_jobs=-1 to make use
    of all CPUs. The test is not really necessary though, as we rely on joblib for
    parallelization and can trust that it works as expected.
    """
    probe = estimator_class.create_test_instance()
    if "n_jobs" not in probe.get_params():
        return

    single_results = {}
    cached_args = {}

    # First pass: single process.
    est = estimator_class.create_test_instance()
    est.set_params(n_jobs=1)
    set_random_state(est)
    cached_args["fit"] = _make_args(est, "fit")
    est.fit(*cached_args["fit"])

    # Record outputs of every available non-state-changing method.
    for name in NON_STATE_CHANGING_METHODS:
        if hasattr(est, name):
            cached_args[name] = _make_args(est, name)
            single_results[name] = getattr(est, name)(*cached_args[name])

    # Second pass: all CPUs, replaying the exact same input arguments.
    est = estimator_class.create_test_instance()
    est.set_params(n_jobs=-1)
    set_random_state(est)
    est.fit(*cached_args["fit"])

    # Outputs must be identical between the two runs.
    for name, single_result in single_results.items():
        if hasattr(est, name):
            parallel_result = getattr(est, name)(*cached_args[name])
            _assert_array_equal(
                single_result,
                parallel_result,
                err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
            )
# Example #18
def test_raises_not_fitted_error(estimator_instance):
    """Check that we raise appropriate error for unfitted estimators."""
    estimator = estimator_instance
    # Every available non-state-changing method must refuse to run before
    # fit and raise our NotFittedError.
    available = (m for m in NON_STATE_CHANGING_METHODS if hasattr(estimator, m))
    for method_name in available:
        method_args = _make_args(estimator, method_name)
        with pytest.raises(NotFittedError, match=r"has not been fitted"):
            getattr(estimator, method_name)(*method_args)
# Example #19
def test_classifier_output(Estimator):
    """Check classifier predict/predict_proba output types, shapes and values."""
    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit", n_classes=N_CLASSES)
    estimator.fit(X_train, y_train)

    X_new = _make_args(estimator, "predict")[0]
    n_instances = X_new.shape[0]

    # Predicted labels: right container, one per instance, only seen classes.
    y_pred = estimator.predict(X_new)
    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    assert y_pred.shape == (n_instances,)
    assert np.all(np.isin(np.unique(y_pred), np.unique(y_train)))

    # Probability estimates: one row per instance, rows summing to one.
    if hasattr(estimator, "predict_proba"):
        y_proba = estimator.predict_proba(X_new)
        assert isinstance(y_proba, ACCEPTED_OUTPUT_TYPES)
        assert y_proba.shape == (n_instances, N_CLASSES)
        np.testing.assert_allclose(y_proba.sum(axis=1), 1)
# Example #20
def test_3d_numpy_input(Estimator):
    """Check that methods accept 3d numpy input or raise the expected error."""
    estimator = _construct_instance(Estimator)
    estimator.fit(*_make_args(estimator, "fit", return_numpy=True))

    expected_msg = "This method requires X to be a nested pd.DataFrame"
    for method in NON_STATE_CHANGING_METHODS:
        if not hasattr(estimator, method):
            continue

        # Either the method handles 3d numpy data ...
        try:
            numpy_args = _make_args(estimator, method, return_numpy=True)
            getattr(estimator, method)(*numpy_args)
        # ... or it must fail with the appropriate error message.
        except ValueError as e:
            assert expected_msg in str(e), (
                f"{estimator.__class__.__name__} does "
                f"not handle 3d numpy input data correctly")
# Example #21
def test_sklearn_parameter_tuning(data_args, parameter_tuning_method):
    """Test if sklearn parameter tuners can handle sktime panel data and classifiers."""
    classifier = CanonicalIntervalForest.create_test_instance()
    grid = {"n_intervals": [2, 3], "att_subsample_size": [2, 3]}
    data = _make_args(classifier, "fit", **data_args)

    tuner = parameter_tuning_method(classifier, grid, cv=KFold(n_splits=3))
    tuner.fit(*data)
    # The tuner must expose a fitted best estimator of the sktime class.
    assert isinstance(tuner.best_estimator_, CanonicalIntervalForest)
# Example #22
def test_multivariate_input(Estimator):
    """Test classifiers handle multivariate pd.DataFrame input correctly."""
    # check if multivariate input is correctly handled
    n_columns = 2
    error_msg = "X must be univariate"

    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit", n_columns=n_columns)

    # Either the estimator supports multivariate data end to end ...
    try:
        estimator.fit(X_train, y_train)
        for method in ("predict", "predict_proba"):
            X_multi = _make_args(estimator, method, n_columns=n_columns)[0]
            getattr(estimator, method)(X_multi)

    # ... or it must reject it with an informative error.
    except ValueError as e:
        assert error_msg in str(e), (
            f"{estimator.__class__.__name__} does not handle multivariate "
            f"data and does not raise an appropriate error when multivariate "
            f"data is passed")
# Example #23
def test_methods_have_no_side_effects(estimator_instance):
    """Check that calling methods has no side effects on args."""
    estimator = estimator_instance

    set_random_state(estimator)

    # fit must not mutate its arguments; compare against a deep copy.
    fit_args = _make_args(estimator, "fit")
    fit_args_snapshot = deepcopy(fit_args)
    estimator.fit(*fit_args)

    assert deep_equals(
        fit_args_snapshot, fit_args
    ), f"Estimator: {estimator} has side effects on arguments of fit"

    # The same holds for every available non-state-changing method.
    for method in NON_STATE_CHANGING_METHODS:
        if hasattr(estimator, method):
            call_args = _make_args(estimator, method)
            call_args_snapshot = deepcopy(call_args)
            getattr(estimator, method)(*call_args)

            assert deep_equals(
                call_args_snapshot, call_args
            ), f"Estimator: {estimator} has side effects on arguments of {method}"
# Example #24
def test_classifier_output(Estimator):
    """Test classifier outputs the correct data types and values.

    Test predict produces a np.array or pd.Series with only values seen in the train
    data, and that predict_proba probability estimates add up to one.
    """
    clf = _construct_instance(Estimator)
    X_train, y_train = _make_args(clf, "fit", n_classes=N_CLASSES)
    clf.fit(X_train, y_train)

    X_new = _make_args(clf, "predict")[0]

    # check predict: right type, one label per instance, no unseen labels
    predictions = clf.predict(X_new)
    assert isinstance(predictions, ACCEPTED_OUTPUT_TYPES)
    assert predictions.shape == (X_new.shape[0],)
    assert np.all(np.isin(np.unique(predictions), np.unique(y_train)))

    # check predict proba: right type and shape, rows summing to one
    if hasattr(clf, "predict_proba"):
        probabilities = clf.predict_proba(X_new)
        assert isinstance(probabilities, ACCEPTED_OUTPUT_TYPES)
        assert probabilities.shape == (X_new.shape[0], N_CLASSES)
        np.testing.assert_allclose(probabilities.sum(axis=1), 1)
def _construct_fit(Estimator, **kwargs):
    """Construct an instance of Estimator, fit it on generated data, return it."""
    estimator = _construct_instance(Estimator)
    # Only the first two fit arguments are used (e.g. X and y).
    X_y = _make_args(estimator, "fit", **kwargs)[:2]
    return estimator.fit(*X_y)
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform recovers the original fit data."""
    transformer = _construct_instance(Estimator)
    X = _make_args(transformer, "fit")[0]
    X_round_trip = transformer.inverse_transform(transformer.fit_transform(X))
    _assert_array_almost_equal(X, X_round_trip)