예제 #1
0
def check_persistence_via_pickle(Estimator):
    """Check that a fitted estimator behaves the same after a pickle round trip.

    The estimator is constructed, seeded via ``set_random_state`` and fitted.
    Every method named in ``NON_STATE_CHANGING_METHODS`` that the estimator
    exposes is called once; the same calls are then repeated on a copy
    restored with ``pickle.dumps`` / ``pickle.loads`` and the outputs are
    compared with ``_assert_almost_equal``.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    fitted = _construct_instance(Estimator)
    set_random_state(fitted)
    train_args = _make_args(fitted, "fit")
    fitted.fit(*train_args)

    # Record the inputs and outputs of each available method before pickling.
    call_args = {}
    expected = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(fitted, name):
            continue
        call_args[name] = _make_args(fitted, name)
        expected[name] = getattr(fitted, name)(*call_args[name])

    # Serialise and restore in one step.
    # NOTE: a check for a version marker in the pickled bytes of sktime
    # estimators was sketched here previously but is not active.
    restored = pickle.loads(pickle.dumps(fitted))

    # The restored estimator has to reproduce the recorded outputs.
    for name, before in expected.items():
        after = getattr(restored, name)(*call_args[name])
        _assert_almost_equal(before, after)
예제 #2
0
def check_fit_idempotent(Estimator):
    """Check that calling ``fit`` twice gives the same results as calling it once.

    After the first fit, each method from ``NON_STATE_CHANGING_METHODS`` that
    the estimator exposes is called and its output stored. The estimator is
    then re-seeded and re-fitted with the same fit arguments, the same method
    calls are repeated with the same inputs, and the outputs are compared
    with ``_assert_array_almost_equal``.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    model = _construct_instance(Estimator)
    set_random_state(model)

    # First fit; the fit arguments are kept so the second fit sees the same data.
    train_args = _make_args(model, "fit")
    model.fit(*train_args)

    call_args = {}
    first_pass = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(model, name):
            continue
        call_args[name] = _make_args(model, name)
        first_pass[name] = getattr(model, name)(*call_args[name])

    # Second fit, with the random state reset beforehand.
    set_random_state(model)
    model.fit(*train_args)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(model, name):
            continue
        second_pass = getattr(model, name)(*call_args[name])
        _assert_array_almost_equal(first_pass[name], second_pass)
예제 #3
0
def check_methods_do_not_change_state(Estimator):
    """Check that non-state-changing methods leave the estimator untouched.

    The estimator's ``__dict__`` (hyper-parameters and fitted parameters
    alike) is snapshotted right after ``fit``. Each method listed in
    ``NON_STATE_CHANGING_METHODS`` that the estimator exposes is then called,
    and ``__dict__`` must still compare equal to the snapshot afterwards.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    estimator = _construct_instance(Estimator)
    set_random_state(estimator)

    train_args = _make_args(estimator, "fit")
    estimator.fit(*train_args)

    # Shallow copy: attribute values are shared with the live estimator, so
    # the comparison below sees rebinding of attributes, not deep mutation.
    snapshot = estimator.__dict__.copy()

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(estimator, name):
            continue

        call_args = _make_args(estimator, name)
        getattr(estimator, name)(*call_args)

        # Transformers tagged "fit-in-transform" fit on each series passed to
        # transform, so their state legitimately changes there; skip the check.
        fits_in_transform = (
            name == "transform" and _has_tag(Estimator, "fit-in-transform")
        )
        if fits_in_transform:
            continue

        assert (
            estimator.__dict__ == snapshot
        ), f"Estimator: {estimator} changes __dict__ during {name}"
예제 #4
0
def check_persistence_via_pickle(Estimator):
    """Check that pickling a fitted estimator preserves its outputs.

    Outputs of every available method from ``NON_STATE_CHANGING_METHODS`` are
    collected on the fitted estimator, the estimator is pickled and
    unpickled, and the same calls on the unpickled copy must agree with the
    collected outputs to 6 decimals (``_assert_array_almost_equal``).

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    original = _construct_instance(Estimator)
    set_random_state(original)
    train_args = _make_args(original, "fit")
    original.fit(*train_args)

    # Collect reference outputs before pickling.
    inputs = {}
    reference = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(original, name):
            continue
        inputs[name] = _make_args(original, name)
        reference[name] = getattr(original, name)(*inputs[name])

    # Round trip through pickle.
    payload = pickle.dumps(original)
    clone = pickle.loads(payload)

    # The unpickled estimator must reproduce the reference outputs.
    for name, expected in reference.items():
        actual = getattr(clone, name)(*inputs[name])
        _assert_array_almost_equal(
            expected,
            actual,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )
def test_series_as_features_multivariate_input(Estimator):
    """Check that multivariate input is either handled or rejected clearly.

    The estimator is fitted on data generated with ``n_columns=2`` and its
    ``predict`` and ``predict_proba`` methods are called on matching input.
    If a ``ValueError`` is raised anywhere along the way, its message has to
    contain the standard "X must be univariate" text.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    n_columns = 2
    expected_msg = (
        "X must be univariate with X.shape[1] == 1, but found: "
        f"X.shape[1] == {n_columns}."
    )

    estimator = _construct_instance(Estimator)
    X_train, y_train = _make_args(estimator, "fit", n_columns=n_columns)

    try:
        # Happy path: the estimator accepts multivariate data throughout.
        estimator.fit(X_train, y_train)

        # TODO include series-as-features transformers
        for method in ("predict", "predict_proba"):
            X = _make_args(estimator, method, n_columns=n_columns)[0]
            getattr(estimator, method)(X)

    except ValueError as err:
        # Otherwise the rejection has to carry the expected message.
        assert expected_msg in str(err), (
            f"{estimator.__class__.__name__} does not handle multivariate "
            f"data and does not raise an appropriate error when multivariate "
            f"data is passed"
        )
예제 #6
0
def check_multiprocessing_determinism(Estimator):
    """Check that results are identical for ``n_jobs=1`` and ``n_jobs=4``.

    Only runs for estimators whose ``__init__`` signature has an ``n_jobs``
    parameter. For each method in ``NON_STATE_CHANGING_METHODS`` that the
    estimator exposes, the estimator is re-seeded and re-fitted once per
    ``n_jobs`` setting and the two outputs are compared exactly.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    if "n_jobs" not in signature(Estimator.__init__).parameters:
        return

    estimator = _construct_instance(Estimator)
    train_args = _make_args(estimator, "fit")

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(estimator, name):
            continue

        # Only the first generated argument is passed to the method.
        first_arg = _make_args(estimator, name)[0]

        outputs = []
        for n_jobs in (1, 4):
            estimator.set_params(n_jobs=n_jobs)
            if hasattr(estimator, "n_jobs"):
                assert estimator.n_jobs == n_jobs
            set_random_state(estimator)
            estimator.fit(*train_args)
            outputs.append(getattr(estimator, name)(first_arg))

        single_job, multi_job = outputs
        if isinstance(single_job, pd.DataFrame):
            assert_frame_equal(single_job, multi_job)
        else:
            np.testing.assert_array_equal(
                single_job,
                multi_job,
                err_msg="Results for test set not equal between 1 and 4 job run",
            )
예제 #7
0
def check_transform_returns_same_time_index(Estimator):
    """Check that series-to-series transformers keep the input's index.

    After fitting, ``transform`` and, when available, ``inverse_transform``
    are each called on their own generated input, and the index of the
    output must equal the index of that input.

    Parameters
    ----------
    Estimator : class
        Transformer class under test; must be a subclass of
        ``_SeriesToSeriesTransformer``.

    Raises
    ------
    AssertionError
        If ``Estimator`` is not a ``_SeriesToSeriesTransformer`` subclass or
        if one of the methods returns data with a different index.
    """
    assert issubclass(Estimator, _SeriesToSeriesTransformer)
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    estimator.fit(*fit_args)
    for method in ["transform", "inverse_transform"]:
        if hasattr(estimator, method):
            X = _make_args(estimator, method)[0]
            # Dispatch on the method name: calling ``transform``
            # unconditionally would leave ``inverse_transform`` untested.
            Xt = getattr(estimator, method)(X)
            np.testing.assert_array_equal(
                X.index,
                Xt.index,
                err_msg=f"{method} does not return the same time index",
            )
def test_regressor_output(Estimator):
    """Check type, shape and dtype of a regressor's ``predict`` output.

    The prediction must be an instance of ``ACCEPTED_OUTPUT_TYPES``, be
    one-dimensional with one entry per row of the input, and have a
    floating-point dtype.

    Parameters
    ----------
    Estimator : class
        Regressor class under test, instantiated via ``_construct_instance``.
    """
    regressor = _construct_instance(Estimator)
    X_train, y_train = _make_args(regressor, "fit")
    regressor.fit(X_train, y_train)

    X_new = _make_args(regressor, "predict")[0]
    y_pred = regressor.predict(X_new)

    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    expected_shape = (X_new.shape[0],)
    assert y_pred.shape == expected_shape
    assert np.issubdtype(y_pred.dtype, np.floating)
예제 #9
0
def check_fit_returns_self(Estimator):
    """Check that ``fit`` returns the very estimator instance it was called on.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    estimator = _construct_instance(Estimator)
    train_args = _make_args(estimator, "fit")
    returned = estimator.fit(*train_args)
    # Identity, not equality: fit must hand back the same object.
    assert (
        returned is estimator
    ), f"Estimator: {estimator} does not return self when calling fit"
예제 #10
0
def check_fit_does_not_overwrite_hyper_params(Estimator):
    """Check that ``fit`` does not change or mutate hyper-parameters.

    The parameters returned by ``get_params`` are deep-copied before fitting
    and compared, parameter by parameter, with those returned after fitting.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    estimator = _construct_instance(Estimator)
    set_random_state(estimator)

    # Deep copy so that in-place mutation during fit cannot leak into the
    # reference values.
    params_before = deepcopy(estimator.get_params())

    train_args = _make_args(estimator, "fit")
    estimator.fit(*train_args)

    params_after = estimator.get_params()
    for key, before in params_before.items():
        after = params_after[key]

        # Constructor parameters should never be changed or mutated by
        # default. joblib.hash recursively introspects sub-objects to compute
        # a checksum, so it also catches in-place changes. RandomState
        # instances would be the one exception, but random_state params were
        # explicitly fixed to integer seeds above.
        assert joblib.hash(after) == joblib.hash(before), (
            "Estimator %s should not change or mutate "
            " the parameter %s from %s to %s during fit."
            % (estimator.__class__.__name__, key, before, after)
        )
예제 #11
0
def _construct_fit_transform(Estimator, **kwargs):
    """Construct an estimator and return the result of its ``fit_transform``.

    Parameters
    ----------
    Estimator : class
        Estimator class, instantiated via ``_construct_instance``.
    **kwargs
        Forwarded to ``_make_args`` when generating the fit arguments.

    Returns
    -------
    Whatever ``fit_transform`` returns for the first two fit arguments.
    """
    estimator = _construct_instance(Estimator)

    # Forecasters that are also transformers (e.g. pipelines) cannot be given
    # the forecasting horizon in transform, so only the first two fit
    # arguments are passed on. This will fail for forecasters that require
    # the forecasting horizon in fit.
    all_fit_args = _make_args(estimator, "fit", **kwargs)
    leading_args = all_fit_args[:2]
    return estimator.fit_transform(*leading_args)
예제 #12
0
def test_3d_numpy_input(Estimator):
    """Check that 3d numpy input is either handled or rejected clearly.

    The estimator is fitted on arguments generated with ``return_numpy=True``.
    Each method from ``NON_STATE_CHANGING_METHODS`` that it exposes is then
    called on numpy input; a ``ValueError`` is tolerated only if its message
    states that a nested ``pd.DataFrame`` is required.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    expected_msg = "This method requires X to be a nested pd.DataFrame"

    estimator = _construct_instance(Estimator)
    train_args = _make_args(estimator, "fit", return_numpy=True)
    estimator.fit(*train_args)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(estimator, name):
            continue

        try:
            # Happy path: the method accepts 3d numpy input.
            call_args = _make_args(estimator, name, return_numpy=True)
            getattr(estimator, name)(*call_args)
        except ValueError as err:
            # Otherwise the rejection has to carry the expected message.
            assert expected_msg in str(err), (
                f"{estimator.__class__.__name__} does "
                f"not handle 3d numpy input data correctly"
            )
예제 #13
0
def test_classifier_output(Estimator):
    """Check the outputs of a classifier's ``predict`` and ``predict_proba``.

    ``predict`` must return an instance of ``ACCEPTED_OUTPUT_TYPES`` with one
    label per input row, using only labels seen in the training target. If
    ``predict_proba`` exists, it must return one row per input row with
    ``N_CLASSES`` columns whose entries sum to 1 along each row.

    Parameters
    ----------
    Estimator : class
        Classifier class under test, instantiated via ``_construct_instance``.
    """
    classifier = _construct_instance(Estimator)
    X_train, y_train = _make_args(classifier, "fit", n_classes=N_CLASSES)
    classifier.fit(X_train, y_train)

    X_new = _make_args(classifier, "predict")[0]

    # Label predictions.
    y_pred = classifier.predict(X_new)
    assert isinstance(y_pred, ACCEPTED_OUTPUT_TYPES)
    assert y_pred.shape == (X_new.shape[0],)
    predicted_labels = np.unique(y_pred)
    known_labels = np.unique(y_train)
    assert np.all(np.isin(predicted_labels, known_labels))

    # Probability predictions are optional.
    if not hasattr(classifier, "predict_proba"):
        return

    y_proba = classifier.predict_proba(X_new)
    assert isinstance(y_proba, ACCEPTED_OUTPUT_TYPES)
    assert y_proba.shape == (X_new.shape[0], N_CLASSES)
    row_totals = y_proba.sum(axis=1)
    np.testing.assert_allclose(row_totals, 1)
예제 #14
0
def check_raises_not_fitted_error(Estimator):
    """Check that unfitted estimators raise ``NotFittedError``.

    Every method from ``NON_STATE_CHANGING_METHODS`` that the estimator
    exposes is called without a prior ``fit`` and must raise a
    ``NotFittedError`` whose message matches "has not been fitted".

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    unfitted = _construct_instance(Estimator)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(unfitted, name):
            continue

        # Arguments are generated outside the raises-context so that only
        # the method call itself is expected to raise.
        call_args = _make_args(unfitted, name)
        with pytest.raises(NotFittedError, match=r"has not been fitted"):
            getattr(unfitted, name)(*call_args)
예제 #15
0
def check_multiprocessing_idempotent(Estimator):
    """Check that ``n_jobs=1`` and ``n_jobs=-1`` give identical results.

    Only runs for estimators that list ``n_jobs`` among ``get_params()``.
    A fresh instance is fitted with ``n_jobs=1`` and the outputs of its
    available ``NON_STATE_CHANGING_METHODS`` are stored; a second fresh
    instance is fitted with ``n_jobs=-1`` on the same arguments and must
    reproduce them exactly. The check is not strictly necessary, since
    parallelisation is delegated to joblib.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    probe = _construct_instance(Estimator)
    if "n_jobs" not in probe.get_params():
        return

    inputs = {}
    baseline = {}

    # Single-process run.
    serial = _construct_instance(Estimator)
    serial.set_params(n_jobs=1)
    set_random_state(serial)
    inputs["fit"] = _make_args(serial, "fit")
    serial.fit(*inputs["fit"])

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(serial, name):
            continue
        inputs[name] = _make_args(serial, name)
        baseline[name] = getattr(serial, name)(*inputs[name])

    # Run with n_jobs=-1, reusing the very same input arguments.
    parallel = _construct_instance(Estimator)
    parallel.set_params(n_jobs=-1)
    set_random_state(parallel)
    parallel.fit(*inputs["fit"])

    for name, expected in baseline.items():
        if not hasattr(parallel, name):
            continue
        actual = getattr(parallel, name)(*inputs[name])
        _assert_array_equal(
            expected,
            actual,
            err_msg="Results are not equal for n_jobs=1 and n_jobs=-1",
        )
예제 #16
0
def check_fit_updates_state(Estimator):
    """Check that ``fit`` flips the is-fitted attributes to a truthy value.

    Both ``_is_fitted`` and ``is_fitted`` must be falsy on a freshly
    constructed estimator and truthy after ``fit`` has been called.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    flags = ("_is_fitted", "is_fitted")
    estimator = _construct_instance(Estimator)

    # Before fit: every flag has to be falsy.
    for flag in flags:
        is_set = getattr(estimator, flag)
        assert (
            not is_set
        ), f"Estimator: {estimator} does not initiate attribute: {flag} to False"

    train_args = _make_args(estimator, "fit")
    estimator.fit(*train_args)

    # After fit: every flag has to be truthy.
    for flag in flags:
        is_set = getattr(estimator, flag)
        assert (
            is_set
        ), f"Estimator: {estimator} does not update attribute: {flag} during fit"
예제 #17
0
def check_fit_updates_state(Estimator):
    """Check that the is-fitted states are falsy before and truthy after ``fit``.

    The states inspected are the ``_is_fitted`` and ``is_fitted`` attributes
    of the estimator.

    Parameters
    ----------
    Estimator : class
        Estimator class under test, instantiated via ``_construct_instance``.
    """
    state_names = ("_is_fitted", "is_fitted")
    estimator = _construct_instance(Estimator)

    # A freshly constructed estimator must not report itself as fitted.
    for name in state_names:
        assert not getattr(
            estimator, name
        ), f"Estimator: {estimator} does not initiate state: {name} to False"

    estimator.fit(*_make_args(estimator, "fit"))

    # Once fit has run, every state has to be set.
    for name in state_names:
        assert getattr(
            estimator, name
        ), f"Estimator: {estimator} does not update state: {name} during fit"
예제 #18
0
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that ``inverse_transform`` undoes ``fit_transform``.

    The first generated fit argument is passed through ``fit_transform`` and
    then ``inverse_transform``; the result has to match the original input
    according to ``_assert_array_almost_equal``.

    Parameters
    ----------
    Estimator : class
        Transformer class under test, instantiated via ``_construct_instance``.
    """
    transformer = _construct_instance(Estimator)
    original = _make_args(transformer, "fit")[0]
    transformed = transformer.fit_transform(original)
    recovered = transformer.inverse_transform(transformed)
    _assert_array_almost_equal(original, recovered)
예제 #19
0
def _construct_fit(Estimator, **kwargs):
    """Construct an estimator and return the result of calling its ``fit``.

    Parameters
    ----------
    Estimator : class
        Estimator class, instantiated via ``_construct_instance``.
    **kwargs
        Forwarded to ``_make_args`` when generating the fit arguments.

    Returns
    -------
    Whatever ``fit`` returns when given the first two fit arguments.
    """
    estimator = _construct_instance(Estimator)
    all_fit_args = _make_args(estimator, "fit", **kwargs)
    # Only the first two generated arguments are handed to fit.
    return estimator.fit(*all_fit_args[:2])
예제 #20
0
def test_transformed_data_has_same_index_as_input_data(Transformer):
    """Check that ``fit_transform`` returns data with the input's index.

    Parameters
    ----------
    Transformer : class
        Transformer class under test, instantiated via ``_construct_instance``.
    """
    instance = _construct_instance(Transformer)
    X, y = _make_args(instance, "fit")
    transformed = instance.fit_transform(X, y)
    np.testing.assert_array_equal(X.index, transformed.index)