コード例 #1
0
    def test_fit_idempotent(self, estimator_instance, scenario):
        """Check that running fit a second time gives the same method outputs.

        For each method in ``NON_STATE_CHANGING_METHODS`` that the estimator
        is capable of, the scenario is run as ``fit`` followed by that method.
        The first element of the returned results is then used as the
        estimator for a second, identical run, and the method outputs of the
        two runs are compared.
        """
        current = estimator_instance

        # todo: may have to rework this, due to "if estimator has param"
        for method in NON_STATE_CHANGING_METHODS:
            # predict_proba of forecasters is skipped for now, since the
            #   output comparison does not work for tensorflow Distribution
            is_forecaster = isinstance(estimator_instance, BaseForecaster)
            if is_forecaster and method == "predict_proba":
                continue
            if not _has_capability(current, method):
                continue

            # first run: fit, then call the method
            set_random_state(current)
            first_run = scenario.run(
                current,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            # second run starts from the object returned by the first run
            current = first_run[0]
            set_random_state(current)
            second_run = scenario.run(
                current,
                method_sequence=["fit", method],
                return_all=True,
                deepcopy_return=True,
            )

            _assert_array_almost_equal(
                first_run[1],
                second_run[1],
                # err_msg=f"Idempotency check failed for method {method}",
            )
コード例 #2
0
def test_differencer_produces_expected_results(na_handling):
    """Compare Differencer output on ``y_simple`` with the stored expectation.

    The expected result is looked up in ``y_simple_expected_diff`` under the
    given ``na_handling`` key.
    """
    differencer = Differencer(na_handling=na_handling)
    actual = differencer.fit_transform(y_simple)

    _assert_array_almost_equal(actual, y_simple_expected_diff[na_handling])
コード例 #3
0
    def test_classifier_on_unit_test_data(self, estimator_class):
        """Compare predict_proba on unit test data against registered values.

        Returns early, without asserting anything, when no expected
        probabilities are registered in ``unit_test_proba`` for the class.
        """
        classname = estimator_class.__name__

        # nothing to compare against if no expected probas are registered
        if classname not in unit_test_proba:
            return None
        expected_probas = unit_test_proba[classname]

        # work on a clone of the "results_comparison" test instance
        template = estimator_class.create_test_instance(
            parameter_set="results_comparison")
        estimator_instance = clone(template)
        # fix the seed when the estimator exposes a random_state parameter
        if "random_state" in estimator_instance.get_params():
            estimator_instance.set_params(random_state=0)

        # load unit test data and pick 10 row positions with a seeded RNG
        X_train, y_train = load_unit_test(split="train")
        X_test, _ = load_unit_test(split="test")
        rng = np.random.RandomState(0)
        indices = rng.choice(len(y_train), 10, replace=False)

        # train classifier and predict probas on the selected test rows
        estimator_instance.fit(X_train, y_train)
        y_proba = estimator_instance.predict_proba(X_test.iloc[indices])

        # probabilities must agree to two decimals
        _assert_array_almost_equal(y_proba, expected_probas, decimal=2)
コード例 #4
0
def test_fit_idempotent(estimator_instance, scenario):
    """Check that running fit a second time gives the same method outputs.

    For each method in ``NON_STATE_CHANGING_METHODS`` that the estimator is
    capable of, the scenario is run as ``fit`` followed by that method. The
    first element of the returned results is then used as the estimator for a
    second, identical run, and the method outputs of the two runs are compared.
    """
    current = estimator_instance

    # todo: may have to rework this, due to "if estimator has param"
    for method in NON_STATE_CHANGING_METHODS:
        if not _has_capability(current, method):
            continue

        # first run: fit, then call the method
        set_random_state(current)
        first_run = scenario.run(
            current,
            method_sequence=["fit", method],
            return_all=True,
            deepcopy_return=True,
        )

        # second run starts from the object returned by the first run
        current = first_run[0]
        set_random_state(current)
        second_run = scenario.run(
            current,
            method_sequence=["fit", method],
            return_all=True,
            deepcopy_return=True,
        )

        _assert_array_almost_equal(
            first_run[1],
            second_run[1],
            # err_msg=f"Idempotency check failed for method {method}",
        )
コード例 #5
0
def test_load_UCR_UEA_dataset_download(tmpdir):
    """Load ArrowHead into a temp directory and check files and data.

    The extracted directory must contain exactly the expected files, and the
    loaded data must match what ``load_arrow_head`` returns.
    """
    # tmpdir is a pytest fixture
    dataset = "ArrowHead"
    extract_path = tmpdir.mkdtemp()
    actual_X, actual_y = load_UCR_UEA_dataset(
        dataset, return_X_y=True, extract_path=extract_path
    )
    data_path = os.path.join(extract_path, dataset)
    assert os.path.exists(data_path)

    # check files: each directory entry must be expected, and none missing
    suffixes = (
        ".txt",
        "_TEST.arff",
        "_TEST.ts",
        "_TEST.txt",
        "_TRAIN.arff",
        "_TRAIN.ts",
        "_TRAIN.txt",
        # "README.md" is intentionally not expected
    )
    remaining = [f"{dataset}{suffix}" for suffix in suffixes]
    for entry in os.listdir(data_path):
        assert entry in remaining
        remaining.remove(entry)
    assert len(remaining) == 0

    # check data against the reference loader
    expected_X, expected_y = load_arrow_head(return_X_y=True)
    _assert_array_almost_equal(actual_X, expected_X, decimal=4)
    np.testing.assert_array_equal(expected_y, actual_y)
コード例 #6
0
ファイル: test_compose.py プロジェクト: juanitorduz/sktime
def test_dunder_mul():
    """Test that ``*`` chains transformers into a TransformerPipeline.

    Checks the type of each product, the order of the steps, and the
    transformed output of the resulting pipelines.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1, t2, t3, t4 = (ExponentTransformer(power=p) for p in (2, 5, 0.1, 1))

    t12 = t1 * t2
    t123 = t12 * t3
    t312 = t3 * t12
    t1234 = t123 * t4
    t1234_2 = t12 * (t3 * t4)

    # each product paired with the step powers it should contain, in order
    expected_steps = [
        (t12, [2, 5]),
        (t123, [2, 5, 0.1]),
        (t312, [0.1, 2, 5]),
        (t1234, [2, 5, 0.1, 1]),
        (t1234_2, [2, 5, 0.1, 1]),
    ]

    for pipeline, _ in expected_steps:
        assert isinstance(pipeline, TransformerPipeline)

    for pipeline, powers in expected_steps:
        assert [step.power for step in pipeline.steps] == powers

    # the powers in these chains multiply to 1, so X is the expected output
    for pipeline in (t123, t312, t1234, t1234_2):
        _assert_array_almost_equal(X, pipeline.fit_transform(X))

    _assert_array_almost_equal(
        t12.fit_transform(X), t3.fit(X).inverse_transform(X)
    )
コード例 #7
0
def test_differencer_remove_missing_false(y, lags, na_handling):
    """Check that inverse_transform undoes fit_transform on the same series."""
    differencer = Differencer(lags=lags, na_handling=na_handling)
    round_trip = differencer.inverse_transform(differencer.fit_transform(y))

    _assert_array_almost_equal(y, round_trip)
コード例 #8
0
def test_persistence_via_pickle(estimator_instance):
    """Check that method outputs are unchanged by a pickle round trip.

    The estimator is fitted, each available method in
    ``NON_STATE_CHANGING_METHODS`` is called, and the same calls are repeated
    on a pickled-and-unpickled copy with the same arguments.
    """
    est = estimator_instance
    set_random_state(est)
    est.fit(*_make_args(est, "fit"))

    # record the arguments and outputs of each method before pickling
    outputs_before = {}
    call_args = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(est, name):
            continue
        call_args[name] = _make_args(est, name)
        outputs_before[name] = getattr(est, name)(*call_args[name])

    # pickle round trip
    restored = pickle.loads(pickle.dumps(est))

    # the restored estimator must reproduce every recorded output
    for name, before in outputs_before.items():
        after = getattr(restored, name)(*call_args[name])
        _assert_array_almost_equal(
            before,
            after,
            decimal=6,
            err_msg="Results are not the same after pickling",
        )
コード例 #9
0
ファイル: test_pipeline.py プロジェクト: juanitorduz/sktime
def test_mul_sklearn_autoadapt():
    """Test that ``*`` accepts an sklearn transformer in a classifier chain.

    All three groupings of the product must be ClassifierPipeline instances
    and give the same predictions.
    """
    RAND_SEED = 42
    panel_kwargs = {
        "n_instances": 10,
        "n_timepoints": 20,
        "random_state": RAND_SEED,
    }
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(y=y, **panel_kwargs)
    X_test = _make_panel_X(**panel_kwargs)

    t1 = ExponentTransformer(power=2)
    t2 = StandardScaler()
    c = KNeighborsTimeSeriesClassifier()

    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c

    for pipeline in (t12c_1, t12c_2, t12c_3):
        assert isinstance(pipeline, ClassifierPipeline)

    # the right-grouped pipeline serves as the reference prediction
    y_pred = t12c_1.fit(X, y).predict(X_test)

    for pipeline in (t12c_2, t12c_3):
        _assert_array_almost_equal(y_pred, pipeline.fit(X, y).predict(X_test))
コード例 #10
0
def test_fit_idempotent(estimator_instance):
    """Check that fitting twice gives the same method outputs as fitting once.

    Outputs of the available methods in ``NON_STATE_CHANGING_METHODS`` are
    recorded after the first fit and compared with the outputs of the same
    calls after fitting again with the same arguments.
    """
    est = estimator_instance
    set_random_state(est)

    # first fit
    fit_args = _make_args(est, "fit")
    est.fit(*fit_args)

    first_outputs = {}
    call_args = {}
    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(est, name):
            continue
        call_args[name] = _make_args(est, name)
        first_outputs[name] = getattr(est, name)(*call_args[name])

    # second fit, with the random state reset beforehand
    set_random_state(est)
    est.fit(*fit_args)

    for name in NON_STATE_CHANGING_METHODS:
        if not hasattr(est, name):
            continue
        second_output = getattr(est, name)(*call_args[name])
        _assert_array_almost_equal(
            first_outputs[name],
            second_output,
            # err_msg=f"Idempotency check failed for method {method}",
        )
コード例 #11
0
ファイル: test_differencer.py プロジェクト: tomfisher/sktime
def test_differencer_same_series(y, lags):
    """Check the transform/inverse_transform round trip on shared indices."""
    differencer = Differencer(lags=lags)
    restored = differencer.inverse_transform(differencer.fit_transform(y))

    # Only the indices present in the reconstructed series are compared,
    # i.e. those of the series that was passed to inverse_transform
    _assert_array_almost_equal(y.loc[restored.index], restored)
コード例 #12
0
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform reverses fit_transform for an estimator.

    If the "transform-returns-same-time-index" tag is falsy, the input is
    first restricted to the index of the reconstructed data.
    """
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    X = fit_args[0]
    Xit = estimator.inverse_transform(estimator.fit_transform(X))

    same_index = estimator.get_tag("transform-returns-same-time-index")
    expected = X if same_index else X.loc[Xit.index]
    _assert_array_almost_equal(expected, Xit)
コード例 #13
0
    def test_transform_inverse_transform_equivalent(self, estimator_instance,
                                                    scenario):
        """Check that inverse_transform reverses transform in the scenario.

        Returns early when the class tag "capability:inverse_transform" is
        falsy. If the "transform-returns-same-time-index" tag is falsy, the
        input is first restricted to the index of the reconstructed data.
        """
        can_invert = estimator_instance.get_class_tag(
            "capability:inverse_transform", False)
        if not can_invert:
            # nothing to test for estimators without inverse_transform
            return None

        X = scenario.args["transform"]["X"]
        Xt = scenario.run(
            estimator_instance, method_sequence=["fit", "transform"])
        Xit = estimator_instance.inverse_transform(Xt)

        same_index = estimator_instance.get_tag(
            "transform-returns-same-time-index")
        expected = X if same_index else X.loc[Xit.index]
        _assert_array_almost_equal(expected, Xit)
コード例 #14
0
ファイル: test_compose.py プロジェクト: juanitorduz/sktime
def test_mul_sklearn_autoadapt():
    """Test that ``*`` accepts an sklearn transformer in a transformer chain.

    All three groupings of the product must be TransformerPipeline instances
    and give the same transformed output.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1 = ExponentTransformer(power=2)
    t2 = StandardScaler()
    t3 = ExponentTransformer(power=0.5)

    t123 = t1 * t2 * t3
    t123r = t1 * (t2 * t3)
    t123l = (t1 * t2) * t3

    for pipeline in (t123, t123r, t123l):
        assert isinstance(pipeline, TransformerPipeline)

    # the left-grouped pipeline is refitted for each comparison
    for pipeline in (t123, t123r):
        _assert_array_almost_equal(
            pipeline.fit_transform(X), t123l.fit_transform(X)
        )
コード例 #15
0
ファイル: test_differencer.py プロジェクト: tomfisher/sktime
def test_differencer_prediction(y, lags):
    """Check inverse_transform of out-of-sample values after refitting.

    The last 12 observations of ``y`` are held out. Their differenced values,
    taken from a transform of the full series, stand in for predictions. The
    transformer is then refitted on the training part only, and
    inverse_transform of those values must reproduce the held-out
    observations.

    Parameters
    ----------
    y : series supporting ``.iloc`` slicing and ``.copy()``
        Series to difference; the last 12 observations are held out.
    lags : lag specification passed to ``Differencer``
        Lags applied by the transformer under test.
    """
    y_train = y.iloc[:-12].copy()
    y_true = y.iloc[-12:].copy()

    # Use the lags supplied to the test; a hard-coded value here would make
    # every ``lags`` case run the identical check.
    transformer = Differencer(lags=lags)
    y_transform = transformer.fit_transform(y)

    # Use the actual transformed values as predictions since we know we should
    # be able to convert them to the units of the original series and exactly
    # match the y_true values for this period
    y_pred = y_transform.iloc[-12:].copy()

    # Redo the transformer's fit and transformation
    # Now the transformer doesn't know anything about the values in y_true
    # This simulates use-case with a forecasting pipeline
    # (the transformed training series itself is not needed)
    transformer.fit_transform(y_train)

    y_pred_inv = transformer.inverse_transform(y_pred)

    _assert_array_almost_equal(y_true, y_pred_inv)
コード例 #16
0
ファイル: test_compose.py プロジェクト: juanitorduz/sktime
def test_dunder_add():
    """Test that ``+`` combines transformers into a FeatureUnion.

    Checks the type of each sum, the order of the contained transformers,
    and that both groupings of three give the same transformed output.
    """
    X = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    t1, t2, t3 = (ExponentTransformer(power=p) for p in (2, 5, 3))

    t12 = t1 + t2
    t123 = t12 + t3
    t123r = t1 + (t2 + t3)

    # each union paired with the powers it should contain, in order
    expected_members = [
        (t12, [2, 5]),
        (t123, [2, 5, 3]),
        (t123r, [2, 5, 3]),
    ]

    for union, _ in expected_members:
        assert isinstance(union, FeatureUnion)

    for union, powers in expected_members:
        assert [member.power for member in union.transformer_list] == powers

    _assert_array_almost_equal(t123r.fit_transform(X), t123.fit_transform(X))
コード例 #17
0
ファイル: test_pipeline.py プロジェクト: juanitorduz/sktime
def test_dunder_mul():
    """Test that ``*`` chains transformers and a classifier into a pipeline.

    All three groupings of the product must be ClassifierPipeline instances,
    and each must predict the same as the classifier fitted on its own.
    """
    RAND_SEED = 42
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(
        n_instances=10, n_timepoints=20, random_state=RAND_SEED, y=y
    )
    X_test = _make_panel_X(
        n_instances=5, n_timepoints=20, random_state=RAND_SEED
    )

    t1 = ExponentTransformer(power=4)
    t2 = ExponentTransformer(power=0.25)
    c = KNeighborsTimeSeriesClassifier()

    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c
    pipelines = (t12c_1, t12c_2, t12c_3)

    for pipeline in pipelines:
        assert isinstance(pipeline, ClassifierPipeline)

    # reference: the bare classifier without any transformer in front
    y_pred = c.fit(X, y).predict(X_test)

    for pipeline in pipelines:
        _assert_array_almost_equal(y_pred, pipeline.fit(X, y).predict(X_test))
コード例 #18
0
    def test_persistence_via_pickle(self, estimator_instance, scenario):
        """Check that method outputs are unchanged by a pickle round trip.

        The estimator is fitted through the scenario, each supported method in
        ``NON_STATE_CHANGING_METHODS`` is run, and the same runs are repeated
        on a pickled-and-unpickled copy of the estimator.
        """
        estimator = estimator_instance
        set_random_state(estimator)
        # Fit the model, get args before and after
        scenario.run(estimator, method_sequence=["fit"], return_args=True)

        # outputs of every supported method before pickling
        results = {
            method: scenario.run(estimator, method_sequence=[method])
            for method in NON_STATE_CHANGING_METHODS
            if _has_capability(estimator, method)
        }

        # pickle round trip
        unpickled_estimator = pickle.loads(pickle.dumps(estimator))

        is_forecaster = isinstance(estimator_instance, BaseForecaster)
        estimator_name = type(estimator_instance).__name__

        # the unpickled estimator must reproduce every recorded output
        for method, vanilla_result in results.items():
            # escape predict_proba for forecasters, tfp distributions cannot
            # be pickled
            if is_forecaster and method == "predict_proba":
                continue

            unpickled_result = scenario.run(
                unpickled_estimator, method_sequence=[method])

            msg = (
                f"Results of {method} differ between when pickling and not pickling, "
                f"estimator {estimator_name}")
            _assert_array_almost_equal(
                vanilla_result,
                unpickled_result,
                decimal=6,
                err_msg=msg,
            )
コード例 #19
0
ファイル: test_differencer.py プロジェクト: tomfisher/sktime
def test_differencer_remove_missing_false(y, lags):
    """Check the inverse_transform round trip with ``drop_na=False``."""
    differencer = Differencer(lags=lags, drop_na=False)
    round_trip = differencer.inverse_transform(differencer.fit_transform(y))

    _assert_array_almost_equal(y, round_trip)
コード例 #20
0
def check_transform_inverse_transform_equivalent(Estimator):
    """Check that inverse_transform reverses fit_transform for an estimator.

    The estimator is built with ``_construct_instance`` and the first of its
    ``fit`` arguments is used as the input data.
    """
    estimator = _construct_instance(Estimator)
    fit_args = _make_args(estimator, "fit")
    X = fit_args[0]
    round_trip = estimator.inverse_transform(estimator.fit_transform(X))

    _assert_array_almost_equal(X, round_trip)