def test_model_workflow_partial_mode():
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": True})

    model = CascadeForestRegressor(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test).astype(np.float32)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestRegressor(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test).astype(np.float32)

    # Make sure the predictions are identical before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
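

# The tests in this excerpt rely on module-level fixtures (imports, data
# splits, `kwargs`, `toy_kwargs`, `save_dir`) defined outside it. Below is a
# minimal sketch of what that setup could look like; the dataset, split, and
# hyper-parameter values are illustrative assumptions, not the original test
# module's exact configuration.
import copy
import shutil

import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

from deepforest import CascadeForestRegressor

save_dir = "./tmp_deep_forest"  # hypothetical scratch directory for save/load

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Hypothetical aliases for the custom cascade-layer test below.
X_train_reg, X_test_reg = X_train, X_test

# Shared keyword arguments for the workflow tests (illustrative values).
kwargs = {"n_estimators": 2, "n_trees": 10, "max_layers": 2, "random_state": 42}

# A lighter configuration for the quick sanity-check tests.
toy_kwargs = {"n_estimators": 1, "n_trees": 5, "max_layers": 2, "random_state": 42}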


def test_regressor_custom_cascade_layer_workflow_in_memory(y_train):
    """Run the workflow of deep forest with custom cascade layers."""

    model = CascadeForestRegressor()

    n_estimators = 4
    estimators = [DecisionTreeRegressor() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeRegressor()
    model.set_predictor(predictor)

    model.fit(X_train_reg, y_train)
    y_pred_before = model.predict(X_test_reg)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestRegressor()
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test_reg)

    # Make sure the predictions are identical before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

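    # The internal key "0-0-custom" is assumed to encode layer index,
    # estimator index, and the "custom" tag for user-supplied estimators,
    # so get_estimator(0, 0, "custom") should return the same object.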
    assert (model.get_estimator(0, 0, "custom") is
            model._get_layer(0).estimators_["0-0-custom"].estimator_)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)


def test_model_workflow_in_memory(backend):
    """Run the workflow of deep forest with in-memory mode."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": False})
    case_kwargs.update({"backend": backend})

    model = CascadeForestRegressor(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test).astype(np.float32)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestRegressor(**case_kwargs)
    model.load(save_dir)

    # Make sure the predictions are identical before and after model serialization
    y_pred_after = model.predict(X_test).astype(np.float32)

    assert_array_equal(y_pred_before, y_pred_after)

    shutil.rmtree(save_dir)
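
# `backend` is supplied through pytest parametrization defined outside this
# excerpt. A hypothetical sketch of such a decorator, assuming deep-forest's
# two documented backends ("custom" and "sklearn"):
#
#     @pytest.mark.parametrize("backend", ["custom", "sklearn"])
#     def test_model_workflow_in_memory(backend):
#         ...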


def test_model_invalid_training_params(param):
    """Check that invalid training hyper-parameters raise a ValueError."""
    case_kwargs = copy.deepcopy(toy_kwargs)
    case_kwargs.update(param[1])

    model = CascadeForestRegressor(**case_kwargs)

    with pytest.raises(ValueError) as excinfo:
        model.fit(X_train, y_train)

    if param[0] == 0:
        assert "max_layers" in str(excinfo.value)
    elif param[0] == 1:
        assert "n_tolerant_rounds" in str(excinfo.value)
    elif param[0] == 2:
        assert "delta " in str(excinfo.value)


def test_model_properties_after_fitting():
    """Check the model properties after fitting a deep forest model."""
    model = CascadeForestRegressor(**toy_kwargs)
    model.fit(X_train, y_train)

    assert len(model) == model.n_layers_

    assert model[0] is model._get_layer(0)

    with pytest.raises(ValueError) as excinfo:
        model._get_layer(model.n_layers_)
    assert "The layer index should be in the range" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        model._set_layer(0, None)
    assert "already exists in the internal container" in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        model._get_binner(model.n_layers_ + 1)
    assert "The binner index should be in the range" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        model._set_binner(0, None)
    assert "already exists in the internal container" in str(excinfo.value)


Example 6

            # Build a deep forest regressor for the current hyper-parameter
            # combination (constructor inferred from the keyword arguments below).
            model = CascadeForestRegressor(
                n_estimators=n_estimators,
                n_trees=n_trees,
                max_depth=max_depth,
                min_samples_leaf=min_samples_leaf,
                use_predictor=use_predictor,
                predictor=predictor,
                n_tolerant_rounds=n_tolerant_rounds,
                partial_mode=partial_mode,
                delta=delta,
                n_jobs=n_jobs,
                random_state=random_state,
                verbose=verbose,
            )

            tic = time.time()
            model.fit(X_train, y_train)
            toc = time.time()
            training_time = toc - tic

            tic = time.time()
            y_pred = model.predict(X_test)
            toc = time.time()
            testing_time = toc - tic

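            # Record wall-clock training/testing times, the test MSE, and the
            # number of cascade layers grown (len(model) equals n_layers_).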
            testing_mse = mean_squared_error(y_test, y_pred)
            records.append(
                (training_time, testing_time, testing_mse, len(model)))
            model.clean()

        # Write the benchmark records to a text file
        with open("{}_deep_forest_regression.txt".format(dataset),