Example 1
def test_model_workflow_partial_mode():
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": True})

    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
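These test snippets rely on module-level setup defined elsewhere in the test file: a train/test split (X_train, X_test, y_train, y_test), a shared kwargs dict of model parameters, and a save_dir path used for serialization. A minimal sketch of what that setup might look like; the dataset, parameter values, and directory below are assumptions for illustration, not taken from the original module:

import copy
import shutil

import numpy as np
from numpy.testing import assert_array_equal, assert_raises
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

from deepforest import CascadeForestClassifier

# Assumed buffer/serialization directory
save_dir = "./tmp"

# Assumed toy dataset and split
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.42, random_state=42
)

# Assumed shared model parameters; the real module may use different values
kwargs = {
    "max_layers": 2,
    "n_estimators": 1,
    "n_trees": 10,
    "random_state": 0,
}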
Example 2
def test_model_sample_weight():
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)

    # Training without sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)
    y_pred_no_sample_weight = model.predict(X_test)

    # Training with equal sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.ones(y_train.size)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_equal_sample_weight = model.predict(X_test)

    # Make sure the same predictions with None and equal sample_weight
    assert_array_equal(y_pred_no_sample_weight, y_pred_equal_sample_weight)

    # Training with unequal sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.where(y_train == 0, 0.1, y_train)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_skewed_sample_weight = model.predict(X_test)

    # Make sure different predictions with unequal and equal sample_weight
    assert_raises(AssertionError, assert_array_equal,
                  y_pred_skewed_sample_weight, y_pred_equal_sample_weight)

    model.clean()  # clear the buffer
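The skewed weights above give class 0 a weight of 0.1 and every other class a weight equal to its own label value. For reference, a common alternative way to build per-sample weights from an explicit per-class mapping is scikit-learn's compute_sample_weight helper; this is an illustrative sketch, not part of the original test:

import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

y = np.array([0, 0, 1, 2, 2, 2])

# Manual construction: class 0 is down-weighted, all other classes get weight 1
manual = np.where(y == 0, 0.1, 1.0)

# Same idea via a per-class mapping broadcast to per-sample weights
mapped = compute_sample_weight({0: 0.1, 1: 1.0, 2: 1.0}, y)

assert np.allclose(manual, mapped)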
Example 3
def test_model_workflow_partial_mode(backend):
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": True})
    case_kwargs.update({"backend": backend})

    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)

    # Test feature_importances_
    if backend == "sklearn":
        model.get_layer_feature_importances(0)
    else:
        with pytest.raises(RuntimeError) as excinfo:
            model.get_layer_feature_importances(0)
        assert "Please use the sklearn backend" in str(excinfo.value)

    # Predictions before saving
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
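The backend argument in this variant is presumably supplied by a pytest fixture or parametrization defined elsewhere in the test module. A minimal sketch of how that could look, assuming the two supported values are "custom" (deep forest's own estimators) and "sklearn":

import pytest

# Assumed parametrization; the real test module may define this differently
@pytest.mark.parametrize("backend", ["custom", "sklearn"])
def test_model_workflow_partial_mode(backend):
    ...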
Example 4
def test_model_input_label_encoder():
    """Test if the model behaves the same with and without label encoding."""

    # Load data
    X, y = load_digits(return_X_y=True)
    y_as_str = np.char.add("label_", y.astype(str))

    # Train model on integer labels. Labels should look like: 1, 2, 3, ...
    model = CascadeForestClassifier(random_state=1)
    model.fit(X, y)
    y_pred_int_labels = model.predict(X)

    # Train model on string labels. Labels should look like: "label_1", "label_2", "label_3", ...
    model = CascadeForestClassifier(random_state=1)
    model.fit(X, y_as_str)
    y_pred_str_labels = model.predict(X)

    # Check if the underlying data are the same
    y_pred_int_labels_as_str = np.char.add(
        "label_", y_pred_int_labels.astype(str)
    )
    assert_array_equal(y_pred_str_labels, y_pred_int_labels_as_str)

    # Clean up buffer
    model.clean()
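The behaviour under test relies on labels of any dtype being encoded to integer codes during fit and decoded back at prediction time. A standalone sketch of that round trip using scikit-learn's LabelEncoder; this is illustrative only, and deep forest's internal encoder may differ:

import numpy as np
from numpy.testing import assert_array_equal
from sklearn.datasets import load_digits
from sklearn.preprocessing import LabelEncoder

X, y = load_digits(return_X_y=True)
y_as_str = np.char.add("label_", y.astype(str))

# String labels -> integer codes -> string labels
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y_as_str)
assert_array_equal(encoder.inverse_transform(y_encoded), y_as_str)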
Example 5
def test_custom_cascade_layer_workflow_partial_mode():

    model = CascadeForestClassifier(partial_mode=True)

    n_estimators = 4
    estimators = [DecisionTreeClassifier() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeClassifier()
    model.set_predictor(predictor)

    model.fit(X_train, y_train)
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier()
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)


def test_classifier_custom_cascade_layer_workflow_in_memory():

    model = CascadeForestClassifier()

    n_estimators = 4
    estimators = [DecisionTreeClassifier() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeClassifier()
    model.set_predictor(predictor)

    model.fit(X_train_clf, y_train_clf)
    y_pred_before = model.predict(X_test_clf)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier()
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test_clf)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    assert (model.get_estimator(0, 0, "custom") is
            model._get_layer(0).estimators_["0-0-custom"].estimator_)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
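The final assertion above checks that get_estimator returns the very object stored in the layer's internal estimators_ dict. A minimal usage sketch on a fitted model, using the same positional argument order as the call above (keyword names are not assumed):

# Assumes `model` is a fitted CascadeForestClassifier with custom estimators,
# as in the example above
est = model.get_estimator(0, 0, "custom")  # layer 0, estimator 0, "custom" type
print(type(est))  # the underlying DecisionTreeClassifier instance

The fragment below comes from a separate benchmarking snippet whose surrounding setup (dataset loading, configuration loop, and model construction) is not shown. It times training and prediction, records test accuracy and the number of cascade layers, and writes one tab-separated line per run.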
                partial_mode=partial_mode,
                delta=delta,
                n_jobs=n_jobs,
                random_state=random_state,
                verbose=verbose,
            )

            tic = time.time()
            model.fit(X_train, y_train)
            toc = time.time()
            training_time = toc - tic

            tic = time.time()
            y_pred = model.predict(X_test)
            toc = time.time()
            testing_time = toc - tic

            testing_acc = accuracy_score(y_test, y_pred)
            records.append(
                (training_time, testing_time, testing_acc, len(model)))
            model.clean()

        # Writing
        with open("{}_deep_forest_classification.txt".format(dataset),
                  'w') as file:
            for training_time, testing_time, testing_acc, n_layers in records:
                string = "{:.5f}\t{:.5f}\t{:.5f}\t{}\n".format(
                    training_time, testing_time, testing_acc, n_layers)
                file.write(string)
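Each line of the records file written above holds training time, testing time, test accuracy, and the number of cascade layers, separated by tabs. A hypothetical helper (not part of the original script) for reading the records back in:

def load_records(dataset):
    """Read back the tab-separated benchmark records written above."""
    records = []
    with open("{}_deep_forest_classification.txt".format(dataset)) as f:
        for line in f:
            training_time, testing_time, testing_acc, n_layers = line.split("\t")
            records.append(
                (float(training_time), float(testing_time),
                 float(testing_acc), int(n_layers))
            )
    return records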