コード例 #1
0
def test_model_input_label_encoder():
    """Integer labels and their string-encoded twins must yield the same fit.

    The same digits data is fitted twice with an identical seed: once on the
    raw integer targets and once on targets rewritten as ``"label_<int>"``.
    After mapping the integer predictions through the same rewrite, both
    prediction arrays are required to be element-wise equal.
    """
    prefix = "label_"

    features, int_targets = load_digits(return_X_y=True)
    str_targets = np.char.add(prefix, int_targets.astype(str))

    # First pass: plain integer targets.
    model = CascadeForestClassifier(random_state=1)
    model.fit(features, int_targets)
    preds_from_int = model.predict(features)

    # Second pass: same seed, string targets.
    model = CascadeForestClassifier(random_state=1)
    model.fit(features, str_targets)
    preds_from_str = model.predict(features)

    # Bring the integer predictions into the string label space and compare.
    expected = np.char.add(prefix, preds_from_int.astype(str))
    assert_array_equal(preds_from_str, expected)

    # Release the buffer held by the most recently fitted model.
    model.clean()
コード例 #2
0
def test_model_workflow_partial_mode():
    """Round-trip a partial-mode deep forest through save/load.

    Predictions made by the freshly fitted model must equal the predictions
    made by a new model instance restored from ``save_dir``.
    """
    params = copy.deepcopy(kwargs)
    params["partial_mode"] = True

    model = CascadeForestClassifier(**params)
    model.fit(X_train, y_train)
    preds_fitted = model.predict(X_test)

    # Persist the fitted model, then restore it into a brand-new instance.
    model.save(save_dir)
    model = CascadeForestClassifier(**params)
    model.load(save_dir)

    preds_restored = model.predict(X_test)

    # Serialization must not change the predictions.
    assert_array_equal(preds_fitted, preds_restored)

    # Drop the model buffer and the directory written by save().
    model.clean()
    shutil.rmtree(save_dir)
コード例 #3
0
def test_model_workflow_in_memory(backend):
    """Round-trip an in-memory deep forest through save/load for `backend`.

    Also checks the feature-importance accessor: it must succeed for the
    "sklearn" backend and raise ``RuntimeError`` for any other backend.
    """
    params = copy.deepcopy(kwargs)
    params["partial_mode"] = False
    params["backend"] = backend

    model = CascadeForestClassifier(**params)
    model.fit(X_train, y_train)

    # Feature importances are expected to be available only with sklearn.
    if backend != "sklearn":
        with pytest.raises(RuntimeError) as excinfo:
            model.get_layer_feature_importances(0)
        assert "Please use the sklearn backend" in str(excinfo.value)
    else:
        model.get_layer_feature_importances(0)

    preds_fitted = model.predict(X_test)

    # Persist the fitted model, then restore it into a brand-new instance.
    model.save(save_dir)
    model = CascadeForestClassifier(**params)
    model.load(save_dir)

    preds_restored = model.predict(X_test)

    # Serialization must not change the predictions.
    assert_array_equal(preds_fitted, preds_restored)

    shutil.rmtree(save_dir)
コード例 #4
0
def test_model_workflow_in_memory(backend):
    """Round-trip an in-memory deep forest through save/load for `backend`.

    Predictions made by the freshly fitted model must equal the predictions
    made by a new model instance restored from ``save_dir``.
    """
    params = copy.deepcopy(kwargs)
    params["partial_mode"] = False
    params["backend"] = backend

    model = CascadeForestClassifier(**params)
    model.fit(X_train, y_train)
    preds_fitted = model.predict(X_test)

    # Persist the fitted model, then restore it into a brand-new instance.
    model.save(save_dir)
    model = CascadeForestClassifier(**params)
    model.load(save_dir)

    preds_restored = model.predict(X_test)

    # Serialization must not change the predictions.
    assert_array_equal(preds_fitted, preds_restored)

    shutil.rmtree(save_dir)
コード例 #5
0
def test_custom_cascade_layer_workflow_partial_mode():
    """Round-trip a partial-mode model built from custom estimators.

    Four decision trees act as the base estimators and a fifth as the
    predictor. Predictions before saving must match predictions from a
    default-constructed model that loads the saved state.
    """
    estimator_count = 4

    model = CascadeForestClassifier(partial_mode=True)

    # Plug in user-supplied base estimators and predictor.
    model.set_estimator(
        [DecisionTreeClassifier() for _ in range(estimator_count)]
    )
    model.set_predictor(DecisionTreeClassifier())

    model.fit(X_train, y_train)
    preds_fitted = model.predict(X_test)

    # Persist the fitted model, then restore it into a brand-new instance.
    model.save(save_dir)
    model = CascadeForestClassifier()
    model.load(save_dir)

    preds_restored = model.predict(X_test)

    # Serialization must not change the predictions.
    assert_array_equal(preds_fitted, preds_restored)

    # Drop the model buffer and the directory written by save().
    model.clean()
    shutil.rmtree(save_dir)
コード例 #6
0
def test_classifier_custom_cascade_layer_workflow_in_memory():
    """Round-trip an in-memory model built from custom estimators.

    Four decision trees act as the base estimators and a fifth as the
    predictor. Besides the save/load prediction check, the public
    ``get_estimator`` accessor must return the very object stored in the
    first layer under the key ``"0-0-custom"``.
    """
    estimator_count = 4

    model = CascadeForestClassifier()

    # Plug in user-supplied base estimators and predictor.
    model.set_estimator(
        [DecisionTreeClassifier() for _ in range(estimator_count)]
    )
    model.set_predictor(DecisionTreeClassifier())

    model.fit(X_train_clf, y_train_clf)
    preds_fitted = model.predict(X_test_clf)

    # Persist the fitted model, then restore it into a brand-new instance.
    model.save(save_dir)
    model = CascadeForestClassifier()
    model.load(save_dir)

    preds_restored = model.predict(X_test_clf)

    # Serialization must not change the predictions.
    assert_array_equal(preds_fitted, preds_restored)

    # The accessor must hand back the stored estimator itself, not a copy.
    via_accessor = model.get_estimator(0, 0, "custom")
    via_layer = model._get_layer(0).estimators_["0-0-custom"].estimator_
    assert via_accessor is via_layer

    # Drop the model buffer and the directory written by save().
    model.clean()
    shutil.rmtree(save_dir)
コード例 #7
0
def test_model_invalid_training_params(param):
    """Fitting with an invalid training parameter must raise ``ValueError``.

    `param` is indexed as ``(case_id, overrides)``: `overrides` is merged
    into a copy of ``toy_kwargs`` and `case_id` selects which parameter name
    the error message is expected to mention.
    """
    # Text that must appear in the error message, keyed by case id.
    expected_fragment = {
        0: "max_layers",
        1: "n_tolerant_rounds",
        2: "delta ",
    }

    params = copy.deepcopy(toy_kwargs)
    params.update(param[1])

    model = CascadeForestClassifier(**params)

    with pytest.raises(ValueError) as excinfo:
        model.fit(X_train, y_train)

    # Case ids outside the table only require that ValueError was raised.
    fragment = expected_fragment.get(param[0])
    if fragment is not None:
        assert fragment in str(excinfo.value)
コード例 #8
0
def test_model_properties_after_fitting():
    """Exercise container accessors and the forest hook of a fitted model.

    Covers ``len()`` / indexing, the layer and binner getters and setters
    (including their error messages), and ``get_forest`` with valid and
    invalid arguments.
    """

    def _expect_error(exc_type, fragment, func, *args):
        # Call func(*args), require exc_type, and check its message text.
        with pytest.raises(exc_type) as excinfo:
            func(*args)
        assert fragment in str(excinfo.value)

    model = CascadeForestClassifier(**toy_kwargs)
    model.fit(X_train, y_train)

    # Sequence protocol mirrors the internal layer container.
    assert len(model) == model.n_layers_
    assert model[0] is model._get_layer(0)

    # Layer container: out-of-range read, then overwrite of an existing slot.
    _expect_error(
        ValueError,
        "The layer index should be in the range",
        model._get_layer,
        model.n_layers_,
    )
    _expect_error(
        RuntimeError,
        "already exists in the internal container",
        model._set_layer,
        0,
        None,
    )

    # Binner container: out-of-range read, then overwrite of an existing slot.
    _expect_error(
        ValueError,
        "The binner index should be in the range",
        model._get_binner,
        model.n_layers_ + 1,
    )
    _expect_error(
        RuntimeError,
        "already exists in the internal container",
        model._set_binner,
        0,
        None,
    )

    # Test the hook on forest estimator: it must return the stored object.
    via_hook = model.get_forest(0, 0, "rf")
    via_layer = model._get_layer(0).estimators_["0-0-rf"].estimator_
    assert via_hook is via_layer

    # Each get_forest argument is validated separately.
    _expect_error(
        ValueError,
        "`layer_idx` should be in the range",
        model.get_forest,
        model.n_layers_,
        0,
        "rf",
    )
    _expect_error(
        ValueError,
        "`est_idx` should be in the range",
        model.get_forest,
        0,
        model.n_estimators,
        "rf",
    )
    _expect_error(
        ValueError,
        "`forest_type` should be one of",
        model.get_forest,
        0,
        0,
        "Unknown",
    )
コード例 #9
0
def test_model_sample_weight():
    """Check that `sample_weight` is honoured when fitting a deep forest.

    Three models are fitted on the same data with the same configuration:

    * without `sample_weight`,
    * with all-ones `sample_weight` (must predict exactly like the first),
    * with a skewed `sample_weight` (must predict differently from the
      uniformly weighted model).
    """

    case_kwargs = copy.deepcopy(kwargs)

    # Training without sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)
    y_pred_no_sample_weight = model.predict(X_test)

    # Training with equal sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.ones(y_train.size)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_equal_sample_weight = model.predict(X_test)

    # Make sure the same predictions with None and equal sample_weight
    assert_array_equal(y_pred_no_sample_weight, y_pred_equal_sample_weight)

    # Training with skewed sample_weight: each sample is weighted by its
    # label value, except label 0 which gets a small positive weight so that
    # those samples are down-weighted rather than dropped entirely.
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.where(y_train == 0, 0.1, y_train)
    # Pass the weights computed above; previously `y_train` itself was
    # passed, which left `sample_weight` unused and gave label 0 weight 0.
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_skewed_sample_weight = model.predict(X_test)

    # Make sure the different predictions with skewed and equal sample_weight
    assert_raises(AssertionError, assert_array_equal,
                  y_pred_skewed_sample_weight, y_pred_equal_sample_weight)

    model.clean()  # clear the buffer
コード例 #10
0
def test_model_properties_after_fitting():
    """Exercise the container accessors of a fitted deep forest model.

    Covers ``len()`` / indexing and the layer and binner getters and
    setters, including the text of the errors they raise.
    """

    def _expect_error(exc_type, fragment, func, *args):
        # Call func(*args), require exc_type, and check its message text.
        with pytest.raises(exc_type) as excinfo:
            func(*args)
        assert fragment in str(excinfo.value)

    model = CascadeForestClassifier(**toy_kwargs)
    model.fit(X_train, y_train)

    # Sequence protocol mirrors the internal layer container.
    assert len(model) == model.n_layers_
    assert model[0] is model._get_layer(0)

    # Layer container: out-of-range read, then overwrite of an existing slot.
    _expect_error(
        ValueError,
        "The layer index should be in the range",
        model._get_layer,
        model.n_layers_,
    )
    _expect_error(
        RuntimeError,
        "already exists in the internal container",
        model._set_layer,
        0,
        None,
    )

    # Binner container: out-of-range read, then overwrite of an existing slot.
    _expect_error(
        ValueError,
        "The binner index should be in the range",
        model._get_binner,
        model.n_layers_ + 1,
    )
    _expect_error(
        RuntimeError,
        "already exists in the internal container",
        model._set_binner,
        0,
        None,
    )
コード例 #11
0
ファイル: test_explainer.py プロジェクト: Maryom/DF21
# Script: fit a deep forest on the iris data shipped with shap, pull one
# forest out of the fitted model and compute SHAP values for it.
# feature_names


X, y = shap.datasets.iris()

# print("X", X.head())
# print("y", y)

# Capture the column labels before X is converted to a plain array below.
# Presumably X is a pandas DataFrame at this point - TODO confirm.
feature_names = X.columns
X = np.array(X)
y = np.array(y)

# Hold out 20% of the rows for testing. No random_state is passed here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = CascadeForestClassifier(backend="sklearn")
model.fit(X_train, y_train)
# Fetch the forest with layer index 0, estimator index 0 and type "rf".
forest = model.get_forest(0, 0, "rf")
explainer = shap.TreeExplainer(forest)
shap_values = explainer.shap_values(X_test)

# Disabled experiments kept for reference: inspecting shapes, plotting a
# summary, and explaining a plain RandomForestClassifier instead.
# shap_values = np.array(explainer.shap_values(X_train))
# print(shap_values.shape)
# print(shap_values[2].shape)

# shap.summary_plot(shap_values[2], X_train)
#
#
# clf = RandomForestClassifier(n_estimators=100)
# clf.fit(X_train, y_train)
# explainer = shap.TreeExplainer(clf)
# shap_values = np.array(explainer.shap_values(X_test))
コード例 #12
0
               random_state=args.randomseed,
               n_jobs=-1)
    # Resample (X, y) with the sampler `sm` configured above.
    # NOTE(review): if `sm` is an imbalanced-learn sampler, newer releases
    # expose this as `fit_resample` - confirm against the pinned version.
    x_resampled, y_resampled = sm.fit_sample(X, y)
    # After over-sampling: collect each class label together with its count.
    np_resampled_y = np.asarray(np.unique(y_resampled, return_counts=True))
    df_resampled_y = pd.DataFrame(np_resampled_y.T, columns=['Class', 'Sum'])
    print("\nNumber of samples after over sampleing:\n{0}\n".format(
        df_resampled_y))

    # Initialize the classifier.
    clf = CascadeForestClassifier(random_state=args.randomseed)
    print("\nClassifier parameters:")
    print(clf.get_params())
    print("\nSMOTE parameters:")
    print(sm.get_params())
    print("\n")

    # Train on the resampled data.
    clf.fit(x_resampled, y_resampled)
    # Predict on the test set.
    y_pred = clf.predict(X_test)

    # Report test-set statistics; the two-class case uses its own evaluator.
    if (num_categories > 2):
        model_evaluation(num_categories, y_test, y_pred)
    else:
        bi_model_evaluation(y_test, y_pred)
    end_time = time.time()  # program end time
    # Elapsed time is printed both in minutes and in seconds.
    print("\n[Finished in: {0:.6f} mins = {1:.6f} seconds]".format(
        ((end_time - start_time) / 60), (end_time - start_time)))
コード例 #13
0
ファイル: model.py プロジェクト: fshutong/fang
valx = valx[:, 0:10]  # keep the first 10 columns (bands) as features

# Cast every array to uint8 before training.
# NOTE(review): uint8 cannot hold fractions or values outside 0..255 -
# confirm that the features and labels are 8-bit integers.
train_data = train_data.astype(np.uint8)
test_data = test_data.astype(np.uint8)
train_label = train_label.astype(np.uint8)
test_label = test_label.astype(np.uint8)
# Validation set
valx = valx.astype(np.uint8)
valy = valy.astype(np.uint8)

classification = CascadeForestClassifier(max_depth=5,
                                         n_trees=100,
                                         n_jobs=6,
                                         use_predictor="forest")

# Labels are flattened to 1-D for fitting.
classification.fit(train_data, train_label.ravel())
prediction = classification.predict_proba(valx)
# Take column 1 of each probability row.
# Presumably column 1 is the positive class - verify against the label set.
probability = [prob[1] for prob in prediction]
# Threshold at 0.5 to get hard 0/1 labels for the validation set.
pre_class = []
for i in probability:
    if i > 0.5:
        pre_class.append(1)
    else:
        pre_class.append(0)
# From here on `prediction` holds only column 1 of the probabilities.
prediction = prediction[:, 1]

# Hard predictions straight from the classifier, for comparison.
pred = classification.predict(valx)

print(pred.shape)

# Score on the training set (the printed label means "training set").
# NOTE(review): `train_label` is not flattened here, unlike in fit() above.
print("训练集:", classification.score(train_data, train_label))