Example #1 (score: 0)
def test_multioutput():
    """Check estimators on multi-output problems."""
    # 12 two-dimensional training points, one triple per quadrant.
    samples = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
               [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]]

    # Two target columns per sample: first output has 2 classes {-1, 1},
    # second output has 4 classes {0, 1, 2, 3}.
    targets = [[-1, 0], [-1, 0], [-1, 0], [1, 1], [1, 1], [1, 1],
               [-1, 2], [-1, 2], [-1, 2], [1, 3], [1, 3], [1, 3]]

    # One probe point per quadrant with its expected two-column prediction.
    probes = [[-1, -1], [1, 1], [-1, 1], [1, -1]]
    expected = [[-1, 0], [1, 1], [-1, 2], [1, 3]]

    # toy classification problem
    classifier = ExtraTreesClassifier(random_state=0)
    predictions = classifier.fit(samples, targets).predict(probes)
    assert_array_equal(predictions, expected)
    assert_equal(predictions.shape, (4, 2))

    # predict_proba / predict_log_proba each return one array per output:
    # (4 probes x 2 classes) for the first, (4 probes x 4 classes) for the second.
    proba = classifier.predict_proba(probes)
    assert_equal(len(proba), 2)
    assert_equal(proba[0].shape, (4, 2))
    assert_equal(proba[1].shape, (4, 4))

    log_proba = classifier.predict_log_proba(probes)
    assert_equal(len(log_proba), 2)
    assert_equal(log_proba[0].shape, (4, 2))
    assert_equal(log_proba[1].shape, (4, 4))

    # toy regression problem: the same integer targets are recovered
    # (up to floating-point tolerance) by the regressor.
    regressor = ExtraTreesRegressor(random_state=5)
    predictions = regressor.fit(samples, targets).predict(probes)
    assert_almost_equal(predictions, expected)
    assert_equal(predictions.shape, (4, 2))
Example #2 (score: 0)
def test_multioutput():
    """Check estimators on multi-output problems."""
    # Use np.errstate instead of a manual seterr/restore pair: the original
    # only restored the error state at the end of the function, so a failing
    # assertion would leak divide="ignore" into every subsequent test. The
    # context manager restores the previous state even on exceptions.
    with np.errstate(divide="ignore"):
        # 12 two-dimensional training points, one triple per quadrant.
        X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]]

        # Two target columns: first output has 2 classes {-1, 1},
        # second output has 4 classes {0, 1, 2, 3}.
        y = [[-1, 0], [-1, 0], [-1, 0], [1, 1], [1, 1], [1, 1], [-1, 2], [-1, 2], [-1, 2], [1, 3], [1, 3], [1, 3]]

        # One probe per quadrant with its expected two-column prediction.
        T = [[-1, -1], [1, 1], [-1, 1], [1, -1]]
        y_true = [[-1, 0], [1, 1], [-1, 2], [1, 3]]

        # toy classification problem
        clf = ExtraTreesClassifier(random_state=0)
        y_hat = clf.fit(X, y).predict(T)
        assert_array_equal(y_hat, y_true)
        assert_equal(y_hat.shape, (4, 2))

        # One probability array per output: (4, n_classes_of_output).
        proba = clf.predict_proba(T)
        assert_equal(len(proba), 2)
        assert_equal(proba[0].shape, (4, 2))
        assert_equal(proba[1].shape, (4, 4))

        log_proba = clf.predict_log_proba(T)
        assert_equal(len(log_proba), 2)
        assert_equal(log_proba[0].shape, (4, 2))
        assert_equal(log_proba[1].shape, (4, 4))

        # toy regression problem: the same targets are recovered
        # (up to floating-point tolerance) by the regressor.
        clf = ExtraTreesRegressor(random_state=5)
        y_hat = clf.fit(X, y).predict(T)
        assert_almost_equal(y_hat, y_true)
        assert_equal(y_hat.shape, (4, 2))
                                max_depth=None,
                                min_samples_split=2,
                                min_samples_leaf=1,
                                bootstrap=False,
                                max_features='auto')
    # 模型训练
    algo.fit(X_train, Y_train)
    # 模型效果评估
    print('训练集上的准确率:{}'.format(algo.score(X_train, Y_train)))
    print('测试集上的准确率:{}'.format(algo.score(X_test, Y_test)))
    # 查看下API属性
    X_test = [[6.9, 3.1, 5.1, 2.3], [6.1, 2.8, 4.0, 1.3], [5.2, 3.4, 1.4, 0.2]]
    print('样本的预测值:')
    print(algo.predict(X_test))
    print('样本预测值概率:')
    print(algo.predict_log_proba(X_test))
    print('样本预测概率值的Log转换:')
    print(algo.predict_log_proba(X_test))
    # print('训练好的所有子模型:{}'.format(algo.estimators_))

    for index, estimators in enumerate(algo.estimators_):
        print('第{}个子模型对于数据的预测值为:{}'.format(index + 1, algo.predict(X_test)))
    print('各个特征属性的重要性权重列表:\n{}'.format(algo.feature_importances_))
    # print('Bagging模型的袋外准确率:\n{}'.format(algo.oob_score_))

    # 所有子模型可视化
    for index, estimators in enumerate(algo.estimators_):
        dot_data = tree.export_graphviz(decision_tree=estimators,
                                        out_file=None,
                                        feature_names=['c1', 'c2', 'c3', 'c4'],
                                        class_names=['A', 'B', 'C'],