Code Example #1
# Assumed imports for this snippet; `gb` is this repository's gradient
# boosting module and generate_continuous_data its data helper.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm


def regression_sample(true_func=np.sin, x_scale=3.):
    """
    Regression problem for continuous targets.
    :param float x_scale: Scale of the data. Generates data in the range [-x_scale, x_scale].
    :return:
    """
    x, t = generate_continuous_data(true_function=true_func, x_scale=x_scale)

    trained_models = []
    iteration_dist = [5, 10, 20, 40, 100]
    for n_iter in iteration_dist:
        # Define the GradientBoostedDT.
        # This is a regression problem on a continuous variable, so:
        #   objective:  squared loss (LeastSquare)
        #   activation: identity map (f(x) = x)
        # The loss used to evaluate the current fit is also the squared loss.
        rmse_objective = gb.LeastSquare()
        loss_function = gb.functions.least_square
        clf = gb.GradientBoostedDT(objective=rmse_objective,
                                   loss=loss_function,
                                   max_depth=4,
                                   num_iter=n_iter,
                                   gamma=.01,
                                   lam=.1,
                                   eta=.1)
        clf.fit(x=x, t=t)
        trained_models.append(clf)

    x_test = np.linspace(-x_scale, x_scale, 100).reshape(100, 1)
    fig = plt.figure(figsize=(6, 6))
    ax_i = fig.add_subplot(1, 1, 1)
    ax_i.plot(x_test,
              true_func(x_test),
              "--",
              label='True Function',
              color="C0")
    ax_i.scatter(x,
                 t,
                 s=50,
                 label='Training Data',
                 linewidth=1.,
                 edgecolors="C0",
                 color="white")
    ax_i.set_xlabel("Input")
    ax_i.set_ylabel("Target")

    for i, (n_iter, model) in enumerate(zip(iteration_dist, trained_models)):
        y = model.predict(x_test)
        ax_i.plot(x_test,
                  y,
                  "-",
                  label='n_iter: {}'.format(n_iter),
                  color=cm.viridis(i / len(iteration_dist), 1))
    ax_i.legend(loc=4)
    ax_i.set_title("Transition by Number of Iterations")
    fig.savefig('experiment_figures/regression.png')
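The comments in this example describe the usual second-order boosting setup: the objective supplies the gradient and Hessian of the squared loss, `lam` is the L2 penalty on leaf weights, `gamma` the cost of adding a leaf, and `eta` the shrinkage applied to each tree's output. A minimal sketch of the standard formulas behind these parameters, under that assumption (the function names here are illustrative, not this library's API):

import numpy as np

# Squared loss L(y, t) = (y - t)^2 / 2 with identity activation:
def least_square_grad(y, t):
    return y - t                      # dL/dy

def least_square_hess(y, t):
    return np.ones_like(y)            # d^2L/dy^2 is constant

# Given the gradients g and hessians h of the samples in a leaf,
# the optimal regularized leaf weight is w* = -G / (H + lam):
def leaf_weight(g, h, lam):
    return -g.sum() / (h.sum() + lam)

# Split gain: a split is kept only when this exceeds zero, so larger
# `gamma` prunes more aggressively:
def split_gain(g_l, h_l, g_r, h_r, lam, gamma):
    def score(g, h):
        return g.sum() ** 2 / (h.sum() + lam)
    g_all, h_all = np.append(g_l, g_r), np.append(h_l, h_r)
    return 0.5 * (score(g_l, h_l) + score(g_r, h_r) - score(g_all, h_all)) - gamma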
Code Example #2
    # (Excerpt: x, y, perm, logger, fn, gb, pd, os, and OUTPUT_DIR are
    # defined earlier in the original script.)
    n_train = 5000
    x_train, t_train = x[perm[:n_train]], y[perm[:n_train]]
    x_test, t_test = x[perm[n_train:]], y[perm[n_train:]]

    logger.info('training datasize: {0}'.format(t_train.shape[0]))
    logger.info('test datasize: {0}'.format(t_test.shape[0]))

    # Set up the regression objective for training and the
    # loss function for evaluating prediction quality.
    regobj = fn.CrossEntropy()
    loss = fn.logistic_loss

    clf = gb.GradientBoostedDT(regobj,
                               loss,
                               num_iter=100,
                               eta=.2,
                               max_leaves=15,
                               max_depth=5,
                               gamma=.01)
    clf.fit(x_train, t_train, validation_data=(x_test, t_test), verbose=1)
    f_importance = clf.feature_importance()
    pd.Series(f_importance).reset_index().to_csv(
        os.path.join(OUTPUT_DIR, 'feature_importance.csv'), index=False)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_title('sequence of training and test loss')
    ax.plot(clf.training_loss, 'o-', label='training loss')
    ax.plot(clf.validation_loss, 'o-', label='test loss')
    ax.set_yscale('log')
    ax.legend()
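With a sigmoid activation, the cross-entropy objective and the logistic evaluation loss used above have simple closed forms. A minimal sketch of what an objective like `fn.CrossEntropy` and a loss like `fn.logistic_loss` typically compute; the class and method names here are assumptions, not the library's actual interface:

import numpy as np

def sigmoid(y):
    return 1.0 / (1.0 + np.exp(-y))

class CrossEntropySketch:
    """Binary cross entropy on raw scores y, with sigmoid activation."""

    def activate(self, y):
        return sigmoid(y)

    def gradient(self, y, t):
        return sigmoid(y) - t         # dL/dy = p - t

    def hessian(self, y, t):
        p = sigmoid(y)
        return p * (1.0 - p)          # d^2L/dy^2

def logistic_loss_sketch(t, y):
    """-[t log p + (1 - t) log(1 - p)] with p = sigmoid(y)."""
    p = sigmoid(y)
    return -(t * np.log(p) + (1.0 - t) * np.log(1.0 - p))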
Code Example #3
# Assumed imports for this snippet; `gb` is this repository's gradient
# boosting module.
import json

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.model_selection import train_test_split


def binary_classification_sample():
    """Sample problem with two-dimensional inputs.
    """
    np.random.seed(71)  # seed must be called, not assigned
    x = (
        np.random.normal(loc=.7, scale=1., size=400).reshape(200, 2),
        np.random.normal(loc=-.7, scale=1., size=400).reshape(200, 2),
    )
    t = np.zeros_like(x[0]), np.ones_like(x[1])
    x = np.append(x[0], x[1], axis=0)
    t = np.append(t[0], t[1], axis=0)[:, 0]

    x_train, x_test, t_train, t_test = train_test_split(x,
                                                        t,
                                                        test_size=.3,
                                                        random_state=71)

    # Binary classification, so the objective is cross entropy and the activation is the sigmoid.
    regobj = gb.CrossEntropy()

    # The loss function is the logistic loss.
    loss = gb.logistic_loss

    clf = gb.GradientBoostedDT(regobj,
                               loss,
                               max_depth=5,
                               gamma=.05,
                               lam=3e-2,
                               eta=.1,
                               num_iter=50)
    clf.fit(x=x_train, t=t_train, validation_data=(x_test, t_test))

    # Dump the tree network structure for the bundled viewer app.
    networks = clf.show_network()
    with open('./view/src/assets/node_edge.json', "w") as f:
        json.dump(list(networks), f)

    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title('Training Transitions')
    ax.plot(clf.training_loss, 'o-', label='Training')
    ax.plot(clf.validation_loss, 'o-', label='Validation')
    ax.set_xlabel("Iterations")
    ax.set_ylabel("Loss Transition")
    ax.legend()
    fig.savefig("experiment_figures/training_transitions.png", dpi=150)

    plt.figure(figsize=(6, 6))

    xx = np.linspace(start=-4, stop=4, num=50)
    yy = np.linspace(start=-4, stop=4, num=50)
    X, Y = np.meshgrid(xx, yy)
    Z = [
        clf.predict(np.array([a, b]).reshape(1, 2))[0] for a in xx for b in yy
    ]
    # The comprehension varies `a` (the x axis) over rows, while contour
    # expects rows indexed by y, so transpose after reshaping.
    Z = np.array(Z).reshape(len(xx), len(yy)).T
    levels = np.linspace(0, 1, 11)
    plt.contour(X, Y, Z, levels, colors=["gray"], alpha=.05)
    plt.contourf(X, Y, Z, levels, cmap=cm.GnBu)
    # plt.contour(X, Y, Z, levels, cmap=cm.PuBu_r)
    cbar = plt.colorbar()
    plt.scatter(x[:200, 0],
                x[:200, 1],
                s=80,
                label="t = 0",
                edgecolors="C2",
                alpha=.6,
                linewidth=2,
                facecolor="white")
    plt.scatter(x[200:, 0],
                x[200:, 1],
                s=80,
                label="t = 1",
                edgecolors="C0",
                alpha=.6,
                linewidth=2,
                facecolor="white")
    plt.legend(loc=2)
    plt.title("binary regression")
    plt.tight_layout()
    plt.savefig('experiment_figures/binary_classification.png', dpi=100)

    pred_prob = clf.predict(x_test)
    pred_t = np.where(pred_prob >= .5, 1, 0)
    acc = np.where(pred_t == t_test, 1, 0).sum() / len(t_test)
    acc_str = 'test accuracy:{0:.2f}'.format(acc)
    print(acc_str)
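As a usage note, the per-point comprehension that builds `Z` above can be replaced by a single batched call, assuming `clf.predict` accepts an (N, 2) array as it does for `x_test` in the accuracy check:

# One batched prediction over the whole grid (illustrative alternative).
grid = np.c_[X.ravel(), Y.ravel()]      # shape (2500, 2), one row per grid point
Z = clf.predict(grid).reshape(X.shape)  # already in meshgrid order, no transpose needed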
Code Example #4
    # (Excerpt: x, t, logger, fn, gb, plt, and pd are defined earlier
    # in the original script.)
    # Split into train and test datasets.
    # I should have used sklearn.model_selection.train_test_split...
    np.random.seed(71)
    perm = np.random.permutation(len(t))
    x_train, t_train = x[perm[:2000]], t[perm[:2000]]
    x_test, t_test = x[perm[2000:]], t[perm[2000:]]

    logger.info('training datasize: {0}'.format(t_train.shape[0]))
    logger.info('test datasize: {0}'.format(t_test.shape[0]))

    # Set up the regression objective for training and the
    # loss function for evaluating prediction quality.
    regobj = fn.CrossEntropy()
    loss = fn.logistic_loss

    clf = gb.GradientBoostedDT(regobj, loss, num_iter=30, eta=.4)
    clf.fit(x_train, t_train, validation_data=(x_test, t_test))

    plt.title('sequence of training and test loss')
    plt.plot(clf.training_loss, 'o-', label='training loss')
    plt.plot(clf.validation_loss, 'o-', label='test loss')
    plt.yscale('log')
    plt.legend()
    plt.show()

    pred_prob = clf.predict(x_test)
    pred_cls = np.where(pred_prob > .5, 1., .0)
    df_pred = pd.DataFrame({
        'probability': pred_prob,
        'predict': pred_cls,
        'true': t_test
    })
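Once `df_pred` is built, a few pandas one-liners summarize it; this follow-up is illustrative and uses nothing beyond the frame constructed above:

# Accuracy and a confusion table from the prediction frame.
accuracy = (df_pred['predict'] == df_pred['true']).mean()
confusion = pd.crosstab(df_pred['true'], df_pred['predict'])
print('test accuracy: {0:.3f}'.format(accuracy))
print(confusion)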