Code Example #1
File: gradient_booster.py  Project: OlexiyO/HPcontest
# Note: PrepareForTraining, _SplitIntoTrainAndTest, _CutAtBestStep and
# _PrintDebugInfo are helpers defined elsewhere in gradient_booster.py.
from sklearn import ensemble


def LearnGradientBoost(iX, iy, param_overrides=None, test_ratio=0.2):
    """Learns a model to approximate iy from iX.

  Args:
    iX: List of pandas.Series.
    iy: pandas.Series.
    param_overrides: Dict of param overrides for GradientBoost algorithm.
    test_ratio: Fraction of the data to hold out as the test set.

  Returns:
    Model.
  """
    assert all(len(x) == len(iy) for x in iX), "%s != %d" % (list(map(len, iX)), len(iy))

    X, y = PrepareForTraining(iX, iy)

    L = len(y)  # Rows remaining after PrepareForTraining (per the print below, those with y defined).
    # print '%d docs have y defined out of total %d docs' % (L, len(iy))

    # Note: the helper returns the test split first.
    X_test, X_train, y_test, y_train = _SplitIntoTrainAndTest(X, y, test_ratio)

    # NB: 'learn_rate' and loss 'ls' are the names used by old scikit-learn
    # releases; newer versions call them 'learning_rate' and 'squared_error'.
    params = {"n_estimators": 500, "max_depth": 1, "min_samples_split": 5, "learn_rate": 0.1, "loss": "ls"}
    if param_overrides:
        params.update(param_overrides)
    clf = ensemble.GradientBoostingRegressor(**params)
    clf.fit(X_train, y_train)

    # Truncate clf at the boosting step with the best held-out error
    # (a plausible sketch of _CutAtBestStep follows this example).
    best_func = _CutAtBestStep(clf, X_test, y_test)
    _PrintDebugInfo(best_func, X_test, X_train, y_test, y_train)
    return clf
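
The helpers used above are defined elsewhere in gradient_booster.py and are not shown on this page. Judging from how its return value is used here and in example #2, _CutAtBestStep plausibly evaluates every boosting stage on the held-out data, truncates the model at the best stage, and returns that stage count. A minimal sketch under that assumption (an illustration, not the project's actual code):

import numpy as np

def _CutAtBestStep(clf, X_test, y_test):
    # staged_predict yields the prediction after each boosting stage.
    errors = [np.mean((pred - y_test) ** 2)
              for pred in clf.staged_predict(X_test)]
    best = int(np.argmin(errors)) + 1  # number of stages to keep
    # Drop the trees past the best stage so predict() uses only the rest.
    clf.estimators_ = clf.estimators_[:best]
    clf.n_estimators = best
    return best
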
Code Example #2
File: gradient_booster.py  Project: OlexiyO/HPcontest
# Same module as example #1; AveragePredictor comes from the project-local
# base_predictor module.
import base_predictor
from sklearn import ensemble


def LearnGradientBoostInTwoHalves(iX, iy, param_overrides=None, min_steps=10):
    """Learns a model to approximate iy from iX.

  Args:
    iX: List of pandas.Series.
    iy: pandas.Series.
    param_overrides: Dict of param overrides for GradientBoost algorithm.
    min_steps: int. If the best step count for either half of the data falls
      below this number, we resplit and retrain.

  Returns:
    Model.
  """
    assert all(len(x) == len(iy) for x in iX), "%s != %d" % (list(map(len, iX)), len(iy))

    X, y = PrepareForTraining(iX, iy)

    L = len(y)
    # print '%d docs have y defined out of total %d docs' % (L, len(iy))

    # Repeatedly resplit the data in half and train one model per half,
    # validating each on the other half, until both models use at least
    # min_steps boosting stages.
    while True:
        X_1, X_2, y_1, y_2 = _SplitIntoTrainAndTest(X, y, 0.5)

        params = {
            "n_estimators": 500,
            "max_depth": 1,
            "min_samples_split": 5,
            "min_samples_leaf": 5,
            "learn_rate": 0.1,
            "loss": "ls",
        }
        if param_overrides:
            params.update(param_overrides)

        clf1 = ensemble.GradientBoostingRegressor(**params)
        clf1.fit(X_1, y_1)
        step1 = _CutAtBestStep(clf1, X_2, y_2)
        if step1 < min_steps:
            continue  # Too few useful boosting steps; resplit and retry.
        clf2 = ensemble.GradientBoostingRegressor(**params)
        clf2.fit(X_2, y_2)
        step2 = _CutAtBestStep(clf2, X_1, y_1)
        # step1 >= min_steps is already guaranteed by the check above.
        if step2 >= min_steps:
            print step1, step2
            return base_predictor.AveragePredictor(clf1, clf2)
        """