def main():
    SpamTrain = "../DataSets/spam.train.txt"
    SpamTest = "../DataSets/spam.test.txt"

    logging.basicConfig(stream=sys.stderr)
    log = logging.getLogger("GradientTreeBoosting.test_on_spam_dataset")
    log.setLevel(logging.DEBUG)

    data = np.loadtxt(SpamTrain)

    train_x, train_y = data[::, 1::], data[::, 0]

    ensemble = GradientTreeBoosting(count_steps=200, max_tree_depth=3, step=1e-2, debug=True)
    ensemble.fit(train_x, train_y)

    ensemble_with_sktree = GradientTreeBoostingViaSklearnTree(count_steps=200, max_tree_depth=3, step=1e-2, debug=True)
    ensemble_with_sktree.fit(train_x, train_y)

    # for tree in ensemble.models:
    #     tree.visualize()
    #     print >>sys.stderr, "\n"
    #
    # for tree in ensemble_with_sktree.models:
    #     sklearn_trees.export_graphviz(tree, out_file=sys.stderr)
    #     print >>sys.stderr, "\n"


    sktree = sklearn_trees.DecisionTreeRegressor()
    sktree.fit(train_x, train_y)

    skensemble = GradientBoostingRegressor(n_estimators=200, max_depth=3)
    skensemble.fit(train_x, train_y)


    data = np.loadtxt(SpamTest)

    test_x, test_y = data[::, 1::], data[::, 0]

    prediction = ensemble.predict(test_x)
    skprediction = sktree.predict(test_x)
    skboosting_prediction = skensemble.predict(test_x)
    boosting_with_sktree_prediction = ensemble_with_sktree.predict(test_x)

    log.debug("Target: %s" % test_y)
    log.debug("Prediction my boosting: %s" % prediction)
    log.debug("Prediction boosting with sklearn tree: %s" % boosting_with_sktree_prediction)

    log.debug("Mean squared error my boosting: %f" % mean_squared_error(test_y, prediction))
    log.debug("Mean squared error sklearn tree: %f" % mean_squared_error(test_y, skprediction))


    log.debug("Mean squared error boosting with sklearn tree: %f" %
             mean_squared_error(test_y, boosting_with_sktree_prediction))
    log.debug("Mean squared error sklearn boosting: %f" % mean_squared_error(test_y, skboosting_prediction))
    def test_on_housing_dataset(self):
        """
        Test on housing data set
        Logging result and MSE for some different models
        :return: None
        """
        log = logging.getLogger("GradientTreeBoosting.test_on_housing_dataset")
        data = np.loadtxt(DecisionTreeTest.HousingDataPath)

        x, y = data[::, :-1:], data[::, -1]

        kf = KFold(x.shape[0], n_folds=5)

        for train, test in kf:
            train_x, train_y = x[train], y[train]
            test_x, test_y = x[test], y[test]

            ensemble = GradientTreeBoosting(count_steps=200, step=1e-2, max_tree_depth=8)
            ensemble.fit(train_x, train_y)

            ensemble_with_sktree = GradientTreeBoostingViaSklearnTree(count_steps=200, max_tree_depth=8, step=1e-2)
            ensemble_with_sktree.fit(train_x, train_y)

            sktree = sklearn_trees.DecisionTreeRegressor()
            sktree.fit(train_x, train_y)

            skensemble = GradientBoostingRegressor()
            skensemble.fit(train_x, train_y)

            prediction = ensemble.predict(test_x)
            skprediction = sktree.predict(test_x)
            skboosting_prediction = skensemble.predict(test_x)
            boosting_with_sktree_prediction = ensemble_with_sktree.predict(test_x)

            log.debug("Target: %s" % test_y)
            log.debug("Prediction: %s" % prediction)
            log.debug("Mean squared error my boosting: %f" % mean_squared_error(test_y, prediction))
            log.debug("Mean squared error sklearn tree: %f" % mean_squared_error(test_y, skprediction))
            log.debug("Mean squared error boosting with sklearn tree: %f" %
                      mean_squared_error(test_y, boosting_with_sktree_prediction))
            log.debug("Mean squared error sklearn boosting: %f" % mean_squared_error(test_y, skboosting_prediction))
Exemple #3
0
def gradientboost():
    req = request.json
    result = GradientTreeBoosting(req['url'], req['test_size']).run()
    #result = GradientTreeBoosting(req['url'], req['test_size'], req['ignore_features']).run()
    #print(result)
    return result