Example #1
import numpy as np
import sklearn.ensemble

import smooth_rf


def test_depth_tune_regression():
    """
    test depth_tune, regression rf (structure check)
    """
    n = 200

    X = np.random.uniform(size=(n, 510), low=-1, high=1)
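    # Friedman #1 style response: only the first 5 of the 510 columns are informative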
    y = 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5)**2 + \
        10 * X[:, 3] + 5 * X[:, 4] + np.random.normal(size=n)

    rf_class = sklearn.ensemble.RandomForestRegressor(n_estimators=2)
    random_forest = rf_class.fit(X=X, y=y.ravel())
    X_trained = X
    y_trained = y

    try:
        new_rf = smooth_rf.depth_tune(random_forest,
                                      X_trained,
                                      y_trained,
                                      verbose=False)
    except Exception:
        assert False, \
            "Error in tuning regression rf with oob depth"

    assert type(new_rf) == type(random_forest), \
        "updated random forest should be same class as random forest put in"

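    # depth_tune attaches a per-depth loss vector to the returned forest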
    loss_vec = new_rf.loss_vec_depth

    _, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                   verbose=False)

    assert loss_vec.shape[0] == int(max_depth), \
        "loss vector is incorrect dimension relative to maximum depth of rf"
Example #2
from collections import Counter

import numpy as np
import pandas as pd
import sklearn.ensemble

import smooth_rf


def test_depth_tune_classification():
    """
    test depth_tune, classification rf (structure check)
    """
    n = 200
    min_size_leaf = 1

    X = np.random.uniform(size=(n, 510), low=-1, high=1)
    y = 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5)**2 + \
        10 * X[:, 3] + 5 * X[:, 4] + np.random.normal(size=n)

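    # bin the continuous response into 5 classes for the classification forest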
    y_cat = np.array(pd.cut(y, bins=5, labels=np.arange(5, dtype=int)),
                     dtype=int)

    y = y_cat

    num_classes = len(Counter(y_cat).keys())

    rf_class = sklearn.ensemble.RandomForestClassifier(
        n_estimators=2, min_samples_leaf=min_size_leaf)
    random_forest = rf_class.fit(X=X, y=y.ravel())

    try:
        new_rf = smooth_rf.depth_tune(random_forest, X, y, verbose=False)
    except Exception:
        assert False, \
            "Error in tuning classification rf with oob depth"

    assert type(new_rf) == type(random_forest), \
        "updated random forest should be same class as random forest put in"

    loss_vec = new_rf.loss_vec_depth

    _, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                   verbose=False)

    assert loss_vec.shape[0] == int(max_depth), \
        "loss vector is incorrect dimension relative to maximum depth of rf"
Example #3
## models ##

# base rf --------------
rf_base = generate_rf(X_train, y_train, n_trees, reg_or_class=reg_or_class)

info_dict["seed"] = my_seed
scoring_dict["rf_base"] = assess_rf(rf_base, X_test, y_test)

random_forest = copy.deepcopy(rf_base)

# depth analysis --------------
print("Depth tune:")
time_start = time.time()

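# tune the forest across depths; the result records a per-depth loss vector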
depth_tune_rf = smooth_rf.depth_tune(random_forest,
                                     X_trained=X_train,
                                     y_trained=y_train)

scoring_dict["depth_tune"] = assess_rf(depth_tune_rf, X_test, y_test)
info_dict["depth_tune"] = depth_tune_rf.loss_vec_depth

depth_spent = time.time() - time_start
times = [depth_spent]

# wOLS analysis --------------
if reg_or_class == "reg":
    print("wOLS:")
    time_start = time.time()

    parent_all_opts = [True, False]  # parent_all
    no_constraint_opts = [True, False]  # no_constraint