def test_depth_tune_regression():
    """
    test depth_tune, regression rf (structure check)

    Fits a 2-tree ``RandomForestRegressor`` on simulated data, passes it
    through ``smooth_rf.depth_tune`` and checks two structural properties
    of the result:

    1. the returned object has exactly the same class as the input forest
    2. ``loss_vec_depth`` has one entry per depth level, where the number
       of levels is the maximum depth reported by
       ``smooth_rf.calc_depth_for_forest`` for the input forest

    The data is random and unseeded; only shapes and types are checked,
    never the numeric values of the loss.
    """
    n = 200

    # 510 uniform features on [-1, 1]; only the first five drive the response
    X = np.random.uniform(size=(n, 510), low=-1, high=1)
    y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
         + 20 * (X[:, 2] - .5)**2
         + 10 * X[:, 3]
         + 5 * X[:, 4]
         + np.random.normal(size=n))

    rf_class = sklearn.ensemble.RandomForestRegressor(n_estimators=2)
    random_forest = rf_class.fit(X=X, y=y.ravel())

    # the forest is tuned on the same data it was trained on
    try:
        new_rf = smooth_rf.depth_tune(random_forest, X, y,
                                      verbose=False)
    except Exception as err:
        # chain the cause so the real traceback is kept in the test report
        raise AssertionError(
            "Error in tuning regression rf with oob depth") from err

    assert type(new_rf) is type(random_forest), \
        "updated random forest should be same class as random forest put in"

    loss_vec = new_rf.loss_vec_depth

    _, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                   verbose=False)

    # builtin int: the np.int alias was removed in NumPy 1.24
    assert loss_vec.shape[0] == int(max_depth), \
        "loss vector is incorrect dimension relative to maximum depth of rf"
def test_depth_tune_classification():
    """
    test depth_tune, classification rf (structure check)

    Fits a 2-tree ``RandomForestClassifier`` on simulated data whose
    continuous response has been cut into 5 equal-width bins (labels 0-4),
    passes it through ``smooth_rf.depth_tune`` and checks two structural
    properties of the result:

    1. the returned object has exactly the same class as the input forest
    2. ``loss_vec_depth`` has one entry per depth level, where the number
       of levels is the maximum depth reported by
       ``smooth_rf.calc_depth_for_forest`` for the input forest

    The data is random and unseeded; only shapes and types are checked,
    never the numeric values of the loss.
    """
    n = 200
    min_size_leaf = 1

    # 510 uniform features on [-1, 1]; only the first five drive the response
    X = np.random.uniform(size=(n, 510), low=-1, high=1)
    y_continuous = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
                    + 20 * (X[:, 2] - .5)**2
                    + 10 * X[:, 3]
                    + 5 * X[:, 4]
                    + np.random.normal(size=n))

    # discretise the response into integer class labels 0..4
    # (builtin int: the np.int alias was removed in NumPy 1.24)
    y = np.array(pd.cut(y_continuous, bins=5,
                        labels=np.arange(5, dtype=int)),
                 dtype=int)

    rf_class = sklearn.ensemble.RandomForestClassifier(
        n_estimators=2,
        min_samples_leaf=min_size_leaf)
    random_forest = rf_class.fit(X=X, y=y.ravel())

    # the forest is tuned on the same data it was trained on
    try:
        new_rf = smooth_rf.depth_tune(random_forest, X, y,
                                      verbose=False)
    except Exception as err:
        # chain the cause so the real traceback is kept in the test report
        raise AssertionError(
            "Error in tuning classification rf with oob depth") from err

    assert type(new_rf) is type(random_forest), \
        "updated random forest should be same class as random forest put in"

    loss_vec = new_rf.loss_vec_depth

    _, max_depth = smooth_rf.calc_depth_for_forest(random_forest,
                                                   verbose=False)

    assert loss_vec.shape[0] == int(max_depth), \
        "loss vector is incorrect dimension relative to maximum depth of rf"
## models ## # base rf -------------- rf_base = generate_rf(X_train, y_train, n_trees, reg_or_class=reg_or_class) info_dict["seed"] = my_seed scoring_dict["rf_base"] = assess_rf(rf_base, X_test, y_test) random_forest = copy.deepcopy(rf_base) # depth analysis -------------- print("Depth tune:") time_start = time.time() depth_tune_rf = smooth_rf.depth_tune(random_forest, X_trained=X_train, y_trained=y_train) scoring_dict["depth_tune"] = assess_rf(depth_tune_rf, X_test, y_test) info_dict["depth_tune"] = depth_tune_rf.loss_vec_depth depth_spent = time.time() - time_start times = [depth_spent] # wOLS analysis -------------- if reg_or_class == "reg": print("wOLS:") time_start = time.time() parent_all_opts = [True, False] # parent_all no_constraint_opts = [True, False] # no_constraint