def test_min_variance():
    """Predicted std should be floored at sqrt(min_variance) on zero-variance targets."""
    rng = np.random.RandomState(0)
    features = rng.normal(size=(1000, 1))
    targets = np.ones(1000)

    forest = RandomForestRegressor(min_variance=0.1)
    forest.fit(features, targets)

    pred_mean, pred_std = forest.predict(features, return_std=True)
    # Constant targets -> mean prediction reproduces them exactly.
    assert_array_almost_equal(pred_mean, targets)
    # Variance is clipped from below at min_variance, so std == sqrt(0.1).
    assert_array_almost_equal(pred_std, np.sqrt(0.1 * np.ones(1000)))
def test_min_variance():
    """Predicted std is clipped to sqrt(min_variance) when targets are constant.

    NOTE(review): this redefines ``test_min_variance`` and shadows the earlier
    identical definition in this file — one of the two copies should be removed.
    """
    rng = np.random.RandomState(0)
    X = rng.normal(size=(1000, 1))
    y = np.ones(1000)

    model = RandomForestRegressor(min_variance=0.1)
    model.fit(X, y)
    mean, std = model.predict(X, return_std=True)

    # All targets equal 1 -> the forest reproduces them exactly.
    assert_array_almost_equal(mean, y)
    # Zero empirical variance is raised to min_variance=0.1 before sqrt.
    expected_std = np.sqrt(0.1 * np.ones(1000))
    assert_array_almost_equal(std, expected_std)
def rfbo_cycle(ndim, space, target_f, n_iters=10, acq_function=ei,
               n_samples=int(1.0e+5), model=None, show_progress=True):
    """Run random-forest Bayesian optimization, yielding state after each step.

    Each iteration: build the acquisition function from the current model and
    history, score ``n_samples`` uniform candidates in the unit cube, evaluate
    ``target_f`` at the best candidate (mapped back into ``space``), then refit
    the model on the full history.

    Yields ``(model, acq, space, known_points, known_values, cost)`` per
    iteration.
    """
    if show_progress:
        def iterate(title, count):
            return tqdm_notebook(range(count), postfix=title)
    else:
        def iterate(title, count):
            return range(count)

    space = np.array(space)

    # Default surrogate: forest with a small variance floor for stable stds.
    if model is None:
        model = RandomForestRegressor(n_estimators=200, n_jobs=20,
                                      min_variance=1.0e-3, random_state=1234)

    known_points = []
    known_values = []
    cost = []

    for _ in iterate('BO iteration', n_iters):
        acq = acq_function(model, known_points, known_values)

        # Score uniform candidates in the unit hypercube and pick the minimizer.
        candidates = np.random.uniform(size=(n_samples, ndim))
        scores = acq(candidates)
        winner = np.argmin(scores)

        # Map the winning unit-cube point back into the search space.
        suggestion = reverse_transform(candidates[winner].reshape(1, -1), space)[0, :]

        point_cost, observed = target_f(suggestion)
        known_points.append(suggestion)
        known_values.append(observed)
        cost.append(point_cost)

        # Refit on the full (transformed) history before yielding state.
        model.fit(transform(np.array(known_points), space),
                  np.array(known_values))

        yield model, acq, space, known_points, known_values, cost
def test_random_forest():
    """Smoke-test RandomForestRegressor on a toy sample under several configs."""
    # toy sample
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
    y = [-1, -1, -1, 1, 1, 1]
    T = [[-1, -1], [2, 2], [3, 2]]
    true_result = [-1, 1, 1]

    # Each configuration must reproduce the toy labels and hold 10 trees.
    configurations = [
        dict(n_estimators=10, random_state=1),
        dict(n_estimators=10, min_impurity_decrease=0.1, random_state=1),
        dict(n_estimators=10, criterion="mse", max_depth=None,
             min_samples_split=2, min_samples_leaf=1,
             min_weight_fraction_leaf=0., max_features="auto",
             max_leaf_nodes=None, min_impurity_decrease=0.,
             bootstrap=True, oob_score=False, n_jobs=1, random_state=1,
             verbose=0, warm_start=False),
        dict(n_estimators=10, max_features=1, random_state=1),
    ]

    clf = None
    for kwargs in configurations:
        clf = RandomForestRegressor(**kwargs)
        clf.fit(X, y)
        assert_array_equal(clf.predict(T), true_result)
        assert 10 == len(clf)

    # also test apply (on the last fitted forest, max_features=1)
    leaf_indices = clf.apply(X)
    assert leaf_indices.shape == (len(X), clf.n_estimators)