Example #1
import numpy as np
from numpy.testing import assert_array_almost_equal
# min_variance and predict(..., return_std=True) are provided by
# scikit-optimize's forest wrapper, not by plain scikit-learn
from skopt.learning import RandomForestRegressor


def test_min_variance():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(1000, 1))
    y = np.ones(1000)  # constant target, so the across-tree variance is ~0
    rf = RandomForestRegressor(min_variance=0.1)
    rf.fit(X, y)
    mean, std = rf.predict(X, return_std=True)
    assert_array_almost_equal(mean, y)
    # the predictive variance is floored at min_variance
    assert_array_almost_equal(std, np.sqrt(0.1 * np.ones(1000)))
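This test exercises scikit-optimize's forest wrapper (skopt.learning.RandomForestRegressor), which accepts min_variance and supports predict(..., return_std=True); plain scikit-learn's regressor does neither. A minimal sketch of what the floor does, under that assumption:

import numpy as np
from skopt.learning import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 1))
y = np.ones(100)  # constant target: every tree predicts 1.0

# the reported std never drops below sqrt(min_variance)
for floor in (0.01, 0.1, 1.0):
    rf = RandomForestRegressor(min_variance=floor).fit(X, y)
    _, std = rf.predict(X, return_std=True)
    print(floor, std[0])  # 0.1, 0.316..., 1.0

With a constant target the across-tree variance is zero, so the floor alone determines the reported uncertainty.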
Example #2
import numpy as np
from tqdm import tqdm_notebook
# RandomForestRegressor with min_variance is scikit-optimize's wrapper
from skopt.learning import RandomForestRegressor

# ei, transform and reverse_transform are helpers from the surrounding
# project: an expected-improvement factory and the maps between the
# search-space box and the unit cube, respectively


def rfbo_cycle(ndim,
               space,
               target_f,
               n_iters=10,
               acq_function=ei,
               n_samples=int(1.0e+5),
               model=None,
               show_progress=True):
    # optionally wrap the iteration range in a tqdm progress bar
    xrange = (lambda title, n: tqdm_notebook(range(n), postfix=title)
              ) if show_progress else (lambda title, n: range(n))
    space = np.array(space)

    # default surrogate: a random forest with a floored predictive variance
    if model is None:
        model = RandomForestRegressor(n_estimators=200,
                                      n_jobs=20,
                                      min_variance=1.0e-3,
                                      random_state=1234)

    known_points = []
    known_values = []
    cost = []

    for i in xrange('BO iteration', n_iters):
        # build the acquisition function from the current surrogate
        acq = acq_function(model, known_points, known_values)

        # score uniform candidates in the unit cube, pick the minimizer
        candidates = np.random.uniform(size=(n_samples, ndim))
        f = acq(candidates)

        best = np.argmin(f)
        suggestion = reverse_transform(candidates[best].reshape(1, -1),
                                       space)[0, :]

        # target_f returns (evaluation cost, observed objective value)
        point_cost, observed = target_f(suggestion)

        known_points.append(suggestion)
        known_values.append(observed)
        cost.append(point_cost)

        # refit the surrogate on all observations, mapped to the unit cube
        model.fit(transform(np.array(known_points), space),
                  np.array(known_values))

        yield model, acq, space, known_points, known_values, cost
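For context, here is a hypothetical driver for rfbo_cycle. The ei, transform and reverse_transform functions below are illustrative stand-ins for the project helpers the snippet assumes (an expected-improvement factory and the maps between the search-space box and the unit cube); they must be defined before rfbo_cycle itself, since ei appears as a default argument:

import numpy as np
from scipy.stats import norm

def transform(points, space):
    # stand-in: map points from the search-space box to the unit cube
    lo, hi = space[:, 0], space[:, 1]
    return (points - lo) / (hi - lo)

def reverse_transform(points, space):
    # stand-in: map points from the unit cube back to the search space
    lo, hi = space[:, 0], space[:, 1]
    return lo + points * (hi - lo)

def ei(model, known_points, known_values):
    # stand-in: negated expected improvement (rfbo_cycle takes an argmin)
    if len(known_points) == 0:
        return lambda x: np.zeros(len(x))  # nothing observed yet
    best = np.min(known_values)

    def acq(x):
        mean, std = model.predict(x, return_std=True)
        z = (best - mean) / np.maximum(std, 1e-12)
        return -(std * (z * norm.cdf(z) + norm.pdf(z)))

    return acq

space = [(-5.0, 5.0), (-5.0, 5.0)]

def target_f(x):
    # toy objective: returns (evaluation cost, observed value)
    return 1.0, float(np.sum(np.asarray(x) ** 2))

for model, acq, sp, pts, vals, cost in rfbo_cycle(
        2, space, target_f, n_iters=5, n_samples=1000, show_progress=False):
    print(len(pts), min(vals))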
Example #3
from numpy.testing import assert_array_equal
from skopt.learning import RandomForestRegressor


def test_random_forest():
    # toy sample
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
    y = [-1, -1, -1, 1, 1, 1]
    T = [[-1, -1], [2, 2], [3, 2]]
    true_result = [-1, 1, 1]

    # default settings
    clf = RandomForestRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y)

    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    # a small min_impurity_decrease must not change the toy predictions
    clf = RandomForestRegressor(n_estimators=10,
                                min_impurity_decrease=0.1,
                                random_state=1)
    clf.fit(X, y)

    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    # all constructor arguments spelled out explicitly
    # (note: "mse" and "auto" are the historical names; newer scikit-learn
    # uses criterion="squared_error" and max_features=1.0)
    clf = RandomForestRegressor(n_estimators=10,
                                criterion="mse",
                                max_depth=None,
                                min_samples_split=2,
                                min_samples_leaf=1,
                                min_weight_fraction_leaf=0.,
                                max_features="auto",
                                max_leaf_nodes=None,
                                min_impurity_decrease=0.,
                                bootstrap=True,
                                oob_score=False,
                                n_jobs=1,
                                random_state=1,
                                verbose=0,
                                warm_start=False)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    # restricting each split to a single feature
    clf = RandomForestRegressor(n_estimators=10,
                                max_features=1,
                                random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    # also test apply: one leaf index per sample and per tree
    leaf_indices = clf.apply(X)
    assert leaf_indices.shape == (len(X), clf.n_estimators)
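The apply check at the end hints at a common use of leaf indices: the fraction of trees in which two samples share a leaf is a forest-based affinity. A short sketch on the same toy data, again assuming the skopt/scikit-learn-style regressor used above:

import numpy as np
from skopt.learning import RandomForestRegressor

X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]

clf = RandomForestRegressor(n_estimators=10, random_state=1).fit(X, y)
leaves = clf.apply(X)  # shape (n_samples, n_estimators)

# affinity[i, j]: fraction of trees where samples i and j share a leaf
affinity = (leaves[:, None, :] == leaves[None, :, :]).mean(axis=-1)
print(affinity.shape)  # (6, 6)
print(affinity[0, 1])  # high: both points sit in the -1 cluster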