Beispiel #1
0
 def test_train_regressor(self, space, fspace):
     """Check that every supported regressor name yields a model of the matching class"""
     flattened = flatten_numpy(to_numpy(data, space), fspace)

     # (name given to ``train_regressor``, class the returned model must be)
     expected_models = (
         ("AdaBoostRegressor", AdaBoostRegressor),
         ("BaggingRegressor", BaggingRegressor),
         ("ExtraTreesRegressor", ExtraTreesRegressor),
         ("GradientBoostingRegressor", GradientBoostingRegressor),
         ("RandomForestRegressor", RandomForestRegressor),
     )
     for regressor_name, regressor_class in expected_models:
         trained = train_regressor(regressor_name, flattened)
         assert isinstance(trained, regressor_class)
Beispiel #2
0
 def test_train_regressor_kwargs(self, space, fspace):
     """Check that extra keyword arguments show up as attributes of the trained model"""
     regressor_kwargs = {"max_depth": 2, "max_features": "sqrt"}
     flattened = flatten_numpy(to_numpy(data, space), fspace)

     model = train_regressor("RandomForestRegressor", flattened, **regressor_kwargs)

     # Each keyword passed in must be readable back from the model
     for attribute, expected in regressor_kwargs.items():
         assert getattr(model, attribute) == expected
Beispiel #3
0
def test_make_grid():
    """Check the layout of the grid built around the best trial"""
    trials = to_numpy(data, space)
    regressor = train_regressor("RandomForestRegressor", trials)
    # Anchor is the row with the smallest value in the last column
    anchor = trials[numpy.argmin(trials[:, -1])]
    grid = make_grid(anchor, space, regressor, 4)
    first_grid = grid[0]
    second_grid = grid[1]

    # The dimension that is not being varied stays at the anchor's value
    numpy.testing.assert_equal(first_grid[:, 1], anchor[1])
    numpy.testing.assert_equal(second_grid[:, 0], anchor[0])

    # The varied dimension takes the expected grid values
    numpy.testing.assert_equal(first_grid[:, 0], [0, 2, 4, 6])
    numpy.testing.assert_equal(second_grid[:, 1], [0, 1, 2, 3])
Beispiel #4
0
def test_make_grid_predictor(monkeypatch):
    """Check that the last column of each grid holds the model's own predictions"""
    trials = to_numpy(data, space)
    regressor = train_regressor("RandomForestRegressor", trials)
    anchor = trials[numpy.argmin(trials[:, -1])]

    # Sanity check: the model must not reproduce the recorded objective exactly,
    # otherwise the comparisons below could not tell predictions from raw data
    anchor_prediction = regressor.predict(anchor[:-1].reshape(1, -1))
    with numpy.testing.assert_raises(AssertionError):
        numpy.testing.assert_equal(anchor[-1], anchor_prediction)

    grid = make_grid(anchor, space, regressor, 4)
    first_grid = grid[0]
    second_grid = grid[1]

    # Last column of each grid equals the prediction on the other columns
    numpy.testing.assert_equal(
        first_grid[:, -1], regressor.predict(first_grid[:, :-1])
    )
    numpy.testing.assert_equal(
        second_grid[:, -1], regressor.predict(second_grid[:, :-1])
    )

    # The two grids must not carry identical predictions
    with numpy.testing.assert_raises(AssertionError):
        numpy.testing.assert_equal(first_grid[:, -1], second_grid[:, -1])
Beispiel #5
0
def lpi(
    trials,
    space,
    mode="best",
    model="RandomForestRegressor",
    n_points=20,
    n_runs=10,
    **kwargs
):
    """
    Calculates the Local Parameter Importance for a collection of
    :class:`orion.core.worker.trial.Trial`.

    For more information on the metric, see original paper at
    https://ml.informatik.uni-freiburg.de/papers/18-LION12-CAVE.pdf.

    Biedenkapp, André, et al. "Cave: Configuration assessment, visualization and evaluation."
    International Conference on Learning and Intelligent Optimization. Springer, Cham, 2018.

    Parameters
    ----------
    trials: DataFrame
        A dataframe of trials. It must expose ``.empty`` and ``.shape`` and is converted
        with ``to_numpy(trials, space)``; the last column of the converted array is
        treated as the objective to minimize.

    space: Space object
        A space object from an experiment.

    mode: str
        Key into the module-level ``modes`` mapping selecting the function that computes
        the LPI. That function is always called with the best trial as anchor. Default is
        ``best``. (The set of valid keys is defined by ``modes``, outside this function.)

    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)

    n_points: int
        Number of points passed to the LPI function. Default is 20.

    n_runs: int
        Number of regressors trained (each with a different seed) to estimate the spread
        of the LPI. Default is 10.

    ``**kwargs``
        Arguments for the regressor model. ``random_state``, if given, is removed from
        the kwargs and used to seed the generator that draws one seed per run.

    Returns
    -------
    DataFrame
        Indexed by the keys of the flattened space. Column ``LPI`` is the mean over runs
        and column ``STD`` the standard deviation over runs. When ``trials`` is empty,
        only an ``LPI`` column filled with zeros is returned.

    """
    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )
    # Nothing to fit a regressor on: report zero importance for every dimension
    if trials.empty or trials.shape[0] == 0:
        return pd.DataFrame(
            data=[0] * len(flattened_space),
            index=flattened_space.keys(),
            columns=["LPI"],
        )

    data = to_numpy(trials, space)
    data = flatten_numpy(data, flattened_space)
    # Anchor point: the trial with the lowest objective (last column)
    best_point = data[numpy.argmin(data[:, -1])]
    rng = numpy.random.RandomState(kwargs.pop("random_state", None))
    results = numpy.zeros((n_runs, len(flattened_space)))
    for i in range(n_runs):
        # ``randint`` defaults to the C long dtype, which is 32-bit on some platforms
        # (e.g. Windows with NumPy < 2.0) and cannot hold 2 ** 32 - 1. Requesting int64
        # explicitly avoids the out-of-bounds error; where the default is already
        # 64-bit the drawn seeds are unchanged. ``int`` keeps a plain Python integer.
        seed = int(rng.randint(2 ** 32 - 1, dtype=numpy.int64))
        trained_model = train_regressor(model, data, random_state=seed, **kwargs)
        results[i] = modes[mode](best_point, flattened_space, trained_model, n_points)

    # Aggregate across runs, one value per dimension
    averages = results.mean(0)
    standard_errors = results.std(0)
    frame = pd.DataFrame(
        data=numpy.array([averages, standard_errors]).T,
        index=flattened_space.keys(),
        columns=["LPI", "STD"],
    )
    return frame
Beispiel #6
0
def partial_dependency(trials,
                       space,
                       params=None,
                       model="RandomForestRegressor",
                       n_grid_points=10,
                       n_samples=50,
                       **kwargs):
    """
    Computes the partial dependency of parameters in a collection of :class:`Trial`.

    A regressor is trained on the trials, then evaluated through
    ``partial_dependency_grid`` for every selected parameter and every pair of
    selected parameters.

    Parameters
    ----------
    trials: DataFrame
        A dataframe of trials. It must expose ``.empty`` and ``.shape`` and is converted
        with ``to_numpy(trials, space)``.

    space: Space object
        A space object from an experiment.

    params: list of str, optional
        The parameters to include in the computation, resolved through
        ``flatten_params(space, params)``. Default is ``None``.

    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)

    n_grid_points: int
        Number of grid points passed to ``partial_dependency_grid``. Default is 10.

    n_samples: int
        Number of points sampled from the flattened space and handed to
        ``partial_dependency_grid``. Default is 50.

    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    dict
        Empty when ``trials`` is empty. Otherwise keys are a parameter name for each
        single parameter and a ``(name1, name2)`` tuple for each pair. Every value is a
        ``(grid, averages, stds)`` tuple as produced by ``partial_dependency_grid``,
        with ``grid`` mapped through ``reverse(flattened_space, grid)``.

    """
    params = flatten_params(space, params)

    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )

    if trials.empty or trials.shape[0] == 0:
        return {}

    # Fit the regressor on the flattened trial data
    training_data = flatten_numpy(to_numpy(trials, space), flattened_space)
    regressor = train_regressor(model, training_data, **kwargs)

    # Random points of the flattened space used as background samples
    samples = pandas.DataFrame(
        flattened_space.sample(n_samples), columns=flattened_space.keys()
    )

    def _dependency_for(names):
        """Evaluate the grid for ``names`` and map the grid back with ``reverse``"""
        grid, averages, stds = partial_dependency_grid(
            flattened_space, regressor, names, samples, n_grid_points
        )
        return reverse(flattened_space, grid), averages, stds

    partial_dependencies = {}
    n_params = len(params)
    for first_index, first_name in enumerate(params):
        # Single-parameter entry first, then its pairs with every later parameter
        partial_dependencies[first_name] = _dependency_for([first_name])
        for second_index in range(first_index + 1, n_params):
            second_name = params[second_index]
            partial_dependencies[(first_name, second_name)] = _dependency_for(
                [first_name, second_name]
            )

    return partial_dependencies
Beispiel #7
0
 def test_train_regressor_invalid(self, space, fspace):
     """Check that an unknown model name raises ValueError with the expected message"""
     flattened = flatten_numpy(to_numpy(data, space), fspace)
     expected_message = "IDontExist is not a supported regressor"
     with pytest.raises(ValueError, match=expected_message):
         train_regressor("IDontExist", flattened)
Beispiel #8
0
 def mock_train_regressor(*args, **kwargs):
     nonlocal n_runs
     n_runs += 1
     seeds.add(kwargs["random_state"])
     return train_regressor(*args, **kwargs)