Code example #1
def create(datasets,
           model_factory,
           model_parameters,
           evaluator=_default_evaluator,
           environment=None,
           return_model=True,
           perform_trial_run=True):
    """
    Evaluate model performance, in parallel, over a list of parameter
    combinations.

    Parameters
    ----------
    {param_data}
    {param_model_factory}

    model_parameters : list
      A list of dicts containing valid model parameter settings.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.random_search.create

    Examples
    --------

    Fit models over a list of valid parameter settings.

    .. sourcecode:: python

        >>> import graphlab as gl
        >>> sf = gl.SFrame()
        >>> sf['x'] = range(100)
        >>> sf['y'] = [0, 1] * 50
        >>> training, validation = sf.random_split(0.8)
        >>> factory = gl.boosted_trees_classifier.create
        >>> params = [dict([('target', 'y'), ('max_depth', 3)]),
                      dict([('target', 'y'), ('max_depth', 6)])]
        >>> job = gl.manual_search.create((training, validation),
                                          factory, params)

    """

    return _create_model_search(datasets,
                                model_factory,
                                model_parameters,
                                strategy='manual',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
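Since `model_parameters` for a manual search is just a list of complete settings, it can also be assembled programmatically. Below is a minimal sketch (plain Python, independent of GraphLab; the names `fixed`, `depths`, and `step_sizes` are illustrative) that expands a few candidate values into the explicit list of dicts `create` expects.

import itertools

# Illustrative inputs: keys shared by every model, plus two swept values.
fixed = {'target': 'y'}
depths = [3, 6]
step_sizes = [0.01, 0.1]

# One dict per (max_depth, step_size) pair, each merged with the fixed keys.
params = [dict(fixed, max_depth=d, step_size=s)
          for d, s in itertools.product(depths, step_sizes)]
# params == [{'target': 'y', 'max_depth': 3, 'step_size': 0.01}, ...]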
Code example #2
def create(datasets,
           model_factory,
           model_parameters,
           evaluator=_default_evaluator,
           environment=None,
           return_model=True,
           perform_trial_run=True,
           max_models=10):
    """
    Evaluate model performance, in parallel, over a set of parameters, where
    the parameters are chosen randomly.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}
        A value may also be given as a random variable; for each model, the
        parameter is then sampled from that distribution. Given a frozen
        scipy.stats distribution v, each model calls v.rvs(1) to draw a
        single value. For example, 'step_size': scipy.stats.distributions.expon(.1)
        would set step_size to a single draw from the exponential
        distribution.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_max_models}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.manual_search.create

    Examples
    --------
    Perform a random search on a single train/test split.

    .. sourcecode:: python

        >>> import graphlab as gl
        >>> import scipy.stats
        >>> sf = gl.SFrame()
        >>> sf['x'] = range(100)
        >>> sf['y'] = [0, 1] * 50
        >>> train, valid = sf.random_split(.5)
        >>> params = dict([('target', 'y'),
                           ('step_size', scipy.stats.distributions.expon(.1)),
                           ('max_depth', [5, 7])])
        >>> job = gl.random_search.create((train, valid),
                                          gl.boosted_trees_regression.create,
                                          params)
        >>> job.get_results()

    Perform a random search on a k-fold split.

    .. sourcecode:: python

        >>> folds = gl.cross_validation.KFold(sf, 5)
        >>> params = dict([('target', 'y'),
                           ('step_size', scipy.stats.distributions.expon(.1)),
                           ('max_depth', [5, 7])])
        >>> job = gl.random_search.create(folds,
                                          gl.boosted_trees_classifier.create,
                                          params)
        >>> job.get_results()

    """

    # Create a model_factory if the provided factory is from sklearn
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    # Build the list of randomly sampled parameter settings, one per model.
    model_param_list = []
    for _ in range(max_models):
        model_params = _random_choice(model_parameters)
        model_param_list.append(model_params)

    return _create_model_search(datasets,
                                model_factory,
                                model_param_list,
                                strategy='random',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
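The `_random_choice` helper used above is not shown in this listing. As a hypothetical sketch of the sampling behavior the docstring describes, each parameter value is either drawn from a scipy distribution (anything exposing an `rvs` method), picked uniformly from a list, or passed through unchanged; `sample_params` below is an illustrative stand-in, not the actual implementation.

import random
import scipy.stats

def sample_params(model_parameters):
    # Draw one concrete setting from a dict that may mix fixed values,
    # candidate lists, and frozen scipy.stats distributions.
    sampled = {}
    for name, value in model_parameters.items():
        if hasattr(value, 'rvs'):             # frozen scipy distribution
            sampled[name] = value.rvs(1)[0]   # sample a single value
        elif isinstance(value, list):         # discrete candidate list
            sampled[name] = random.choice(value)
        else:                                 # fixed value
            sampled[name] = value
    return sampled

params = {'target': 'y',
          'step_size': scipy.stats.distributions.expon(.1),
          'max_depth': [5, 7]}
print(sample_params(params))  # e.g. {'target': 'y', 'step_size': 0.31..., 'max_depth': 7}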
Code example #3
def create(datasets,
           model_factory,
           model_parameters,
           evaluator=_default_evaluator,
           environment=None,
           return_model=True,
           perform_trial_run=True):
    """
    Evaluate model performance, in parallel, over a grid of parameters.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}
        Each combination of the provided parameter values defines one point on
        the grid of models to be evaluated.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.random_search.create,
    graphlab.toolkits.cross_validation.cross_val_score

    Examples
    --------

    Perform a grid search on a single train/test split.

    >>> import graphlab as gl
    >>> sf = gl.SFrame()
    >>> sf['x'] = range(100)
    >>> sf['Y'] = [0, 1] * 50
    >>> train, valid = sf.random_split(0.8)
    >>> params = dict([('target', 'Y'),
                       ('step_size', [0.01, 0.1]),
                       ('max_depth', [5, 7])])
    >>> job = gl.grid_search.create((train, valid),
                                    gl.boosted_trees_classifier.create,
                                    params)
    >>> job.get_results()

    Perform a grid search on a k-fold split.

    >>> folds = gl.cross_validation.KFold(sf, 5)
    >>> params = dict([('target', 'Y'),
                       ('step_size', [0.01, 0.1]),
                       ('max_depth', [5, 7])])
    >>> job = gl.grid_search.create(folds,
                                    gl.boosted_trees_classifier.create,
                                    params)
    >>> job.get_results()
    """

    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    search_space = _get_all_parameters_combinations(model_parameters)

    return _create_model_search(datasets,
                                model_factory,
                                search_space,
                                strategy='grid',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
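The `_get_all_parameters_combinations` helper is likewise not shown in this listing. A hypothetical sketch of the grid expansion it performs: every list-valued entry becomes an axis of the grid, scalar values stay fixed, and the cross product of all axes is enumerated. `expand_grid` below is an illustrative stand-in, not the actual implementation.

import itertools

def expand_grid(model_parameters):
    # Wrap scalar values in one-element lists so every entry is an axis.
    axes = {k: (v if isinstance(v, list) else [v])
            for k, v in model_parameters.items()}
    keys = list(axes)
    # Cross product of all axes, one dict per grid point.
    return [dict(zip(keys, combo))
            for combo in itertools.product(*(axes[k] for k in keys))]

params = {'target': 'Y', 'step_size': [0.01, 0.1], 'max_depth': [5, 7]}
print(expand_grid(params))  # 4 dicts, one per (step_size, max_depth) pair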