def create(datasets, model_factory, model_parameters,
           evaluator=_default_evaluator, environment=None,
           return_model=True, perform_trial_run=True):
    """
    Evaluate model performance, in parallel, over a list of parameter
    combinations.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    model_parameters : list
        A list of dicts containing valid model parameter settings.
    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.random_search.create

    Examples
    --------
    Fit models over a list of valid parameter settings.

    .. sourcecode:: python

        >>> import graphlab as gl
        >>> sf = gl.SFrame()
        >>> sf['x'] = range(100)
        >>> sf['y'] = [0, 1] * 50
        >>> training, validation = sf.random_split(0.8)
        >>> factory = gl.boosted_trees_classifier.create
        >>> params = [{'target': 'y', 'max_depth': 3},
        ...           {'target': 'y', 'max_depth': 6}]
        >>> job = gl.manual_search.create((training, validation),
        ...                               factory, params)
    """
    return _create_model_search(datasets,
                                model_factory,
                                model_parameters,
                                strategy='manual',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
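# Illustrative note (not part of the original module): manual_search fits
# exactly one model per dict in `model_parameters`, so the list can be built
# programmatically before calling create(), e.g.:
#
#     params = [{'target': 'y', 'max_depth': d, 'step_size': s}
#               for d in (3, 6)
#               for s in (0.01, 0.1)]
#     job = gl.manual_search.create((training, validation), factory, params)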
def create(datasets, model_factory, model_parameters,
           evaluator=_default_evaluator, environment=None,
           return_model=True, perform_trial_run=True, max_models=10):
    """
    Evaluate model performance, in parallel, over a set of parameters, where
    the parameters are chosen randomly.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}

        A user can also specify a random variable as the value for an
        argument. For each model, the parameter value will be sampled from
        this distribution. For a given frozen scipy.stats distribution ``v``,
        each model will first call ``v.rvs(1)`` to sample a single value from
        the distribution. For example,
        ``'step_size': scipy.stats.distributions.expon(.1)`` would choose
        ``step_size`` to be the result of calling the ``rvs`` method on the
        exponential distribution.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_max_models}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.manual_search.create

    Examples
    --------
    Perform a random search on a single train/test split.

    .. sourcecode:: python

        >>> import graphlab as gl
        >>> import scipy.stats
        >>> sf = gl.SFrame()
        >>> sf['x'] = range(100)
        >>> sf['y'] = [0, 1] * 50
        >>> train, valid = sf.random_split(0.5)
        >>> params = {'target': 'y',
        ...           'step_size': scipy.stats.distributions.expon(.1),
        ...           'max_depth': [5, 7]}
        >>> job = gl.random_search.create((train, valid),
        ...                               gl.boosted_trees_regression.create,
        ...                               params)
        >>> job.get_results()

    Perform a random search on a k-fold split.

    .. sourcecode:: python

        >>> folds = gl.cross_validation.KFold(sf, 5)
        >>> params = {'target': 'y',
        ...           'step_size': scipy.stats.distributions.expon(.1),
        ...           'max_depth': [5, 7]}
        >>> job = gl.random_search.create(folds,
        ...                               gl.boosted_trees_classifier.create,
        ...                               params)
        >>> job.get_results()
    """
    # Create a model_factory if the provided factory is from sklearn.
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    # Construct an iterable of the desired free-parameter settings, one
    # randomly sampled combination per model.
    model_param_list = []
    for _ in range(max_models):
        model_params = _random_choice(model_parameters)
        model_param_list.append(model_params)

    return _create_model_search(datasets,
                                model_factory,
                                model_param_list,
                                strategy='random',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
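# Illustrative sketch only: the actual sampling is done by the private
# `_random_choice` helper used above, whose implementation is not shown
# here. Assuming the contract described in the docstring (scipy.stats
# distributions are sampled via `rvs`, lists are sampled uniformly, and
# scalars pass through unchanged), an equivalent function could look like:

import random

def _random_choice_sketch(model_parameters):
    """Sample one concrete parameter dict from a mixed specification."""
    sampled = {}
    for name, value in model_parameters.items():
        if hasattr(value, 'rvs'):            # frozen scipy.stats distribution
            sampled[name] = value.rvs(1)[0]  # draw a single value
        elif isinstance(value, list):        # discrete set of candidates
            sampled[name] = random.choice(value)
        else:                                # fixed scalar setting
            sampled[name] = value
    return sampled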
def create(datasets, model_factory, model_parameters,
           evaluator=_default_evaluator, environment=None,
           return_model=True, perform_trial_run=True):
    """
    Evaluate model performance, in parallel, over a grid of parameters.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}

        The collection of all combinations of valid parameter values defines
        a grid of model parameters that will be considered.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.random_search.create,
    graphlab.toolkits.cross_validation.cross_val_score

    Examples
    --------
    Perform a grid search on a single train/test split.

    .. sourcecode:: python

        >>> train, valid = sf.random_split(0.8)
        >>> params = {'target': 'Y',
        ...           'step_size': [0.01, 0.1],
        ...           'max_depth': [5, 7]}
        >>> job = gl.grid_search.create((train, valid),
        ...                             gl.boosted_trees_classifier.create,
        ...                             params)
        >>> job.get_results()

    Perform a grid search on a k-fold split.

    .. sourcecode:: python

        >>> folds = gl.cross_validation.KFold(sf, 5)
        >>> params = {'target': 'Y',
        ...           'step_size': [0.01, 0.1],
        ...           'max_depth': [5, 7]}
        >>> job = gl.grid_search.create(folds,
        ...                             gl.boosted_trees_classifier.create,
        ...                             params)
        >>> job.get_results()
    """
    # Create a model_factory if the provided factory is from sklearn.
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    # Expand the parameter dict into the full grid of combinations.
    search_space = _get_all_parameters_combinations(model_parameters)

    return _create_model_search(datasets,
                                model_factory,
                                search_space,
                                strategy='grid',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)
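# Illustrative sketch only: the grid expansion is performed by the private
# `_get_all_parameters_combinations` helper used above, whose implementation
# is not shown here. Assuming list-valued entries are grid axes and scalar
# entries are held fixed, a Cartesian-product expansion could look like:

import itertools

def _grid_expansion_sketch(model_parameters):
    """Expand {'a': [1, 2], 'b': 'x'} into
    [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'x'}]."""
    names = list(model_parameters.keys())
    axes = [value if isinstance(value, list) else [value]
            for value in model_parameters.values()]
    return [dict(zip(names, combo)) for combo in itertools.product(*axes)]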