def cross_val_score(datasets,
                    model_factory,
                    model_parameters,
                    evaluator=_default_evaluator,
                    environment=None,
                    return_model=True):
    """
    Evaluate model performance via cross validation for a given set of
    parameters.

    Parameters
    ----------
    {param_data}
    {param_model_factory}

    model_parameters : dict
        A dictionary of parameters that will be passed to the provided model
        factory.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create

    Examples
    --------
    >>> url = 'https://static.turi.com/datasets/xgboost/mushroom.csv'
    >>> data = gl.SFrame.read_csv(url)
    >>> data['label'] = (data['label'] == 'p')
    >>> folds = gl.cross_validation.KFold(data, 5)
    >>> params = dict([('target', 'label'), ('max_depth', 5)])
    >>> job = gl.cross_validation.cross_val_score(folds,
                                                  gl.boosted_trees_classifier.create,
                                                  params)
    >>> print job.get_results()
    """
    _get_metric_tracker().track('cross_validation.cross_val_score')

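    # Normalize the input: a single SFrame becomes one fold with no held-out
    # set, a (train, test) tuple becomes a single fold, and anything else is
    # assumed to already be a collection of folds.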
    if isinstance(datasets, _graphlab.SFrame):
        folds = [(datasets, None)]
    elif isinstance(datasets, tuple):
        if len(datasets) != 2:
            raise ValueError("Provided dataset tuple must be a train/test pair.")
        folds = [datasets]
    else:
        folds = datasets

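    # Wrap a plain list of (train, test) pairs in a KFold object so the rest
    # of the function only deals with a single type.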
    if not isinstance(folds, KFold):
        folds = KFold.from_list(folds)

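    # Only record a fold id in the results when there is more than one fold.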
    num_folds = folds.num_folds
    include_fold_id = num_folds > 1

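    # Create a GraphLab-compatible model factory if the provided factory is
    # from sklearn.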
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    params = []
    model_id = 0

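    # Build one parameter set per fold; each entry becomes an independent
    # train-and-evaluate task in the map job created below.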
    for fold_id in range(num_folds):

        metadata = {'model_id': model_id}
        if include_fold_id:
            metadata['fold_id'] = fold_id
        model_id += 1

        params.append({
            'model_factory': model_factory,
            'model_parameters': model_parameters,
            'folds': folds,
            'evaluator': evaluator,
            'return_model': return_model,
            'metadata': metadata
        })
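
    # Give the job a human-readable and (very likely) unique name based on the
    # current timestamp and a short hash.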
    now = _datetime.now().strftime('%b-%d-%Y-%H-%M-%S-%f')

    random_hash = str(hash((id(folds), ("%.21f" % _time()))))[:8]

    job_name = "Cross-Validation-%s-%s" % (now, random_hash)

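    # Run one training/evaluation task per fold; the combiner function merges
    # the per-fold results.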
    return _map_job.create(_train_test_model,
                           parameter_set=params,
                           name=job_name,
                           environment=environment,
                           combiner_function=_combiner)


def create(datasets,
           model_factory,
           model_parameters,
           evaluator=_default_evaluator,
           environment=None,
           return_model=True,
           perform_trial_run=True,
           max_models=10):
    """
    Evaluate model performance, in parallel, over a set of parameters, where
    the parameters are chosen randomly.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}
        A user can also specify a random variable as the value for an argument.
        For each model, the parameter value will be sampled from that
        distribution. For a given scipy.stats distribution, v, each model
        first calls v.rvs(1) to sample a single value from the distribution.
        For example, 'step_size': scipy.stats.distributions.expon(.1)
        would choose step_size to be the result of calling the `rvs` method
        on the exponential distribution.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_max_models}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.manual_search.create

    Examples
    --------
    Perform a random search on a single train/test split.

    .. sourcecode:: python

        >>> import scipy.stats
        >>> sf = gl.SFrame()
        >>> sf['x'] = range(100)
        >>> sf['y'] = [0, 1] * 50
        >>> train, valid = sf.random_split(.5)
        >>> params = dict([('target', 'y'),
                           ('step_size', scipy.stats.distributions.expon(.1)),
                           ('max_depth', [5, 7])])
        >>> job = gl.random_search.create((train, valid),
                                          gl.boosted_trees_regression.create,
                                          params)
        >>> job.get_results()

    Perform a random search on a k-fold split.

    .. sourcecode:: python

        >>> folds = gl.cross_validation.KFold(sf, 5)
        >>> params = dict([('target', 'y'),
                           ('step_size', scipy.stats.distributions.expon(.1)),
                           ('max_depth', [5, 7])])
        >>> job = gl.random_search.create(folds,
                                          gl.boosted_trees_classifier.create,
                                          params)
        >>> job.get_results()

    """

    # Create a model_factory if the provided factory is from sklearn
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

    # Construct an iterable of all the desired free_param settings.
    model_param_list = []
    for _ in range(max_models):
        model_params = _random_choice(model_parameters)
        model_param_list.append(model_params)

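    # Hand the sampled parameter settings to the shared model-search driver
    # using the 'random' strategy.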
    return _create_model_search(datasets,
                                model_factory,
                                model_param_list,
                                strategy='random',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)


def create(datasets,
           model_factory,
           model_parameters,
           evaluator=_default_evaluator,
           environment=None,
           return_model=True,
           perform_trial_run=True):
    """
    Evaluate model performance, in parallel, over a grid of parameters.

    Parameters
    ----------
    {param_data}
    {param_model_factory}
    {param_model_params}
        The collection of all combinations of valid parameter values defines a
        grid of model parameters that will be considered.

    {param_evaluator}
    {param_environment}
    {param_return_model}
    {param_perform_trial_run}
    {param_returns}

    See Also
    --------
    graphlab.toolkits.model_parameter_search.create,
    graphlab.toolkits.model_parameter_search.random_search.create,
    graphlab.toolkits.cross_validation.cross_val_score

    Examples
    --------

    Perform a grid search on a single train/test split.

    >>> train, valid = sf.random_split(.5)
    >>> params = dict([('target', 'Y'),
                       ('step_size', [0.01, 0.1]),
                       ('max_depth', [5, 7])])
    >>> job = gl.grid_search.create((train, valid),
                                    gl.boosted_trees_classifier.create,
                                    params)
    >>> job.get_results()

    Perform a grid search on a k-fold split.

    >>> folds = gl.cross_validation.KFold(sf, 5)
    >>> params = dict([('target', 'Y'),
                       ('step_size', [0.01, 0.1]),
                       ('max_depth', [5, 7])])
    >>> job = gl.grid_search.create(folds,
                                    gl.boosted_trees_classifier.create,
                                    params)
    >>> job.get_results()
    """

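    # Create a GraphLab-compatible model factory if the provided factory is
    # from sklearn.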
    model_factory = _check_if_sklearn_factory(model_factory, model_parameters)

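    # Expand the parameter dictionary into every combination of values, i.e.
    # the full grid of models to consider.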
    search_space = _get_all_parameters_combinations(model_parameters)

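    # Hand the grid of parameter settings to the shared model-search driver
    # using the 'grid' strategy.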
    return _create_model_search(datasets,
                                model_factory,
                                search_space,
                                strategy='grid',
                                evaluator=evaluator,
                                environment=environment,
                                return_model=return_model,
                                perform_trial_run=perform_trial_run)