    def wrapper(model, *args, **kwargs):
        # Inner wrapper of a decorator: call the wrapped function ``f``, then
        # record the name of the dataset the model was created with in the
        # model's workflow.
        result = f(model, *args, **kwargs)
        dataset_label = None
        dataset_variable = None
        dataset = []
        # model inherits from CustomModel
        if isinstance(model, graphlab.toolkits._model.CustomModel):
            if len(args) >= 1:
                dataset = args[0]
            elif 'dataset' in kwargs:
                dataset = kwargs['dataset']
            elif 'train_data' in kwargs:
                dataset = kwargs['train_data']

            if isinstance(dataset, (graphlab.data_structures.sframe.SFrame,
                                    graphlab.data_structures.sarray.SArray,
                                    dict, list, tuple, _array.array)):
                from graphlab.canvas.inspect import _find_variable_name
                (dataset_label,
                 dataset_variable) = _find_variable_name(dataset)

            if dataset_label is None:
                if isinstance(dataset_variable,
                              graphlab.data_structures.sframe.SFrame):
                    dataset_label = temporarySFrameString
                elif isinstance(dataset_variable,
                                graphlab.data_structures.sarray.SArray):
                    dataset_label = temporarySArrayString
                elif isinstance(dataset_variable,
                                (dict, list, tuple, _array.array)):
                    dataset_label = "<%s>" % str(type(dataset_variable))

            model._get_workflow().add_step(dataset_label, result)
        return result
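The snippet above is the inner function of a decorator: ``f`` and the ``temporary*String`` labels presumably come from the enclosing scope. A minimal, hedged sketch of how such a wrapper is typically produced and attached; the decorator name ``_track_training_dataset`` is illustrative, not GraphLab's actual API.

import functools

def _track_training_dataset(f):
    # Decorator factory: closes over the wrapped function ``f`` and returns a
    # wrapper with the same argument-forwarding shape as the snippet above.
    @functools.wraps(f)
    def wrapper(model, *args, **kwargs):
        result = f(model, *args, **kwargs)
        # ... dataset-label bookkeeping as in the snippet above ...
        return result
    return wrapper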
    def wrapper(model, *args, **kwargs):
        # Older variant of the wrapper above: only SFrame and SArray datasets
        # are looked up by variable name.
        result = f(model, *args, **kwargs)
        dataset_label = None
        dataset_variable = None
        dataset = []
        # model inherits from CustomModel
        if isinstance(model, graphlab.toolkits._model.CustomModel):
            if len(args) >= 1:
                dataset = args[0]
            elif 'dataset' in kwargs:
                dataset = kwargs['dataset']
            elif 'train_data' in kwargs:
                dataset = kwargs['train_data']

            if isinstance(dataset, (graphlab.data_structures.sframe.SFrame,
                                    graphlab.data_structures.sarray.SArray)):
                from graphlab.canvas.inspect import _find_variable_name
                (dataset_label, dataset_variable) = _find_variable_name(dataset)

            if dataset_label is None:
                if isinstance(dataset_variable, graphlab.data_structures.sframe.SFrame):
                    dataset_label = temporarySFrameString
                elif isinstance(dataset_variable, graphlab.data_structures.sarray.SArray):
                    dataset_label = temporarySArrayString

            model._get_workflow().add_step(dataset_label, result)
        return result
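Both variants rely on ``_find_variable_name`` to recover the caller's variable name for the dataset. A hedged, library-free sketch of that kind of call-stack inspection follows; it mirrors the (label, variable) return shape but is not GraphLab's actual implementation.

import inspect

def find_variable_name(obj):
    # Walk up the call stack and return the first variable name bound to the
    # exact object passed in; fall back to (None, obj) if nothing matches.
    frame = inspect.currentframe().f_back
    while frame is not None:
        for name, value in frame.f_locals.items():
            if value is obj and not name.startswith('_'):
                return name, obj
        frame = frame.f_back
    return None, obj

data = [1, 2, 3]
print(find_variable_name(data))   # ('data', [1, 2, 3])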
Example #3
def compare(dataset, models, **kwargs):
    r"""
    Compare the prediction (or model-equivalent action) performance of models
    on a common test dataset.


    .. warning::

        This currently only works on Recommender type models.


    Parameters
    ----------
    dataset : SFrame
        The dataset to use for model evaluation.

    models : list
        List of trained models to compare.


    Returns
    -------
    out : SFrame
        An SFrame with one row per model, containing the model name, dataset
        label, model type, evaluation metric, and evaluation results.

    Examples
    --------
    If you have created two ItemSimilarityRecommenders ``m1`` and ``m2`` and have
    an :class:`~graphlab.SFrame` ``test_data``, then you may compare the
    performance of the two models on test data using:

    >>> import graphlab
    >>> train_data = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                               'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"]})
    >>> test_data = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                              'item_id': ["b", "d", "a", "c", "e", "a", "e"]})
    >>> m1 = graphlab.item_similarity_recommender.create(train_data)
    >>> m2 = graphlab.item_similarity_recommender.create(train_data, only_top_k=1)
    >>> model_comp = graphlab.compare(test_data, [m1, m2])

    The evaluation metric is automatically set to 'precision_recall', and the
    evaluation will be based on recommendations that exclude items seen in the
    training data.

    If you want to evaluate on the original training set:

    >>> model_comp = graphlab.compare(train_data, [m1, m2])

    Suppose you have four models: two trained with a target rating column and
    two trained without a target. By default, the models are split into two
    groups and evaluated with 'rmse' and 'precision_recall' as the respective
    evaluation metrics.

    >>> train_data2 = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                                'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"],
    ...                                'rating': [1, 3, 4, 5, 3, 4, 2, 5]})
    >>> test_data2 = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                               'item_id': ["b", "d", "a", "c", "e", "a", "e"],
    ...                               'rating': [3, 5, 4, 4, 3, 5, 2]})
    >>> m3 = graphlab.factorization_recommender.create(train_data2, target='rating')
    >>> m4 = graphlab.factorization_recommender.create(train_data2, target='rating')
    >>> model_comp = graphlab.compare(test_data2, [m3, m4])

    To compare all four models, you can do:

    >>> model_comp = graphlab.compare(test_data2, [m1, m2, m3, m4])
    """

    _mt._get_metric_tracker().track('toolkit.compare_models')

    num_models = len(models)

    model_names = []
    results = []

    if num_models < 1:
        raise ValueError("Must pass in at least one model to evaluate")

    base_model_types = [m.__class__.__base__ for m in models]
    unique_base_model_types = list(set(base_model_types))

    if len(unique_base_model_types) != 1:
        raise ValueError("All models must share the same base model type")

    metric = 'precision_recall'
    if unique_base_model_types[0] == _Recommender:
        results = __evaluate_recomenders(dataset,
                                         models,
                                         metric=metric,
                                         **kwargs)
    else:
        import logging
        logging.warning(
            'Model comparison currently only supports comparing models created '
            'by graphlab.toolkits.recommender or its subclasses such as '
            'item_similarity_recommender.'
        )
        return _graphlab.SFrame()

    print("Model compare metric: " + metric)

    metric_label = metric
    if metric == 'precision_recall':
        metric_label = 'precision_recall_overall'

    # cast result SFrames to plain lists of metric values
    results = [{metric: list(x[metric_label])} for x in results]

    for i in range(len(models)):
        name = _find_variable_name(models[i])[0]
        if name is None or not isinstance(name, str):
            name = 'Model_' + str(i)
        model_names.append(name)

    model_types = map(lambda m: m.__class__.__name__, models)
    dataset_label = _find_variable_name(dataset)[0]
    dataset_labels = [dataset_label] * num_models
    metrics = [metric] * num_models

    sframe_results = _graphlab.SFrame({
        'model': model_names,
        'dataset': dataset_labels,
        'model_type': list(model_types),
        'metric': metrics,
        'results': list(results)
    })

    return sframe_results
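The base-class check above simply rejects mixed model lists, while the docstring describes models being evaluated in groups by type. A hedged, generic sketch of partitioning models by their immediate base class (illustrative only, not GraphLab's implementation):

from collections import defaultdict

def group_by_base_class(models):
    # Map each model's immediate base class to the list of models sharing it,
    # so each group can later be evaluated with its own metric.
    groups = defaultdict(list)
    for m in models:
        groups[m.__class__.__base__].append(m)
    return dict(groups)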
Example #4
def compare(dataset, models, **kwargs):
    r"""
    Compare the prediction (or model-equivalent action) performance of models
    on a common test dataset.


    .. warning::

        This currently only works on Recommender type models.

        The comparison toolkit is currently in beta, and feedback is
        welcome! Please send comments to [email protected].


    Parameters
    ----------
    dataset : SFrame
        The dataset to use for model evaluation.

    models : list
        List of trained models to compare.


    Returns
    -------
    out : SFrame
        An SFrame with one row per model, containing the model name, dataset
        label, model type, evaluation metric, and evaluation results.

    Examples
    --------
    If you have created two ItemSimilarityRecommenders ``m1`` and ``m2`` and have
    an :class:`~graphlab.SFrame` ``test_data``, then you may compare the
    performance of the two models on test data using:

    >>> import graphlab
    >>> train_data = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                               'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"]})
    >>> test_data = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                              'item_id': ["b", "d", "a", "c", "e", "a", "e"]})
    >>> m1 = graphlab.item_similarity_recommender.create(train_data)
    >>> m2 = graphlab.item_similarity_recommender.create(train_data, only_top_k=1)
    >>> graphlab.compare(test_data, [m1, m2])

    The evaluation metric is automatically set to 'precision_recall', and the
    evaluation will be based on recommendations that exclude items seen in the
    training data.

    If you want to evaluate on the original training set:

    >>> graphlab.compare(train_data, [m1, m2])

    Suppose you have four models: two trained with a target rating column and
    two trained without a target. By default, the models are split into two
    groups and evaluated with 'rmse' and 'precision_recall' as the respective
    evaluation metrics.

    >>> train_data2 = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                                'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"],
    ...                                'rating': [1, 3, 4, 5, 3, 4, 2, 5]})
    >>> test_data2 = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                               'item_id': ["b", "d", "a", "c", "e", "a", "e"],
    ...                               'rating': [3, 5, 4, 4, 3, 5, 2]})
    >>> m3 = graphlab.factorization_recommender.create(train_data2, target='rating')
    >>> m4 = graphlab.factorization_recommender.create(train_data2, target='rating')
    >>> graphlab.compare(test_data2, [m3, m4])

    To compare all four models, you can do:

    >>> model_comp = graphlab.compare(test_data2, [m1, m2, m3, m4])
    """

    _mt._get_metric_tracker().track('toolkit.compare_models')

    num_models = len(models)

    model_names = []
    results = []

    if num_models < 1:
        raise ValueError("Must pass in at least one model to evaluate")

    base_model_types = [m.__class__.__base__ for m in models]
    unique_base_model_types = list(set(base_model_types))

    if len(unique_base_model_types) != 1:
        raise ValueError("All models must share the same base model type")

    metric = 'precision_recall'
    if unique_base_model_types[0] == _Recommender:
        results = __evaluate_recomenders(dataset, models, metric=metric, **kwargs)
    else:
        import logging
        logging.warning(
            'Model comparison currently only supports comparing models created '
            'by graphlab.toolkits.recommender or its subclasses such as '
            'item_similarity_recommender.')
        return _graphlab.SFrame()

    print "Model compare metric: " + metric

    metric_label = metric
    if metric == 'precision_recall':
        metric_label = 'precision_recall_overall'

    # cast result SFrames to plain lists of metric values
    results = [{metric: list(x[metric_label])} for x in results]

    for i in range(len(models)):
        name = _find_variable_name(models[i])[0]
        if name is None or not isinstance(name, str):
            name = 'Model_' + str(i)
        model_names.append(name)

    model_types = [m.__class__.__name__ for m in models]
    dataset_label = _find_variable_name(dataset)[0]
    dataset_labels = [dataset_label] * num_models
    metrics = [metric] * num_models

    sframe_results = _graphlab.SFrame({
        'model': model_names,
        'dataset': dataset_labels,
        'model_type': model_types,
        'metric': metrics,
        'results': results
    })

    return sframe_results
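Both versions finish by laying out one row per model as parallel columns. A minimal, library-free sketch of that table-assembly pattern, using a plain dict-of-lists instead of an SFrame; the names and values below are placeholders, not real evaluation output.

def build_comparison_table(model_names, dataset_label, model_types, metric, results):
    # One entry per model in every column; the dataset label and metric are
    # repeated so each row is self-describing.
    n = len(model_names)
    return {
        'model': list(model_names),
        'dataset': [dataset_label] * n,
        'model_type': list(model_types),
        'metric': [metric] * n,
        'results': list(results),
    }

table = build_comparison_table(
    ['m1', 'm2'], 'test_data',
    ['ItemSimilarityRecommender', 'ItemSimilarityRecommender'],
    'precision_recall',
    [{'precision_recall': []}, {'precision_recall': []}])
print(table['model'], table['metric'])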