def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.


        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'      : Compute all metrics.
            - 'rmse'      : Root mean squared error.
            - 'max_error' : Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': By default the model will treat missing values as-is.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        >>> results = model.evaluate(test_data, 'rmse')

        """
        _mt._get_metric_tracker().track('toolkit.regression.random_forest_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric, ['auto', 'rmse', 'max_error'])

        results = {}
        if metric in ['rmse', 'auto']:
            results = super(RandomForestRegression, self).evaluate(dataset, metric=metric,
                                                                   missing_value_action=missing_value_action)

        if metric in ['max_error', 'auto']:
            predictions = self.predict(dataset, missing_value_action=missing_value_action)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset', target + ' (target column)')
            results['max_error'] = _graphlab.evaluation.max_error(predictions, dataset[target])
        return results
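# A minimal sketch of what the 'max_error' branch above computes: the largest
# absolute difference between predicted and actual target values. Plain Python
# lists stand in for the SArrays returned by predict() and dataset[target].
def max_error_sketch(predictions, targets):
    """Return the maximum absolute prediction error."""
    return max(abs(p - t) for p, t in zip(predictions, targets))

# Errors are 1.0 and 2.5, so the max error is 2.5.
assert max_error_sketch([10.0, 12.5], [11.0, 10.0]) == 2.5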
    def evaluate(self, dataset, metric='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy.
            - 'confusion_matrix' : An SFrame with counts of possible prediction/true
                                   label combinations.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict, classify

        Examples
        --------
        >>> results = model.evaluate(test_data)
        >>> results = model.evaluate(test_data, metric='accuracy')
        >>> results = model.evaluate(test_data, metric='confusion_matrix')

        Notes
        -----
        When evaluating for classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5. For more
        flexible classification accuracy, please use functions in the
        :py:mod:`~graphlab.toolkits.evaluation` module.
        """
        _mt._get_metric_tracker().track('toolkit.classifier.boosted_trees_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                      ['auto', 'accuracy', 'confusion_matrix'])

        results = {}
        if metric in ['auto', 'accuracy']:
            results = super(_Classifier, self).evaluate(dataset, metric=metric)

        if metric in ['confusion_matrix', 'auto']:
            predictions = self.predict(dataset, output_type='class')
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset', target + ' (target column)')
            results['confusion_matrix'] = _graphlab.evaluation.confusion_matrix(
                dataset[target], predictions)
        return results
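# Hedged sketch of the flexibility mentioned in the Notes above: instead of the
# fixed 0.5 classification threshold, apply a custom threshold to predicted
# positive-class probabilities and measure the resulting accuracy. Plain Python
# lists stand in for the SArrays that predict(output_type='probability') and
# the graphlab.evaluation functions would use.
def accuracy_at_threshold(targets, probabilities, threshold):
    """Fraction of rows whose thresholded prediction matches the 0/1 target."""
    predictions = [1 if p >= threshold else 0 for p in probabilities]
    correct = sum(1 for t, yhat in zip(targets, predictions) if t == yhat)
    return correct / float(len(targets))

# The same probabilities can score differently under different thresholds.
labels = [0, 0, 1, 1]
probs = [0.2, 0.55, 0.65, 0.9]
print(accuracy_at_threshold(labels, probs, 0.5))  # 0.75
print(accuracy_at_threshold(labels, probs, 0.6))  # 1.0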
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Can be one of:

            - 'auto': Compute all metrics.
            - 'rmse': Root mean squared error.
            - 'max_error': Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': By default the model will treat missing values as-is.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        .. sourcecode:: python

          >>> results = model.evaluate(test_data, 'rmse')

        """
        _mt._get_metric_tracker().track(
            'toolkit.regression.boosted_trees_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                                ['auto', 'rmse', 'max_error'])
        return super(BoostedTreesRegression,
                     self).evaluate(dataset,
                                    missing_value_action=missing_value_action,
                                    metric=metric)
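# Hypothetical sketch of the 'impute' option described above: missing feature
# values are replaced with the mean computed on the training data. Plain Python
# lists stand in for SFrame columns, with None marking a missing value.
def impute_with_training_mean(training_column, evaluation_column):
    """Fill None entries of evaluation_column with the mean of training_column."""
    observed = [v for v in training_column if v is not None]
    training_mean = sum(observed) / float(len(observed))
    return [training_mean if v is None else v for v in evaluation_column]

# The missing entry is filled with the training mean (2.0), not the test mean.
assert impute_with_training_mean([1.0, 2.0, 3.0], [4.0, None]) == [4.0, 2.0]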
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Can be one of:

            - 'auto': Compute all metrics.
            - 'rmse': Root mean squared error.
            - 'max_error': Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': By default the model will treat missing values as-is.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        .. sourcecode:: python

          >>> results = model.evaluate(test_data, 'rmse')

        """
        _mt._get_metric_tracker().track(
                'toolkit.regression.boosted_trees_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(
                metric, ['auto', 'rmse', 'max_error'])
        return super(BoostedTreesRegression, self).evaluate(dataset,
                                 missing_value_action=missing_value_action,
                                 metric=metric)
    def evaluate(self, dataset, metric='auto'):
        """
        Evaluate the model on the given dataset.


        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'      : Compute all metrics.
            - 'rmse'      : Root mean squared error.
            - 'max_error' : Maximum error.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict

        Examples
        --------
        >>> results = model.evaluate(test_data, 'rmse')

        Notes
        -----
        When evaluating for classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5.
        """
        _mt._get_metric_tracker().track('toolkit.regression.boosted_trees_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                      ['auto', 'rmse', 'max_error'])

        results = {}
        if metric in ['rmse', 'auto']:
            results = super(BoostedTreesRegression, self).evaluate(dataset, metric=metric)

        if metric in ['max_error', 'auto']:
            predictions = self.predict(dataset)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset', target + ' (target column)')
            results['max_error'] = _graphlab.evaluation.max_error(
                predictions, dataset[target])
        return results
    def evaluate(self, dataset, metric='auto', missing_value_action='impute'):
        r"""Evaluate the model by making target value predictions and comparing
        to actual values.

        Two metrics are used to evaluate linear regression models.  The first
        is root-mean-squared error (RMSE) while the second is the absolute
        value of the maximum error between the actual and predicted values.
        Let :math:`y` and :math:`\hat{y}` denote vectors of length :math:`N`
        (number of examples) with actual and predicted values. The RMSE is
        defined as:

        .. math::

            RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N (\widehat{y}_i - y_i)^2}

        while the max-error is defined as

        .. math::

            max-error = \max_{i=1}^N \|\widehat{y}_i - y_i\|

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - `auto`      : Compute all metrics.
            - `rmse`      : Root mean squared error.
            - `max_error` : Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - `impute`: Proceed with evaluation by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during evaluation.
            - `error` : Do not proceed with evaluation and terminate with
                        an error message.

        Returns
        -------
        out : dict
            Results from the model evaluation procedure.

        See Also
        ----------
        create, predict

        Examples
        ----------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
        ...                                           target='price',
        ...                                           features=['bath', 'bedroom', 'size'])
        >>> results = model.evaluate(data)
        """

        _mt._get_metric_tracker().track(
            'toolkit.regression.linear_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                                ['auto', 'rmse', 'max_error'])
        return super(LinearRegression, self).evaluate(
            dataset,
            missing_value_action=missing_value_action,
            metric=metric)
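# A self-contained sketch of the RMSE formula given in the docstring above,
# with plain Python lists in place of SArrays; it illustrates the definition
# rather than the toolkit's implementation.
import math

def rmse_sketch(targets, predictions):
    """Square root of the mean squared residual."""
    squared_errors = [(p - t) ** 2 for p, t in zip(predictions, targets)]
    return math.sqrt(sum(squared_errors) / float(len(squared_errors)))

# Residuals are 0, 0.5 and -1.0, so RMSE = sqrt(1.25 / 3) ~= 0.645
print(rmse_sketch([1.0, 2.0, 4.0], [1.0, 2.5, 3.0]))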
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Two metrics are used to evaluate SVM. The confusion table contains the
        cross-tabulation of actual and predicted classes for the target
        variable. Classification accuracy is the fraction of examples whose
        predicted and actual classes match.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto': Returns all available metrics.
            - 'accuracy': Classification accuracy.
            - 'confusion_matrix': An SFrame with counts of possible prediction/true
              label combinations.
            - 'roc_curve': An SFrame containing information needed for an ROC curve.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Defaults to 'impute'.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            Dictionary of evaluation results. The dictionary keys are *accuracy*
            and *confusion_matrix*.

        See Also
        ----------
        create, predict, classify

        Examples
        ----------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')

        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.svm_classifier.create(data,
        ...                                        target='is_expensive',
        ...                                        features=['bath', 'bedroom', 'size'])

        >>> results = model.evaluate(data)
        >>> print results['accuracy']
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.svm_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric, ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])
        return super(_Classifier, self).evaluate(dataset,
                                                 missing_value_action=missing_value_action,
                                                 metric=metric)
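# Hedged sketch of the cross-tabulation described in the docstring above
# ("confusion table"): count every (actual, predicted) class pair. A plain
# collections.Counter stands in for the SFrame that
# graphlab.evaluation.confusion_matrix would return.
from collections import Counter

def confusion_counts_sketch(targets, predictions):
    """Return a mapping of (target_label, predicted_label) -> count."""
    return Counter(zip(targets, predictions))

actual = ['cat', 'dog', 'dog', 'fossa']
predicted = ['dog', 'dog', 'dog', 'dog']
# Counts: ('dog', 'dog') -> 2, ('cat', 'dog') -> 1, ('fossa', 'dog') -> 1
print(confusion_counts_sketch(actual, predicted))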
    def evaluate(self, dataset, metric='auto', max_neighbors=10, radius=None):
        """
        Evaluate the model's accuracy by predicting target classes for a new
        dataset and comparing to the actual target values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : string, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy.
            - 'confusion_matrix' : An SFrame with counts of possible prediction/true
                                   label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an ROC curve.


        max_neighbors : int, optional
            Maximum number of neighbors to consider for each point.

        radius : float, optional
            Maximum distance from each point to a neighbor in the reference
            dataset.

        Returns
        -------
        out : dict
            Evaluation results. The dictionary keys are *accuracy*,
            *confusion_matrix*, and *roc_curve*.

        See also
        --------
        create, predict, predict_topk, classify

        Notes
        -----
        - Because the model randomly breaks ties between predicted classes, the
          results of repeated calls to the `evaluate` method may differ.

        Examples
        --------
        >>> sf_train = graphlab.SFrame({'species': ['cat', 'dog', 'fossa', 'dog'],
        ...                             'height': [9, 25, 20, 23],
        ...                             'weight': [13, 28, 33, 22]})
        >>> m = graphlab.nearest_neighbor_classifier.create(sf_train, target='species')
        >>> ans = m.evaluate(sf_train, max_neighbors=2,
        ...                  metric='confusion_matrix')
        >>> print ans['confusion_matrix']
        +--------------+-----------------+-------+
        | target_label | predicted_label | count |
        +--------------+-----------------+-------+
        |     cat      |       dog       |   1   |
        |     dog      |       dog       |   2   |
        |    fossa     |       dog       |   1   |
        +--------------+-----------------+-------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.nearest_neighbor_classifier.evaluate')

        ## Validate the metric name
        _raise_error_evaluation_metric_is_valid(metric,
                    ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        ## Make sure the input dataset has a target column with an appropriate
        #  type.
        target = self.get('target')
        _raise_error_if_column_exists(dataset, target, 'dataset', target)

        if dataset[target].dtype() not in (str, int):
            raise TypeError("The target column of the evaluation dataset must "
                            "contain integers or strings.")


        ## Compute predictions with the input dataset.
        ystar = self.predict(dataset, output_type='class',
                             max_neighbors=max_neighbors, radius=radius)
        ystar_prob = self.predict(dataset, output_type='probability',
                             max_neighbors=max_neighbors, radius=radius)


        ## Compile accuracy metrics
        results = {}

        if metric in ['accuracy', 'auto']:
            results['accuracy'] = _gl.evaluation.accuracy(targets=dataset[target],
                                                          predictions=ystar)

        if metric in ['confusion_matrix', 'auto']:
            results['confusion_matrix'] = \
                _gl.evaluation.confusion_matrix(targets=dataset[target],
                                                predictions=ystar)

        if metric in ['roc_curve', 'auto']:
            results['roc_curve'] = \
                _gl.evaluation.roc_curve(targets=dataset[target],
                                         predictions=ystar_prob)

        return results
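# Hedged sketch of the information an ROC curve needs, as returned by the
# 'roc_curve' metric above: true- and false-positive rates obtained by sweeping
# a threshold over the predicted probability of the positive class. Binary 0/1
# targets and plain Python lists stand in for SArrays here.
def roc_points_sketch(targets, probabilities, thresholds):
    """Return (threshold, false_positive_rate, true_positive_rate) tuples."""
    positives = sum(targets)
    negatives = len(targets) - positives
    points = []
    for threshold in thresholds:
        predicted = [1 if p >= threshold else 0 for p in probabilities]
        true_pos = sum(1 for t, yhat in zip(targets, predicted) if t == 1 and yhat == 1)
        false_pos = sum(1 for t, yhat in zip(targets, predicted) if t == 0 and yhat == 1)
        points.append((threshold, false_pos / float(negatives), true_pos / float(positives)))
    return points

# Lower thresholds trade more false positives for more true positives.
print(roc_points_sketch([0, 0, 1, 1], [0.1, 0.6, 0.4, 0.9], [0.25, 0.5, 0.75]))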
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an ROC curve

            For more flexibility in calculating evaluation metrics, use the
            :class:`~graphlab.evaluation` module.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Defaults to 'impute'.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        create, predict, classify

        Examples
        ----------
        .. sourcecode:: python

          >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')
          >>> data['is_expensive'] = data['price'] > 30000
          >>> model = graphlab.logistic_classifier.create(data,
          ...                             target='is_expensive',
          ...                             features=['bath', 'bedroom', 'size'])
          >>> results = model.evaluate(data)
          >>> print results['accuracy']
        """

        _mt._get_metric_tracker().track(
                'toolkit.classifier.logistic_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                ['auto', 'accuracy', 'confusion_matrix', 'roc_curve', 'auc',
                 'log_loss', 'precision', 'recall', 'f1_score'])
        return super(_Classifier, self).evaluate(dataset,
                                 missing_value_action=missing_value_action,
                                 metric=metric)
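# Hedged sketch of the micro vs. macro averaging distinction noted in the
# metric list above, using precision as the example. Micro-averaging pools all
# predictions before dividing; macro-averaging computes per-class precision
# first and then takes its unweighted mean. Plain label lists stand in for
# SArrays, and classes that are never predicted are simply skipped here.
def precision_micro_macro_sketch(targets, predictions):
    classes = sorted(set(targets) | set(predictions))
    per_class = []
    total_true_positives = 0
    total_predicted = 0
    for c in classes:
        true_positives = sum(1 for t, p in zip(targets, predictions)
                             if p == c and t == c)
        predicted_count = sum(1 for p in predictions if p == c)
        if predicted_count > 0:
            per_class.append(true_positives / float(predicted_count))
        total_true_positives += true_positives
        total_predicted += predicted_count
    micro = total_true_positives / float(total_predicted)
    macro = sum(per_class) / float(len(per_class))
    return micro, macro

# Per-class precision is 1/1 for 'a' and 2/3 for 'b':
# micro = 3/4 = 0.75, macro = (1.0 + 0.667) / 2 ~= 0.833
print(precision_micro_macro_sketch(['a', 'a', 'b', 'b'], ['a', 'b', 'b', 'b']))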
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model on the given dataset.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

             - 'auto'             : Returns all available metrics.
             - 'accuracy'         : Classification accuracy.
             - 'confusion_matrix' : An SFrame with counts of possible prediction/true
                                    label combinations.
             - 'roc_curve'        : An SFrame containing information needed for an ROC curve.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

             - 'auto': By default the model will treat missing values as-is.
             - 'impute': Proceed with evaluation by filling in the missing
               values with the mean of the training data. Missing
               values are also imputed if an entire column of data is
               missing during evaluation.
             - 'error': Do not proceed with evaluation and terminate with
               an error message.

        Returns
        -------
        out : dict
            A dictionary containing the evaluation result.

        See Also
        ----------
        create, predict, classify

        Examples
        --------
        >>> results = model.evaluate(test_data)
        >>> results = model.evaluate(test_data, metric='accuracy')
        >>> results = model.evaluate(test_data, metric='confusion_matrix')

        Notes
        -----
        When evaluating for classification metrics (e.g. auc,
        confusion_matrix), the classification threshold is set to 0.5. For more
        flexible classification accuracy, please use functions in the
        :py:mod:`~graphlab.toolkits.evaluation` module.
        """
        _mt._get_metric_tracker().track('toolkit.classifier.boosted_trees_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                                ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        results = {}
        if metric in ['auto', 'accuracy', 'roc_curve']:
            results = super(_Classifier, self).evaluate(dataset, metric=metric,
                                                        missing_value_action=missing_value_action)

        if metric in ['confusion_matrix', 'auto']:
            predictions = self.predict(dataset, output_type='class', missing_value_action=missing_value_action)
            target = self.get('target')
            _raise_error_if_column_exists(dataset, target, 'dataset', target)
            results['confusion_matrix'] = _graphlab.evaluation.confusion_matrix(dataset[target], predictions)

        return results
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an ROC curve

            For more flexibility in calculating evaluation metrics, use the
            :class:`~graphlab.evaluation` module.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Defaults to 'impute'.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        create, predict, classify

        Examples
        --------
        .. sourcecode:: python

          >>> results = model.evaluate(test_data)
          >>> results = model.evaluate(test_data, metric='accuracy')
          >>> results = model.evaluate(test_data, metric='confusion_matrix')

        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.random_forest_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric, [
            'auto', 'accuracy', 'confusion_matrix', 'roc_curve', 'auc',
            'log_loss', 'precision', 'recall', 'f1_score'
        ])
        return super(_Classifier,
                     self).evaluate(dataset,
                                    missing_value_action=missing_value_action,
                                    metric=metric)
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        r"""Evaluate the model by making target value predictions and comparing
        to actual values.

        Two metrics are used to evaluate linear regression models.  The first
        is root-mean-squared error (RMSE) while the second is the absolute
        value of the maximum error between the actual and predicted values.
        Let :math:`y` and :math:`\hat{y}` denote vectors of length :math:`N`
        (number of examples) with actual and predicted values. The RMSE is
        defined as:

        .. math::

            RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N (\widehat{y}_i - y_i)^2}

        while the max-error is defined as

        .. math::

            max-error = \max_{i=1}^N \|\widehat{y}_i - y_i\|

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto': Compute all metrics.
            - 'rmse': Root mean squared error.
            - 'max_error': Maximum error.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Defaults to 'impute'.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            Results from the model evaluation procedure.

        See Also
        ----------
        create, predict

        Examples
        ----------
        >>> data =  graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
        ...                                           target='price',
        ...                                           features=['bath', 'bedroom', 'size'])
        >>> results = model.evaluate(data)
        """

        _mt._get_metric_tracker().track(
            'toolkit.regression.linear_regression.evaluate')
        _raise_error_evaluation_metric_is_valid(metric,
                                                ['auto', 'rmse', 'max_error'])
        return super(LinearRegression,
                     self).evaluate(dataset,
                                    missing_value_action=missing_value_action,
                                    metric=metric)
    def evaluate(self, dataset, metric='auto', max_neighbors=10, radius=None):
        """
        Evaluate the model's predictive accuracy. This is done by predicting the
        target class for instances in a new dataset and comparing to known
        target values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto': Returns all available metrics.

            - 'accuracy': Classification accuracy.

            - 'confusion_matrix': An SFrame with counts of possible
              prediction/true label combinations.

            - 'roc_curve': An SFrame containing information needed for an ROC
              curve (binary classification only).

        max_neighbors : int, optional
            Maximum number of neighbors to consider for each point.

        radius : float, optional
            Maximum distance from each point to a neighbor in the reference
            dataset.

        Returns
        -------
        out : dict
            Evaluation results. The dictionary keys are *accuracy*,
            *confusion_matrix*, and *roc_curve* (if applicable).

        See also
        --------
        create, predict, predict_topk, classify

        Notes
        -----
        - Because the model randomly breaks ties between predicted classes, the
          results of repeated calls to the `evaluate` method may differ.

        Examples
        --------
        >>> sf_train = graphlab.SFrame({'species': ['cat', 'dog', 'fossa', 'dog'],
        ...                             'height': [9, 25, 20, 23],
        ...                             'weight': [13, 28, 33, 22]})
        >>> m = graphlab.nearest_neighbor_classifier.create(sf_train, target='species')
        >>> ans = m.evaluate(sf_train, max_neighbors=2,
        ...                  metric='confusion_matrix')
        >>> print ans['confusion_matrix']
        +--------------+-----------------+-------+
        | target_label | predicted_label | count |
        +--------------+-----------------+-------+
        |     cat      |       dog       |   1   |
        |     dog      |       dog       |   2   |
        |    fossa     |       dog       |   1   |
        +--------------+-----------------+-------+
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.nearest_neighbor_classifier.evaluate')

        ## Validate the metric name
        _raise_error_evaluation_metric_is_valid(
            metric, ['auto', 'accuracy', 'confusion_matrix', 'roc_curve'])

        ## Make sure the input dataset has a target column with an appropriate
        #  type.
        target = self.get('target')
        _raise_error_if_column_exists(dataset, target, 'dataset', target)

        if dataset[target].dtype() not in (str, int):
            raise TypeError("The target column of the evaluation dataset must "
                            "contain integers or strings.")

        if self._state["num_classes"] != 2:
            if (metric == 'roc_curve') or (metric == ['roc_curve']):
                err_msg = "Currently, ROC curve is not supported for "
                err_msg += "multi-class classification in this model."
                raise _ToolkitError(err_msg)
            else:
                warn_msg = "WARNING: Ignoring `roc_curve`. "
                warn_msg += "Not supported for multi-class classification."
                print(warn_msg)

        ## Compute predictions with the input dataset.
        ystar = self.predict(dataset,
                             output_type='class',
                             max_neighbors=max_neighbors,
                             radius=radius)
        ystar_prob = self.predict(dataset,
                                  output_type='probability',
                                  max_neighbors=max_neighbors,
                                  radius=radius)

        ## Compile accuracy metrics
        results = {}

        if metric in ['accuracy', 'auto']:
            results['accuracy'] = _gl.evaluation.accuracy(
                targets=dataset[target], predictions=ystar)

        if metric in ['confusion_matrix', 'auto']:
            results['confusion_matrix'] = \
                _gl.evaluation.confusion_matrix(targets=dataset[target],
                                                predictions=ystar)

        if self._state["num_classes"] == 2:
            if metric in ['roc_curve', 'auto']:
                results['roc_curve'] = \
                      _gl.evaluation.roc_curve(targets=dataset[target],
                                               predictions=ystar_prob)
        return results
    def evaluate(self, dataset, metric='auto', missing_value_action='auto'):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Two metrics are used to evaluate SVM. The confusion table contains the
        cross-tabulation of actual and predicted classes for the target
        variable. Classification accuracy is the fraction of examples whose
        predicted and actual classes match.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the target and features used for model training. Additional
            columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'precision'        : Precision score (micro average)
            - 'recall'           : Recall score (micro average)
            - 'f1_score'         : F1 score (micro average)
            - 'confusion_matrix' : An SFrame with counts of possible prediction/true
                                   label combinations.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Defaults to 'impute'.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        create, predict, classify

        Examples
        ----------
        .. sourcecode:: python

          >>> data =  graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')

          >>> data['is_expensive'] = data['price'] > 30000
          >>> model = graphlab.svm_classifier.create(data,
          ...                                        target='is_expensive',
          ...                                        features=['bath', 'bedroom', 'size'])
          >>> results = model.evaluate(data)
          >>> print results['accuracy']
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.svm_classifier.evaluate')
        _raise_error_evaluation_metric_is_valid(metric, [
            'auto', 'accuracy', 'confusion_matrix', 'precision', 'recall',
            'f1_score'
        ])
        return super(_Classifier,
                     self).evaluate(dataset,
                                    missing_value_action=missing_value_action,
                                    metric=metric)