Example #1
    def test_evaluate(self):
        t = self.dtrain[self.target]
        c = self.model.predict(self.dtrain, "class")
        p = self.model.predict(self.dtrain, "probability_vector")
        ans_metrics = [
            "accuracy",
            "auc",
            "confusion_matrix",
            "f1_score",
            "log_loss",
            "precision",
            "recall",
            "roc_curve",
        ]

        self.sm_metrics = {
            "accuracy": evaluation.accuracy(t, c),
            "auc": evaluation.auc(t, p),
            "confusion_matrix": evaluation.confusion_matrix(t, c),
            "f1_score": evaluation.f1_score(t, c),
            "log_loss": evaluation.log_loss(t, p),
            "precision": evaluation.precision(t, c),
            "recall": evaluation.recall(t, c),
            "roc_curve": evaluation.roc_curve(t, p),
        }
        model = self.model

        def check_cf_matrix(ans):
            self.assertTrue(ans is not None)
            self.assertTrue("confusion_matrix" in ans)
            cf = ans["confusion_matrix"].sort(
                ["target_label", "predicted_label"])
            ans_cf = self.sm_metrics["confusion_matrix"].sort(
                ["target_label", "predicted_label"])
            self.assertEqual(list(cf["count"]), list(ans_cf["count"]))

        def check_roc_curve(ans):
            self.assertTrue(ans is not None)
            self.assertTrue("roc_curve" in ans)
            roc = ans["roc_curve"]
            self.assertEqual(type(roc), tc.SFrame)

        def check_metric(ans, metric):
            if metric == "confusion_matrix":
                check_cf_matrix(ans)
            elif metric == "roc_curve":
                check_roc_curve(ans)
            else:
                self.assertTrue(ans is not None)
                self.assertTrue(metric in ans)
                self.assertAlmostEqual(
                    ans[metric],
                    self.sm_metrics[metric],
                    places=4,
                    msg="%s = (%s,%s)" %
                    (metric, ans[metric], self.sm_metrics[metric]),
                )

        # Default
        ans = model.evaluate(self.dtrain)
        self.assertEqual(sorted(ans.keys()), sorted(ans_metrics))
        for m in ans_metrics:
            check_metric(ans, m)

        # Individual
        for m in ans_metrics:
            ans = model.evaluate(self.dtrain, metric=m)
            check_metric(ans, m)

        # Test evaluate with new class
        test_data = self.dtrain.copy().head()
        test_data[self.target] = test_data[self.target].apply(
            lambda x: str(x) + "-new")
        for m in ans_metrics:
            ans = model.evaluate(test_data, metric=m)
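
The assertions above reduce to comparing `model.evaluate` output against metrics computed directly with the evaluation functions. A minimal, self-contained sketch of that comparison on made-up labels (no trained model required; the values are illustrative only):

import turicreate as tc

# Hypothetical ground-truth labels and predicted class labels.
targets = tc.SArray([0, 0, 1, 1])
predicted = tc.SArray([0, 1, 1, 1])

# Scalar metrics compare directly.
print(tc.evaluation.accuracy(targets, predicted))        # 0.75

# The confusion matrix is an SFrame; sort both sides before comparing
# counts, exactly as check_cf_matrix does above.
cm = tc.evaluation.confusion_matrix(targets, predicted)
print(cm.sort(['target_label', 'predicted_label']))
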
Example #2
    def evaluate(self, dataset, metric='auto'):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the session_id, target and features used for model training.
            Additional columns are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible
                                   prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an
                                   ROC curve

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        create, predict

        Examples
        ----------
        .. sourcecode:: python

          >>> results = model.evaluate(data)
          >>> print(results['accuracy'])
        """

        avail_metrics = ['accuracy', 'auc', 'precision', 'recall',
                         'f1_score', 'log_loss', 'confusion_matrix', 'roc_curve']
        _tkutl._check_categorical_option_type(
            'metric', metric, avail_metrics + ['auto'])

        if metric == 'auto':
            metrics = avail_metrics
        else:
            metrics = [metric]

        probs = self.predict(dataset, output_type='probability_vector')
        classes = self.predict(dataset, output_type='class')

        ret = {}
        if 'accuracy' in metrics:
            ret['accuracy'] = _evaluation.accuracy(dataset[self.target], classes)
        if 'auc' in metrics:
            ret['auc'] = _evaluation.auc(dataset[self.target], probs)
        if 'precision' in metrics:
            ret['precision'] = _evaluation.precision(dataset[self.target], classes)
        if 'recall' in metrics:
            ret['recall'] = _evaluation.recall(dataset[self.target], classes)
        if 'f1_score' in metrics:
            ret['f1_score'] = _evaluation.f1_score(dataset[self.target], classes)
        if 'log_loss' in metrics:
            ret['log_loss'] = _evaluation.log_loss(dataset[self.target], probs)
        if 'confusion_matrix' in metrics:
            ret['confusion_matrix'] = _evaluation.confusion_matrix(dataset[self.target], classes)
        if 'roc_curve' in metrics:
            ret['roc_curve'] = _evaluation.roc_curve(dataset[self.target], probs)

        return ret
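
A short usage sketch for the method above, assuming a trained classifier `model` and an SFrame `test_data` that contains the same session_id, target, and feature columns used for training (both names are placeholders):

# Hypothetical: `model` is a trained classifier, `test_data` matches its schema.
results = model.evaluate(test_data)                 # metric='auto' -> all metrics
print(results['accuracy'], results['f1_score'])

# Requesting a single metric returns a dict with just that key.
cm = model.evaluate(test_data, metric='confusion_matrix')['confusion_matrix']
print(cm.sort(['target_label', 'predicted_label']))
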
Example #3
    def evaluate(self, dataset, metric='auto', verbose=True, batch_size=64):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset to use for evaluation, must include a column with the same
            name as the features used for model training. Additional columns
            are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible
                                   prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an
                                   ROC curve

        verbose : bool, optional
            If True, prints progress updates and model details.

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve performance.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        classify, predict

        Examples
        ----------
        .. sourcecode:: python

          >>> results = model.evaluate(data)
          >>> print(results['accuracy'])
        """
        from turicreate.toolkits import evaluation

        # parameter checking
        if not isinstance(dataset, _tc.SFrame):
            raise TypeError('\'dataset\' parameter must be an SFrame')

        avail_metrics = ['accuracy', 'auc', 'precision', 'recall',
                         'f1_score', 'log_loss', 'confusion_matrix', 'roc_curve']
        _tk_utils._check_categorical_option_type(
            'metric', metric, avail_metrics + ['auto'])

        if metric == 'auto':
            metrics = avail_metrics
        else:
            metrics = [metric]

        if _is_deep_feature_sarray(dataset[self.feature]):
            deep_features = dataset[self.feature]
        else:
            deep_features = get_deep_features(dataset[self.feature], verbose=verbose)
        data = _tc.SFrame({'deep features': deep_features})
        data = data.add_row_number()
        missing_ids = data.filter_by([[]], 'deep features')['id']

        if len(missing_ids) > 0:
            data = data.filter_by([[]], 'deep features', exclude=True)
            # Remove the labels for entries without deep features
            _logging.warning("Dropping %d examples which are less than 975ms in length." % len(missing_ids))
            labels = dataset[[self.target]].add_row_number()
            labels = data.join(labels, how='left')[self.target]
        else:
            labels = dataset[self.target]
        assert(len(labels) == len(data))

        if any([m in metrics for m in ('roc_curve', 'log_loss', 'auc')]):
            probs = self.predict(data['deep features'], output_type='probability_vector',
                                 verbose=verbose, batch_size=batch_size)
        if any([m in metrics for m in ('accuracy', 'precision', 'recall', 'f1_score', 'confusion_matrix')]):
            classes = self.predict(data['deep features'], output_type='class',
                                   verbose=verbose, batch_size=batch_size)

        ret = {}
        if 'accuracy' in metrics:
            ret['accuracy'] = evaluation.accuracy(labels, classes)
        if 'auc' in metrics:
            ret['auc'] = evaluation.auc(labels, probs, index_map=self._class_label_to_id)
        if 'precision' in metrics:
            ret['precision'] = evaluation.precision(labels, classes)
        if 'recall' in metrics:
            ret['recall'] = evaluation.recall(labels, classes)
        if 'f1_score' in metrics:
            ret['f1_score'] = evaluation.f1_score(labels, classes)
        if 'log_loss' in metrics:
            ret['log_loss'] = evaluation.log_loss(labels, probs, index_map=self._class_label_to_id)
        if 'confusion_matrix' in metrics:
            ret['confusion_matrix'] = evaluation.confusion_matrix(labels, classes)
        if 'roc_curve' in metrics:
            ret['roc_curve'] = evaluation.roc_curve(labels, probs, index_map=self._class_label_to_id)

        return ret
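
The row-alignment step above (drop examples whose deep features came back empty, but keep the labels lined up with the surviving rows) uses only generic SFrame operations. A standalone sketch on made-up data, mirroring the `add_row_number`/`filter_by`/`join` pattern:

import turicreate as tc

# Toy stand-ins: the second "deep features" row is empty and should be dropped.
features = tc.SArray([[1.0, 2.0], [], [3.0, 4.0]])
targets = tc.SArray(['dog', 'cat', 'dog'])

data = tc.SFrame({'deep features': features}).add_row_number()   # adds an 'id' column
missing_ids = data.filter_by([[]], 'deep features')['id']        # ids of empty rows

if len(missing_ids) > 0:
    data = data.filter_by([[]], 'deep features', exclude=True)
    labels = tc.SFrame({'target': targets}).add_row_number()
    # A left join on 'id' keeps only the labels whose rows survived.
    labels = data.join(labels, how='left')['target']
else:
    labels = targets

assert len(labels) == len(data)
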
Example #4
    def evaluate(self, dataset, metric='auto', batch_size=256, verbose=True):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the feature and target columns used for model training.
            Additional columns are ignored.

        metric : str, optional
            Name of the evaluation metric. Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible
                                   prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an
                                   ROC curve

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve
            performance.

        verbose : bool, optional
            If True, prints prediction progress.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        create, predict

        Examples
        ----------
        .. sourcecode:: python

          >>> results = model.evaluate(data)
          >>> print(results['accuracy'])
        """
        import os, json, math

        if self.target not in dataset.column_names():
            raise _ToolkitError("Must provide ground truth column, '"
                + self.target + "' in the evaluation dataset.")

        predicted = self._predict_with_probabilities(dataset, batch_size, verbose)

        avail_metrics = ['accuracy', 'auc', 'precision', 'recall',
                         'f1_score', 'confusion_matrix', 'roc_curve', 'log_loss']

        _tkutl._check_categorical_option_type(
                        'metric', metric, avail_metrics + ['auto'])

        metrics = avail_metrics if metric == 'auto' else [metric]

        labels = self.classes

        ret = {}
        if 'accuracy' in metrics:
            ret['accuracy'] = _evaluation.accuracy(
                dataset[self.target], predicted[self.target])
        if 'auc' in metrics:
            ret['auc'] = _evaluation.auc(
                dataset[self.target], predicted['probability'],
                index_map=self._class_to_index)
        if 'precision' in metrics:
            ret['precision'] = _evaluation.precision(
                dataset[self.target], predicted[self.target])
        if 'recall' in metrics:
            ret['recall'] = _evaluation.recall(
                dataset[self.target], predicted[self.target])
        if 'f1_score' in metrics:
            ret['f1_score'] = _evaluation.f1_score(
                dataset[self.target], predicted[self.target])
        if 'confusion_matrix' in metrics:
            ret['confusion_matrix'] = _evaluation.confusion_matrix(
                dataset[self.target], predicted[self.target])
        if 'roc_curve' in metrics:
            ret['roc_curve'] = _evaluation.roc_curve(
                dataset[self.target], predicted['probability'],
                index_map=self._class_to_index)
        if 'log_loss' in metrics:
            ret['log_loss'] = _evaluation.log_loss(
                dataset[self.target], predicted['probability'],
                index_map=self._class_to_index)

        from .._evaluate_utils import  (
            entropy,
            confidence,
            relative_confidence,
            get_confusion_matrix,
            hclusterSort,
            l2Dist
        )
        evaluation_result = {k: ret[k] for k in metrics}
        evaluation_result['num_test_examples'] = len(dataset)
        for k in ['num_classes', 'num_examples', 'training_loss', 'training_time', 'max_iterations']:
            evaluation_result[k] = getattr(self, k)

        #evaluation_result['input_image_shape'] = getattr(self, 'input_image_shape')

        evaluation_result["model_name"] = "Drawing Classifier"
        extended_test = dataset.add_column(predicted["probability"], 'probs')
        extended_test['label'] = dataset[self.target]

        extended_test = extended_test.add_columns(
            [extended_test.apply(lambda d: labels[d['probs'].index(confidence(d['probs']))]),
             extended_test.apply(lambda d: entropy(d['probs'])),
             extended_test.apply(lambda d: confidence(d['probs'])),
             extended_test.apply(lambda d: relative_confidence(d['probs']))],
            ['predicted_label', 'entropy', 'confidence', 'relative_confidence'])

        extended_test = extended_test.add_column(extended_test.apply(lambda d: d['label'] == d['predicted_label']), 'correct')

        sf_conf_mat = get_confusion_matrix(extended_test, labels)
        confidence_threshold = 0.5
        hesitant_threshold = 0.2
        evaluation_result['confidence_threshold'] = confidence_threshold
        evaluation_result['hesitant_threshold'] = hesitant_threshold
        evaluation_result['confidence_metric_for_threshold'] = 'relative_confidence'

        evaluation_result['conf_mat'] = list(sf_conf_mat)

        vectors = map(lambda l: {'name': l, 'pos':list(sf_conf_mat[sf_conf_mat['target_label']==l].sort('predicted_label')['norm_prob'])},
                    labels)
        evaluation_result['sorted_labels'] = hclusterSort(vectors, l2Dist)[0]['name'].split("|")

        per_l = extended_test.groupby(['label'], {'count': _tc.aggregate.COUNT, 'correct_count': _tc.aggregate.SUM('correct') })
        per_l['recall'] = per_l.apply(lambda l: l['correct_count']*1.0 / l['count'])

        per_pl = extended_test.groupby(['predicted_label'], {'predicted_count': _tc.aggregate.COUNT, 'correct_count': _tc.aggregate.SUM('correct') })
        per_pl['precision'] = per_pl.apply(lambda l: l['correct_count']*1.0 / l['predicted_count'])
        per_pl = per_pl.rename({'predicted_label': 'label'})
        evaluation_result['label_metrics'] = list(per_l.join(per_pl, on='label', how='outer').select_columns(['label', 'count', 'correct_count', 'predicted_count', 'recall', 'precision']))
        evaluation_result['labels'] = labels

        extended_test = extended_test.add_row_number('__idx').rename({'label': 'target_label'})

        evaluation_result['test_data'] = extended_test
        evaluation_result['feature'] = self.feature

        return _Evaluation(evaluation_result)
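
The per-label recall/precision block near the end is a plain groupby over the prediction results; a minimal sketch on toy data (the column names mirror the code above, the rows are made up):

import turicreate as tc

# Hypothetical prediction results: true label and predicted label per example.
extended_test = tc.SFrame({
    'label':           ['cat', 'cat', 'dog', 'dog'],
    'predicted_label': ['cat', 'dog', 'dog', 'dog'],
})
extended_test['correct'] = extended_test.apply(
    lambda d: d['label'] == d['predicted_label'])

# Recall per true label: correct / total examples with that label.
per_l = extended_test.groupby(['label'],
    {'count': tc.aggregate.COUNT, 'correct_count': tc.aggregate.SUM('correct')})
per_l['recall'] = per_l.apply(lambda r: r['correct_count'] * 1.0 / r['count'])

# Precision per predicted label: correct / total predicted as that label.
per_pl = extended_test.groupby(['predicted_label'],
    {'predicted_count': tc.aggregate.COUNT, 'correct_count': tc.aggregate.SUM('correct')})
per_pl['precision'] = per_pl.apply(lambda r: r['correct_count'] * 1.0 / r['predicted_count'])

print(per_l.join(per_pl.rename({'predicted_label': 'label'}), on='label', how='outer'))
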
Example #5
    def evaluate(self, dataset, metric='auto', verbose=True):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.
        
        Parameters
        ----------
        dataset : SFrame
        Dataset of new observations. Must include columns with the same
        names as the feature and target columns used for model training.
        Additional columns are ignored.
        
        metric : str optional
        Name of the evaluation metric. Possible values are:
        
        - 'auto'             : Returns all available metrics.
        - 'accuracy'         : Classification accuracy (micro average).
        - 'auc'              : Area under the ROC curve (macro average)
        - 'precision'        : Precision score (macro average)
        - 'recall'           : Recall score (macro average)
        - 'f1_score'         : F1 score (macro average)
        - 'confusion_matrix' : An SFrame with counts of possible 
                               prediction/true label combinations.
        - 'roc_curve'        : An SFrame containing information needed for an
                               ROC curve
        
        verbose : bool optional
        If True, prints prediction progress.

        Returns
        -------
        out : dict
        Dictionary of evaluation results where the key is the name of the
        evaluation metric (e.g. `accuracy`) and the value is the evaluation
        score.
        
        See Also
        ----------
        create, predict
        
        Examples
        ----------
        .. sourcecode:: python
        
        >>> results = model.evaluate(data)
        >>> print(results['accuracy'])
        """

        if self.target not in dataset.column_names():
            raise _ToolkitError("Dataset provided to evaluate does not have " 
                + "ground truth in the " + self.target + " column.")

        predicted = self._predict_with_probabilities(dataset, verbose)

        avail_metrics = ['accuracy', 'auc', 'precision', 'recall',
                         'f1_score', 'confusion_matrix', 'roc_curve']

        _tkutl._check_categorical_option_type(
                        'metric', metric, avail_metrics + ['auto'])

        metrics = avail_metrics if metric == 'auto' else [metric]
        
        ret = {}
        if 'accuracy' in metrics:
            ret['accuracy'] = _evaluation.accuracy(
                dataset[self.target], predicted[self.target])
        if 'auc' in metrics:
            ret['auc'] = _evaluation.auc(
                dataset[self.target], predicted['probability'], 
                index_map=self._class_to_index)
        if 'precision' in metrics:
            ret['precision'] = _evaluation.precision(
                dataset[self.target], predicted[self.target])
        if 'recall' in metrics:
            ret['recall'] = _evaluation.recall(
                dataset[self.target], predicted[self.target])
        if 'f1_score' in metrics:
            ret['f1_score'] = _evaluation.f1_score(
                dataset[self.target], predicted[self.target])
        if 'confusion_matrix' in metrics:
            ret['confusion_matrix'] = _evaluation.confusion_matrix(
                dataset[self.target], predicted[self.target])
        if 'roc_curve' in metrics:
            ret['roc_curve'] = _evaluation.roc_curve(
                dataset[self.target], predicted['probability'], 
                index_map=self._class_to_index)
        
        return ret
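
The `index_map` passed to the probability-based metrics above simply maps each class label to its position in the probability vector. A toy illustration with the public `turicreate.evaluation` functions (the labels and probabilities here are made up):

import turicreate as tc

targets = tc.SArray(['square', 'circle', 'square'])
# Probability vectors ordered as [circle, square].
probs = tc.SArray([[0.2, 0.8], [0.7, 0.3], [0.1, 0.9]])
index_map = {'circle': 0, 'square': 1}

print(tc.evaluation.auc(targets, probs, index_map=index_map))
roc = tc.evaluation.roc_curve(targets, probs, index_map=index_map)
print(roc.column_names())
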
Example #6
    def evaluate(self, dataset, metric='auto', batch_size=64):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset to use for evaluation, must include a column with the same
            name as the features used for model training. Additional columns
            are ignored.

        metric : str, optional
            Name of the evaluation metric.  Possible values are:

            - 'auto'             : Returns all available metrics.
            - 'accuracy'         : Classification accuracy (micro average).
            - 'auc'              : Area under the ROC curve (macro average)
            - 'precision'        : Precision score (macro average)
            - 'recall'           : Recall score (macro average)
            - 'f1_score'         : F1 score (macro average)
            - 'log_loss'         : Log loss
            - 'confusion_matrix' : An SFrame with counts of possible
                                   prediction/true label combinations.
            - 'roc_curve'        : An SFrame containing information needed for an
                                   ROC curve

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve performance.

        Returns
        -------
        out : dict
            Dictionary of evaluation results where the key is the name of the
            evaluation metric (e.g. `accuracy`) and the value is the evaluation
            score.

        See Also
        ----------
        classify, predict

        Examples
        ----------
        .. sourcecode:: python

          >>> results = model.evaluate(data)
          >>> print(results['accuracy'])
        """
        from turicreate.toolkits import evaluation

        # parameter checking
        if not isinstance(dataset, _tc.SFrame):
            raise TypeError('\'dataset\' parameter must be an SFrame')
        if batch_size < 1:
            raise ValueError('\'batch_size\' must be greater than or equal to 1')

        avail_metrics = ['accuracy', 'auc', 'precision', 'recall',
                         'f1_score', 'log_loss', 'confusion_matrix', 'roc_curve']
        _tk_utils._check_categorical_option_type(
            'metric', metric, avail_metrics + ['auto'])

        if metric == 'auto':
            metrics = avail_metrics
        else:
            metrics = [metric]

        if any([m in metrics for m in ('roc_curve', 'log_loss', 'auc')]):
            probs = self.predict(dataset, output_type='probability_vector', batch_size=batch_size)
        if any([m in metrics for m in ('accuracy', 'precision', 'recall', 'f1_score', 'confusion_matrix')]):
            classes = self.predict(dataset, output_type='class', batch_size=batch_size)

        ret = {}
        if 'accuracy' in metrics:
            ret['accuracy'] = evaluation.accuracy(dataset[self.target], classes)
        if 'auc' in metrics:
            ret['auc'] = evaluation.auc(dataset[self.target], probs, index_map=self._class_label_to_id)
        if 'precision' in metrics:
            ret['precision'] = evaluation.precision(dataset[self.target], classes)
        if 'recall' in metrics:
            ret['recall'] = evaluation.recall(dataset[self.target], classes)
        if 'f1_score' in metrics:
            ret['f1_score'] = evaluation.f1_score(dataset[self.target], classes)
        if 'log_loss' in metrics:
            ret['log_loss'] = evaluation.log_loss(dataset[self.target], probs, index_map=self._class_label_to_id)
        if 'confusion_matrix' in metrics:
            ret['confusion_matrix'] = evaluation.confusion_matrix(dataset[self.target], classes)
        if 'roc_curve' in metrics:
            ret['roc_curve'] = evaluation.roc_curve(dataset[self.target], probs, index_map=self._class_label_to_id)

        return ret
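
Because the method above only runs the prediction pass that the requested metrics actually need, asking for a single metric skips the other pass entirely. A usage sketch, assuming a trained model `model` and a matching SFrame `test_data` (both hypothetical):

# Only the class-prediction pass runs for accuracy:
print(model.evaluate(test_data, metric='accuracy')['accuracy'])

# Only the probability-vector pass runs for log loss:
print(model.evaluate(test_data, metric='log_loss', batch_size=32)['log_loss'])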