Example #1
    def _analyze_predictor_traindata(self, dest_dir):
        analyzer = ClassifierAnalyzer(self._predictor, self._X_train,
                                      self._y_train)
        direct_comparisons_name = 'direct_comparisons_train.csv'
        direct_comparisons_path = '/'.join([dest_dir, direct_comparisons_name])
        log.debug('direct_comparisons_path: %s' % direct_comparisons_path)
        analyzer.output_direct_comparisons(direct_comparisons_path)

    def _analyze_predictor(self, dest_dir, pipeline_prefix):
        analyzer = ClassifierAnalyzer(self._predictor, self._X_test,
                                      self._y_test)

        # Build names for output plots and report.
        direct_comparisons_name = 'direct_comparisons.csv'  #'%s-direct-compare-results.csv' % pipeline_prefix
        precision_at_k_plot_name = '%s-precision-at-k-plot.png' % pipeline_prefix
        precision_recall_plot_name = '%s-precision-recall-plot.png' % pipeline_prefix
        roc_plot_name = '%s-roc-plot.png' % pipeline_prefix
        report_name = '%s-report.tab' % pipeline_prefix

        # Build paths.
        direct_comparisons_path = '/'.join([dest_dir, direct_comparisons_name])
        log.debug('direct_comparisons_path: %s' % direct_comparisons_path)
        precision_at_k_plot_path = '/'.join(
            [dest_dir, precision_at_k_plot_name])
        log.debug('precision_at_k_plot_path: %s' % precision_at_k_plot_path)
        precision_recall_plot_path = '/'.join(
            [dest_dir, precision_recall_plot_name])
        log.debug('precision_recall_plot_path: %s' %
                  precision_recall_plot_path)
        roc_plot_path = '/'.join([dest_dir, roc_plot_name])
        log.debug('roc_plot_path: %s' % roc_plot_path)
        report_path = '/'.join([dest_dir, report_name])
        log.debug('report_path: %s' % report_path)

        # Build plot titles.
        roc_plot_title = 'ROC (%s)' % pipeline_prefix
        precision_recall_plot_title = 'Precision-Recall (%s)' % pipeline_prefix
        precision_at_k_plot_title = 'Precision @K (%s)' % pipeline_prefix

        # Write output.
        analyzer.output_direct_comparisons(direct_comparisons_path)
        analyzer.plot_roc_curve(roc_plot_title, roc_plot_path)
        analyzer.plot_precision_recall_curve(precision_recall_plot_title,
                                             precision_recall_plot_path)
        analyzer.plot_precision_at_k_curve(precision_at_k_plot_title,
                                           precision_at_k_plot_path)
        analyzer.write_report(report_path, ci=0.95)
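
The helpers above build output paths with '/'.join, which assumes POSIX-style
separators. A minimal, portable alternative is os.path.join from the standard
library; the directory and file names below are hypothetical, for illustration
only.

import os

# Hypothetical names, for illustration only.
dest_dir = 'results/lab-panel'
report_name = 'lab-panel-report.tab'

# os.path.join picks the separator appropriate for the host OS.
report_path = os.path.join(dest_dir, report_name)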
Example #4
class TestClassifierAnalyzer(MedInfoTestCase):
    def setUp(self):
        log.level = logging.ERROR
        # Use simple classifier and test case for testing non-ROC analyses.
        X = RANDOM_10_TEST_CASE['X']
        y = RANDOM_10_TEST_CASE['y']
        self._list_classifier = ListPredictor([0, 1])
        self._lc_analyzer = ClassifierAnalyzer(self._list_classifier, X, y)

        # Use ml classifier and complex test case.
        X = RANDOM_100_TEST_CASE['X']
        y = RANDOM_100_TEST_CASE['y']
        # Generate train/test split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=123456789)
        # Train logistic regression model.
        hyperparams = {
            'algorithm': SupervisedClassifier.REGRESS_AND_ROUND,
            'random_state': 123456789
        }
        self._ml_classifier = SupervisedClassifier([0, 1], hyperparams)
        self._ml_classifier.train(X_train, column_or_1d(y_train))
        self._ml_analyzer = ClassifierAnalyzer(self._ml_classifier, X_test,
                                               y_test)

    def tearDown(self):
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # Clean up the actual report file.
        try:
            actual_report_name = 'actual-list-classifier.report'
            actual_report_path = '/'.join([test_dir, actual_report_name])
            os.remove(actual_report_path)
        except OSError:
            pass

        # Clean up the actual precision-recall plot.
        try:
            actual_plot_name = 'actual-precision-recall-plot.png'
            actual_plot_path = '/'.join([test_dir, actual_plot_name])
            os.remove(actual_plot_path)
        except OSError:
            pass

        # Clean up the actual roc plot.
        try:
            actual_plot_name = 'actual-roc-plot.png'
            actual_plot_path = '/'.join([test_dir, actual_plot_name])
            os.remove(actual_plot_path)
        except OSError:
            pass

        # Clean up the actual precision at k plot.
        try:
            actual_plot_name = 'actual-precision-at-k-plot.png'
            actual_plot_path = '/'.join([test_dir, actual_plot_name])
            os.remove(actual_plot_path)
        except OSError:
            pass

    def _assert_fuzzy_equality(self, expected, actual):
        abs_diff = abs(actual - expected)
        rel_diff = abs_diff / expected
        self.assertTrue(rel_diff < 0.1)
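    # Worked example with hypothetical numbers: expected = 0.80, actual = 0.82
    # gives abs_diff = 0.02 and rel_diff = 0.025, within the 10% tolerance.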

    def test_score_accuracy(self):
        # Test accuracy.
        expected_accuracy = RANDOM_10_TEST_CASE['accuracy']
        actual_accuracy = self._lc_analyzer.score()
        self.assertEqual(expected_accuracy, actual_accuracy)

        # Test accuracy with the ml classifier.
        expected_accuracy = RANDOM_100_TEST_CASE['accuracy']
        actual_accuracy = self._ml_analyzer.score()
        self.assertEqual(expected_accuracy, actual_accuracy)

        # Test bootstrapped CIs.
        actual_accuracy, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            ci=0.95, n_bootstrap_iter=1000)
        self.assertEqual(expected_accuracy, actual_accuracy)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['accuracy']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['accuracy']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_recall(self):
        # Test recall.
        expected_recall = RANDOM_10_TEST_CASE['recall']
        actual_recall = self._lc_analyzer.score(
            metric=ClassifierAnalyzer.RECALL_SCORE)
        self.assertEqual(expected_recall, actual_recall)

        # Test recall with the ml classifier.
        expected_recall = RANDOM_100_TEST_CASE['recall']
        actual_recall = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.RECALL_SCORE)
        self.assertEqual(expected_recall, actual_recall)

        # Test bootstrapped CIs.
        actual_recall, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.RECALL_SCORE,
            ci=0.95,
            n_bootstrap_iter=1000)
        self.assertEqual(expected_recall, actual_recall)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['recall']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['recall']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_precision(self):
        # Test precision.
        expected_precision = RANDOM_10_TEST_CASE['precision']
        actual_precision = self._lc_analyzer.score(
            metric=ClassifierAnalyzer.PRECISION_SCORE)
        self.assertEqual(expected_precision, actual_precision)

        # Test precision with the ml classifier.
        expected_precision = RANDOM_100_TEST_CASE['precision']
        actual_precision = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.PRECISION_SCORE)
        self.assertEqual(expected_precision, actual_precision)

        # Test bootstrapped CIs.
        actual_precision, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.PRECISION_SCORE,
            ci=0.95,
            n_bootstrap_iter=1000)
        self.assertEqual(expected_precision, actual_precision)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['precision']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['precision']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_f1(self):
        # Test F1 score.
        expected_f1 = RANDOM_10_TEST_CASE['f1']
        actual_f1 = self._lc_analyzer.score(metric=ClassifierAnalyzer.F1_SCORE)
        self.assertEqual(expected_f1, actual_f1)

        # Test F1 score with the ml classifier.
        expected_f1 = RANDOM_100_TEST_CASE['f1']
        actual_f1 = self._ml_analyzer.score(metric=ClassifierAnalyzer.F1_SCORE)
        self.assertEqual(expected_f1, actual_f1)

        # Test bootstrapped CIs.
        actual_f1, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.F1_SCORE, ci=0.95, n_bootstrap_iter=1000)
        self.assertEqual(expected_f1, actual_f1)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['f1']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['f1']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_average_precision(self):
        # Test average precision.
        expected_average_precision = RANDOM_100_TEST_CASE['average_precision']
        actual_average_precision = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.AVERAGE_PRECISION_SCORE)
        self.assertEqual(expected_average_precision, actual_average_precision)

        # Test bootstrapped CIs.
        actual_average_precision, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.AVERAGE_PRECISION_SCORE,
            ci=0.95,
            n_bootstrap_iter=1000)
        self.assertEqual(expected_average_precision, actual_average_precision)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['average_precision'][
            'lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['average_precision'][
            'upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_roc_auc(self):
        # Test roc_auc.
        expected_roc_auc = RANDOM_100_TEST_CASE['roc_auc']
        actual_roc_auc = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.ROC_AUC_SCORE)
        self.assertEqual(expected_roc_auc, actual_roc_auc)

        # Test bootstrapped CIs.
        actual_roc_auc, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.ROC_AUC_SCORE,
            ci=0.95,
            n_bootstrap_iter=1000)
        self.assertEqual(expected_roc_auc, actual_roc_auc)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['roc_auc']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['roc_auc']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_precision_at_k(self):
        # Test precision at K.
        prev_precision = 1.0
        for k in range(1, 20):
            actual_precision_at_k = self._ml_analyzer.score(
                metric=ClassifierAnalyzer.PRECISION_AT_K_SCORE, k=k)
            expected_precision_at_k = RANDOM_100_TEST_CASE['precision_at_k'][k]
            self.assertEqual(expected_precision_at_k, actual_precision_at_k)

        # Test bootstrapped CIs.
        actual_precision_at_k, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.PRECISION_AT_K_SCORE,
            k=10,
            ci=0.95,
            n_bootstrap_iter=1000)
        expected_precision_at_k = RANDOM_100_TEST_CASE['precision_at_k'][10]
        self.assertEqual(expected_precision_at_k, actual_precision_at_k)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci']['precision_at_k'][
            'lower'][10]
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci']['precision_at_k'][
            'upper'][10]
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_score_percent_predictably_positive(self):
        # Test percent predictably positive.
        expected_ppp = RANDOM_100_TEST_CASE['percent_predictably_positive']
        actual_ppp = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.PERCENT_PREDICTABLY_POSITIVE)
        self.assertEqual(expected_ppp, actual_ppp)

        # Test bootstrapped CIs.
        actual_ppp, actual_lower_ci, actual_upper_ci = self._ml_analyzer.score(
            metric=ClassifierAnalyzer.PERCENT_PREDICTABLY_POSITIVE,
            ci=0.95,
            n_bootstrap_iter=1000)
        self.assertEqual(expected_ppp, actual_ppp)
        expected_lower_ci = RANDOM_100_TEST_CASE['ci'][
            'percent_predictably_positive']['lower']
        self.assertEqual(expected_lower_ci, actual_lower_ci)
        expected_upper_ci = RANDOM_100_TEST_CASE['ci'][
            'percent_predictably_positive']['upper']
        self.assertEqual(expected_upper_ci, actual_upper_ci)

    def test_plot_precision_recall_curve(self):
        # Compute precision-recall curve.
        precision_recall_curve = self._ml_analyzer.compute_precision_recall_curve(
        )

        # Build paths for expected and actual plots.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        actual_plot_name = 'actual-precision-recall-plot.png'
        actual_plot_path = '/'.join([test_dir, actual_plot_name])

        self._ml_analyzer.plot_precision_recall_curve('Precision-Recall Curve',
                                                      actual_plot_path)

        # Not sure how to validate this at the moment, so just validate
        # that it actually passes.
        self.assertTrue(True)

    def test_plot_roc_curve(self):
        # Compute ROC curve.
        roc_curve = self._ml_analyzer.compute_roc_curve()

        # Build paths for expected and actual plots.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        actual_plot_name = 'actual-roc-plot.png'
        actual_plot_path = '/'.join([test_dir, actual_plot_name])

        self._ml_analyzer.plot_roc_curve('ROC', actual_plot_path)

        # Not sure how to validate this at the moment, so just validate
        # that it actually passes.
        self.assertTrue(True)

    def test_plot_precision_at_k_curve(self):
        # Compute precision_recall_curve.
        k_vals, precision_vals = self._ml_analyzer.compute_precision_at_k_curve(
        )

        # Build paths for expected and actual plots.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        actual_plot_name = 'actual-precision-at-k-plot.png'
        actual_plot_path = '/'.join([test_dir, actual_plot_name])

        self._ml_analyzer.plot_precision_at_k_curve('Precision at K',
                                                    actual_plot_path)

        # Not sure how to validate this at the moment, so just validate
        # that it actually passes.
        self.assertTrue(True)

    def test_build_report(self):
        # Build report.
        expected_report = RANDOM_100_TEST_CASE['report']
        actual_report = self._ml_analyzer.build_report()[0]
        log.debug('expected_report: %s' % expected_report)
        log.debug('actual_report: %s' % actual_report)
        assert_frame_equal(expected_report, actual_report)

        # Build bootstrapped report.
        expected_report = RANDOM_100_TEST_CASE['ci']['report']
        actual_report = self._ml_analyzer.build_report(ci=0.95)[0]
        assert_frame_equal(expected_report, actual_report)

        # Build paths for expected and actual report.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        actual_report_name = 'actual-list-classifier.report'
        actual_report_path = '/'.join([test_dir, actual_report_name])

        # Write the report.
        self._ml_analyzer.write_report(actual_report_path)

        # Not sure how to validate this at the moment, so just validate
        # that it actually passes.
        self.assertTrue(True)
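
A minimal sketch for running the test case above, assuming MedInfoTestCase
ultimately derives from unittest.TestCase (that base class is not shown in
this excerpt):

import unittest

if __name__ == '__main__':
    # Load and run only TestClassifierAnalyzer with a standard text runner.
    suite = unittest.TestLoader().loadTestsFromTestCase(TestClassifierAnalyzer)
    unittest.TextTestRunner(verbosity=2).run(suite)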
Example #5
    def _analyze_predictor_holdoutset(self, dest_dir, pipeline_prefix):
        slugified_var = '-'.join(self._var.split())
        holdout_path = dest_dir + '/../' + '%s-normality-matrix-%d-episodes-processed-holdout.tab' % (
            slugified_var, self._num_rows)
        fm_io = FeatureMatrixIO()
        processed_matrix = fm_io.read_file_to_data_frame(holdout_path)
        if self._isLabPanel:
            y_holdout = pd.DataFrame(
                processed_matrix.pop('all_components_normal'))
        else:
            y_holdout = pd.DataFrame(processed_matrix.pop('component_normal'))
        X_holdout = processed_matrix
        analyzer = ClassifierAnalyzer(self._predictor, X_holdout, y_holdout)
        train_label = 'holdoutset'

        # Build names for output plots and report.
        direct_comparisons_name = '%s-direct-compare-results-%s.csv' % (
            pipeline_prefix, train_label)
        precision_at_k_plot_name = '%s-precision-at-k-plot-%s.png' % (
            pipeline_prefix, train_label)
        precision_recall_plot_name = '%s-precision-recall-plot-%s.png' % (
            pipeline_prefix, train_label)
        roc_plot_name = '%s-roc-plot-%s.png' % (pipeline_prefix, train_label)
        report_name = '%s-report-%s.tab' % (pipeline_prefix, train_label)

        # Build paths.
        direct_comparisons_path = '/'.join([dest_dir, direct_comparisons_name])
        log.debug('direct_comparisons_path: %s' % direct_comparisons_path)
        precision_at_k_plot_path = '/'.join(
            [dest_dir, precision_at_k_plot_name])
        log.debug('precision_at_k_plot_path: %s' % precision_at_k_plot_path)
        precision_recall_plot_path = '/'.join(
            [dest_dir, precision_recall_plot_name])
        log.debug('precision_recall_plot_path: %s' %
                  precision_recall_plot_path)
        roc_plot_path = '/'.join([dest_dir, roc_plot_name])
        log.debug('roc_plot_path: %s' % roc_plot_path)
        report_path = '/'.join([dest_dir, report_name])
        log.debug('report_path: %s' % report_path)

        # Build plot titles.
        roc_plot_title = 'ROC (%s)' % pipeline_prefix
        precision_recall_plot_title = 'Precision-Recall (%s)' % pipeline_prefix
        precision_at_k_plot_title = 'Precision @K (%s)' % pipeline_prefix

        # Write output.
        analyzer.output_direct_comparisons(direct_comparisons_path)
        analyzer.plot_roc_curve(roc_plot_title, roc_plot_path)
        analyzer.plot_precision_recall_curve(precision_recall_plot_title,
                                             precision_recall_plot_path)
        analyzer.plot_precision_at_k_curve(precision_at_k_plot_title,
                                           precision_at_k_plot_path)
        analyzer.write_report(report_path, ci=0.95)
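
Taken together, these examples follow one pattern: train a classifier, wrap it
in a ClassifierAnalyzer, then score it and write plots and a report. The
condensed sketch below restates that pattern using only calls that appear in
the snippets above; the output file names are hypothetical, and the
project-level imports are omitted because their module paths are not shown in
this excerpt.

from sklearn.model_selection import train_test_split
from sklearn.utils.validation import column_or_1d

# Project-level imports (ClassifierAnalyzer, SupervisedClassifier,
# RANDOM_100_TEST_CASE) are assumed to be available; their module paths
# do not appear in the snippets above.

X = RANDOM_100_TEST_CASE['X']
y = RANDOM_100_TEST_CASE['y']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123456789)

# Train a regress-and-round classifier, as in the test setUp above.
hyperparams = {
    'algorithm': SupervisedClassifier.REGRESS_AND_ROUND,
    'random_state': 123456789
}
classifier = SupervisedClassifier([0, 1], hyperparams)
classifier.train(X_train, column_or_1d(y_train))

# Analyze the held-out split: point estimate plus bootstrapped 95% CI.
analyzer = ClassifierAnalyzer(classifier, X_test, y_test)
roc_auc, lower_ci, upper_ci = analyzer.score(
    metric=ClassifierAnalyzer.ROC_AUC_SCORE, ci=0.95, n_bootstrap_iter=1000)

# Write plots and a tabular report to hypothetical output paths.
analyzer.plot_roc_curve('ROC (example)', 'example-roc-plot.png')
analyzer.write_report('example-report.tab', ci=0.95)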