Example #1
    def test_leave_one_out(self, mock_train_test_splitter,
                           mock_classifier_service, mock_subject_builder):
        attributed_classifier = AttributedClassifier(
            name="Logistic Regression", classifier=LogisticRegression())
        feature_sets = [[FeatureType.cosine, FeatureType.circadian_model],
                        [FeatureType.count]]

        mock_subject_builder.get_all_subject_ids.return_value = subject_ids = [
            "subjectA", "subjectB"
        ]
        mock_subject_builder.get_subject_dictionary.return_value = subject_dictionary = {
            "subjectA": [],
            "subjectB": []
        }

        mock_train_test_splitter.leave_one_out.return_value = expected_data_splits = [
            DataSplit(training_set="subjectA", testing_set="subjectB")
        ]

        mock_classifier_service.run_sw.side_effect = raw_performance_arrays = [
            [
                RawPerformance(true_labels=np.array([1, 2]),
                               class_probabilities=np.array([3, 4])),
                RawPerformance(true_labels=np.array([0, 1]),
                               class_probabilities=np.array([2, 3]))
            ],
            [
                RawPerformance(true_labels=np.array([1, 1]),
                               class_probabilities=np.array([4, 4])),
                RawPerformance(true_labels=np.array([0, 0]),
                               class_probabilities=np.array([2, 2]))
            ]
        ]

        returned_summary = SleepWakeClassifierSummaryBuilder.build_leave_one_out(
            attributed_classifier, feature_sets)

        mock_subject_builder.get_all_subject_ids.assert_called_once_with()
        mock_subject_builder.get_subject_dictionary.assert_called_once_with()
        mock_train_test_splitter.leave_one_out.assert_called_once_with(
            subject_ids)

        mock_classifier_service.run_sw.assert_has_calls([
            call(expected_data_splits, attributed_classifier,
                 subject_dictionary, feature_sets[0]),
            call(expected_data_splits, attributed_classifier,
                 subject_dictionary, feature_sets[1])
        ])
        self.assertEqual(returned_summary.attributed_classifier,
                         attributed_classifier)
        self.assertEqual(
            returned_summary.performance_dictionary[tuple(feature_sets[0])],
            raw_performance_arrays[0])
        self.assertEqual(
            returned_summary.performance_dictionary[tuple(feature_sets[1])],
            raw_performance_arrays[1])
    def convert_three_class_to_two(raw_performance: RawPerformance):
        # Collapse the three-class labels down to binary sleep/wake labels.
        raw_performance.true_labels = SleepLabeler.label_sleep_wake(
            raw_performance.true_labels)

        # Merge the probability mass of the third class into the second,
        # then drop the now-redundant last column.
        raw_performance.class_probabilities[:, 1] += \
            raw_performance.class_probabilities[:, 2]
        raw_performance.class_probabilities = \
            raw_performance.class_probabilities[:, :-1]

        return raw_performance
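
The same column collapse can be seen in isolation with plain NumPy. A minimal sketch; the three-class array and its column order (wake first, then two sleep stages) are assumptions for illustration:

import numpy as np

# Hypothetical three-class probabilities, one row per epoch.
probabilities = np.array([[0.6, 0.3, 0.1],
                          [0.2, 0.5, 0.3]])

# Fold the third column into the second, then drop it.
probabilities[:, 1] += probabilities[:, 2]
two_class = probabilities[:, :-1]
print(two_class)  # [[0.6 0.4]
                  #  [0.2 0.8]]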
Example #3
    def test_build_roc_from_raw_performances(self):
        raw_performances = [
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0.2, 0.8],
                                                         [0.1, 0.9]]))
        ]

        first_false_positive_rates, first_true_positive_rates, first_thresholds = roc_curve(
            raw_performances[0].true_labels,
            raw_performances[0].class_probabilities[:, 1],
            pos_label=SleepWakeLabel.sleep.value,
            drop_intermediate=False)

        second_false_positive_rates, second_true_positive_rates, second_thresholds = roc_curve(
            raw_performances[1].true_labels,
            raw_performances[1].class_probabilities[:, 1],
            pos_label=SleepWakeLabel.sleep.value,
            drop_intermediate=False)

        horizontal_axis_bins, vertical_axis_bins = CurvePerformanceBuilder.get_axes_bins()

        first_interpolated_true_positive_rates = np.interp(
            horizontal_axis_bins, first_false_positive_rates,
            first_true_positive_rates)

        second_interpolated_true_positive_rates = np.interp(
            horizontal_axis_bins, second_false_positive_rates,
            second_true_positive_rates)

        expected_true_positive_rates = (
            first_interpolated_true_positive_rates +
            second_interpolated_true_positive_rates) / 2

        horizontal_axis_bins = np.insert(horizontal_axis_bins, 0, 0, axis=0)
        expected_true_positive_rates = np.insert(expected_true_positive_rates,
                                                 0,
                                                 0,
                                                 axis=0)

        roc_performance = CurvePerformanceBuilder.build_roc_from_raw(
            raw_performances, 1)

        self.assertListEqual(horizontal_axis_bins.tolist(),
                             roc_performance.false_positive_rates.tolist())
        self.assertListEqual(expected_true_positive_rates.tolist(),
                             roc_performance.true_positive_rates.tolist())
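
Outside the test harness, the averaging above reduces to interpolating each fold's ROC curve onto a shared false-positive-rate grid and taking the mean. A minimal sketch with scikit-learn and NumPy; the grid is an assumed stand-in, since the real bins come from CurvePerformanceBuilder.get_axes_bins():

import numpy as np
from sklearn.metrics import roc_curve

folds = [(np.array([0, 1, 1, 0]), np.array([0.2, 0.9, 0.6, 0.4])),
         (np.array([0, 0, 1, 1]), np.array([0.1, 0.5, 0.7, 0.8]))]

bins = np.linspace(0.001, 1, 1000)  # assumed stand-in for get_axes_bins()
interpolated = []
for labels, scores in folds:
    false_positive_rates, true_positive_rates, _ = roc_curve(
        labels, scores, pos_label=1, drop_intermediate=False)
    interpolated.append(
        np.interp(bins, false_positive_rates, true_positive_rates))

mean_true_positive_rates = np.mean(interpolated, axis=0)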
Example #4
    def run_three_class_with_loaded_model(data_splits, classifier,
                                          subject_dictionary, feature_set):

        raw_performances = []
        for index, data_split in enumerate(data_splits):
            if index == 0:
                # Train only on the first split; the fitted ("loaded")
                # classifier is reused for every subsequent testing set.
                training_x, training_y = ClassifierInputBuilder.get_three_class_inputs(
                    data_split.training_set,
                    subject_dictionary=subject_dictionary,
                    feature_set=feature_set)
                classifier = ClassifierService.train_classifier(
                    training_x, training_y, classifier, 'neg_log_loss')

            testing_x, testing_y = ClassifierInputBuilder.get_three_class_inputs(
                data_split.testing_set,
                subject_dictionary=subject_dictionary,
                feature_set=feature_set)
            class_probabilities = classifier.predict_proba(testing_x)

            raw_performance = RawPerformance(
                true_labels=testing_y, class_probabilities=class_probabilities)
            raw_performances.append(raw_performance)

        return raw_performances
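
Stripped of the project-specific builders, the train-once-score-many pattern in run_three_class_with_loaded_model looks like this; the data is synthetic and sklearn's LogisticRegression stands in for the wrapped classifier:

import numpy as np
from sklearn.linear_model import LogisticRegression

features = np.arange(60, dtype=float).reshape(20, 3)
labels = np.tile([0, 1, 2], 7)[:20]   # synthetic three-class labels
folds = [(features, labels)] * 3      # pretend each fold held different data

classifier = None
for index, (fold_x, fold_y) in enumerate(folds):
    if index == 0:
        # Fit once on the first fold; later folds reuse the fitted model.
        classifier = LogisticRegression().fit(fold_x, fold_y)
    class_probabilities = classifier.predict_proba(fold_x)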
Example #5
    def test_build_from_raw(self):
        threshold = 0.2
        raw_performance = RawPerformance(true_labels=np.array([1, 0]),
                                         class_probabilities=np.array(
                                             [[0.1, 0.9], [0.3, 0.7]]))

        predicted_labels = np.array([1, 1])
        kappa = cohen_kappa_score(raw_performance.true_labels,
                                  predicted_labels)

        sleep_predictive_value = precision_score(raw_performance.true_labels,
                                                 predicted_labels,
                                                 pos_label=1)
        wake_predictive_value = precision_score(raw_performance.true_labels,
                                                predicted_labels,
                                                pos_label=0)
        false_positive_rates, true_positive_rates, thresholds = roc_curve(
            raw_performance.true_labels,
            raw_performance.class_probabilities[:, 1],
            pos_label=1,
            drop_intermediate=False)
        auc_value = auc(false_positive_rates, true_positive_rates)

        expected_performance = SleepWakePerformance(
            accuracy=np.float64(0.5),
            wake_correct=np.float64(0),
            sleep_correct=np.float64(1.0),
            kappa=kappa,
            auc=auc_value,
            sleep_predictive_value=sleep_predictive_value,
            wake_predictive_value=wake_predictive_value)

        performance = PerformanceBuilder.build_with_sleep_threshold(
            raw_performance, threshold)
        TestHelper.assert_models_equal(self, expected_performance, performance)
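
The hard-coded predicted_labels in the test are what thresholding the sleep-class column at 0.2 yields; presumably PerformanceBuilder.build_with_sleep_threshold does the equivalent internally (an assumption). The step in isolation:

import numpy as np
from sklearn.metrics import cohen_kappa_score

true_labels = np.array([1, 0])
class_probabilities = np.array([[0.1, 0.9], [0.3, 0.7]])

# Predict sleep (1) whenever the sleep probability clears the threshold.
predicted_labels = (class_probabilities[:, 1] >= 0.2).astype(int)
print(predicted_labels)                                  # [1 1]
print(cohen_kappa_score(true_labels, predicted_labels))  # 0.0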
Example #6
    def test_build_pr_from_raw_performances(self):
        raw_performances = [
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0.2, 0.8],
                                                         [0.1, 0.9]]))
        ]

        first_precisions, first_recalls, first_thresholds = precision_recall_curve(
            raw_performances[0].true_labels,
            raw_performances[0].class_probabilities[:, 0],
            pos_label=SleepWakeLabel.wake.value)

        second_precisions, second_recalls, second_thresholds = precision_recall_curve(
            raw_performances[1].true_labels,
            raw_performances[1].class_probabilities[:, 0],
            pos_label=SleepWakeLabel.wake.value)

        horizontal_axis_bins, vertical_axis_bins = CurvePerformanceBuilder.get_axes_bins()

        first_interpolated_precisions = np.interp(horizontal_axis_bins,
                                                  np.flip(first_recalls),
                                                  np.flip(first_precisions))

        second_interpolated_precisions = np.interp(horizontal_axis_bins,
                                                   np.flip(second_recalls),
                                                   np.flip(second_precisions))

        expected_precisions = (first_interpolated_precisions +
                               second_interpolated_precisions) / 2

        horizontal_axis_bins = np.insert(horizontal_axis_bins, 0, 0, axis=0)
        expected_precisions = np.insert(expected_precisions, 0, 1, axis=0)

        pr_performance = CurvePerformanceBuilder.build_precision_recall_from_raw(
            raw_performances)

        self.assertListEqual(horizontal_axis_bins.tolist(),
                             pr_performance.recalls.tolist())
        self.assertListEqual(expected_precisions.tolist(),
                             pr_performance.precisions.tolist())
    def test_properties(self):

        true_labels = np.array([0, 1, 2])
        class_probabilities = np.array([[0.1, 0.9], [0, 1]])
        raw_performance = RawPerformance(
            true_labels=true_labels, class_probabilities=class_probabilities)
        self.assertEqual(raw_performance.true_labels.tolist(),
                         true_labels.tolist())
        self.assertEqual(raw_performance.class_probabilities.tolist(),
                         class_probabilities.tolist())
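
In test_build_pr_from_raw_performances above, the np.flip calls matter because precision_recall_curve returns recalls in decreasing order while np.interp expects ascending x values. The step in isolation, again with an assumed recall grid:

import numpy as np
from sklearn.metrics import precision_recall_curve

labels = np.array([0, 1, 1, 0])
wake_probabilities = np.array([0.8, 0.3, 0.4, 0.9])
precisions, recalls, _ = precision_recall_curve(labels, wake_probabilities,
                                                pos_label=0)

bins = np.linspace(0.001, 1, 1000)  # assumed recall grid
interpolated_precisions = np.interp(bins, np.flip(recalls),
                                    np.flip(precisions))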
Example #8
    def test_summarize_thresholds(self, mock_performance_builder, mock_performance_summarizer_average):
        raw_performances = [RawPerformance(true_labels=np.array([0, 1]),
                                           class_probabilities=np.array([[0, 1], [1, 0]])),
                            RawPerformance(true_labels=np.array([0, 1]),
                                           class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]]))]

        mock_performance_builder.build_with_true_positive_rate_threshold.side_effect = [
            'return1', 'return2', 'return3', 'return4',
            'return5', 'return6', 'return7', 'return8'
        ]

        mock_performance_summarizer_average.side_effect = averages = [
            'average1', 'average2', 'average3', 'average4'
        ]

        thresholds, returned_averages = PerformanceSummarizer.summarize_thresholds(raw_performances)

        mock_performance_builder.build_with_true_positive_rate_threshold.assert_has_calls(
            [call(raw_performances[0], 0.8),
             call(raw_performances[1], 0.8),
             call(raw_performances[0], 0.9),
             call(raw_performances[1], 0.9),
             call(raw_performances[0], 0.93),
             call(raw_performances[1], 0.93),
             call(raw_performances[0], 0.95),
             call(raw_performances[1], 0.95)])

        mock_performance_summarizer_average.assert_has_calls([call(['return1', 'return2']),
                                                              call(['return3', 'return4']),
                                                              call(['return5', 'return6']),
                                                              call(['return7', 'return8'])
                                                              ])

        self.assertListEqual([0.8, 0.9, 0.93, 0.95], thresholds)
        self.assertListEqual(averages, returned_averages)
    def test_properties(self):
        attributed_classifier = AttributedClassifier(
            name="Logistic Regression", classifier=LogisticRegression())

        performance_dictionary = {
            (FeatureType.count, FeatureType.cosine): [
                RawPerformance(true_labels=np.array([1, 0, 1, 0]),
                               class_probabilities=np.array([0.1, 0.9]))
            ],
            FeatureType.count: [
                RawPerformance(true_labels=np.array([0, 0, 1, 0]),
                               class_probabilities=np.array([0.9, 0.1]))
            ]
        }

        classifier_summary = ClassifierSummary(
            attributed_classifier=attributed_classifier,
            performance_dictionary=performance_dictionary)

        TestHelper.assert_models_equal(
            self, attributed_classifier,
            classifier_summary.attributed_classifier)
        self.assertDictEqual(performance_dictionary,
                             classifier_summary.performance_dictionary)
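
The mock pattern these tests lean on is unittest.mock's list-valued side_effect, which makes successive calls return successive items. A self-contained illustration with a hypothetical builder mock:

from unittest.mock import MagicMock, call

builder = MagicMock()
builder.build.side_effect = ['first', 'second']

assert builder.build('a') == 'first'   # first call -> first item
assert builder.build('b') == 'second'  # second call -> second item
builder.build.assert_has_calls([call('a'), call('b')])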
Example #10
    def run_single_data_split(training_x,
                              training_y,
                              testing_x,
                              testing_y,
                              attributed_classifier,
                              scoring='roc_auc'):
        start_time = time.time()

        classifier = ClassifierService.train_classifier(
            training_x, training_y, attributed_classifier, scoring)
        class_probabilities = classifier.predict_proba(testing_x)

        raw_performance = RawPerformance(
            true_labels=testing_y, class_probabilities=class_probabilities)

        if Constants.VERBOSE:
            print('Completed data split in ' + str(time.time() - start_time))

        return raw_performance
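
Without the ClassifierService wrapper, the core of run_single_data_split is an ordinary fit/predict_proba round trip; a minimal sklearn stand-in with made-up data:

import numpy as np
from sklearn.linear_model import LogisticRegression

training_x = np.array([[0.0], [1.0], [2.0], [3.0]])
training_y = np.array([0, 0, 1, 1])
testing_x = np.array([[1.5], [2.5]])

classifier = LogisticRegression().fit(training_x, training_y)
class_probabilities = classifier.predict_proba(testing_x)  # shape (2, 2)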
Example #11
    def test_print_table_sw(self, mock_print, mock_summarize_thresholds):
        first_raw_performances = [
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]]))]

        second_raw_performances = [
            RawPerformance(true_labels=np.array([1, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 0]),
                           class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]]))]

        performance_dictionary = {tuple([FeatureType.count, FeatureType.heart_rate]): first_raw_performances,
                                  tuple([FeatureType.count]): second_raw_performances}

        attributed_classifier = AttributedClassifier(name="Logistic Regression", classifier=LogisticRegression())
        classifier_summary = ClassifierSummary(attributed_classifier=attributed_classifier,
                                               performance_dictionary=performance_dictionary)

        first_performance = SleepWakePerformance(accuracy=0, wake_correct=0, sleep_correct=0, kappa=0, auc=0,
                                                 sleep_predictive_value=0,
                                                 wake_predictive_value=0)
        second_performance = SleepWakePerformance(accuracy=1, wake_correct=1, sleep_correct=1, kappa=1, auc=1,
                                                  sleep_predictive_value=1,
                                                  wake_predictive_value=1)
        third_performance = SleepWakePerformance(accuracy=0.5, wake_correct=0.5, sleep_correct=0.5, kappa=0.5, auc=0.5,
                                                 sleep_predictive_value=0.5,
                                                 wake_predictive_value=0.5)
        fourth_performance = SleepWakePerformance(accuracy=0.2, wake_correct=0.2, sleep_correct=0.2, kappa=0.2, auc=0.2,
                                                  sleep_predictive_value=0.2,
                                                  wake_predictive_value=0.2)

        mock_summarize_thresholds.side_effect = [([0.3, 0.7], [first_performance, second_performance]),
                                                 ([0.2, 0.8], [third_performance, fourth_performance])]
        TableBuilder.print_table_sw(classifier_summary)

        frontmatter = '\\begin{table}  \\caption{Sleep/wake differentiation performance by Logistic Regression ' \
                      + 'across different feature inputs in the Apple Watch (PPG, MEMS) dataset} ' \
                        '\\begin{tabular}{l*{5}{c}} & Accuracy & Wake correct (specificity) ' \
                        '& Sleep correct (sensitivity) & $\\kappa$ & AUC \\\\ '
        header_line_1 = '\\hline Motion, HR &'
        header_line_2 = '\\hline Motion only &'

        results_line_1 = '& ' + str(first_performance.accuracy) + ' & ' + str(
            first_performance.wake_correct) + ' & ' + str(
            first_performance.sleep_correct) + ' & ' + str(first_performance.kappa) + ' &   \\\\'
        results_line_2 = '& ' + str(second_performance.accuracy) + ' & ' + str(
            second_performance.wake_correct) + ' & ' + str(
            second_performance.sleep_correct) + ' & ' + str(second_performance.kappa) + ' &   \\\\'
        results_line_3 = '& ' + str(third_performance.accuracy) + ' & ' + str(
            third_performance.wake_correct) + ' & ' + str(
            third_performance.sleep_correct) + ' & ' + str(third_performance.kappa) + ' &   \\\\'
        results_line_4 = str(fourth_performance.accuracy) + ' & ' + str(
            fourth_performance.wake_correct) + ' & ' + str(
            fourth_performance.sleep_correct) + ' & ' + str(fourth_performance.kappa) + ' & ' + str(
            fourth_performance.auc) + '  \\\\'

        backmatter = '\\hline \\end{tabular}  \\label{tab:' \
                     + attributed_classifier.name[0:4] \
                     + 'params} \\small \\vspace{.2cm} ' \
                       '\\caption*{Fraction of wake correct, fraction of sleep correct, accuracy, ' \
                       '$\\kappa$, and AUC for sleep-wake predictions of Logistic Regression' \
                       ' with use of motion, HR, clock proxy, or combination of features. PPG, ' \
                       'photoplethysmography; MEMS, microelectromechanical systems; HR, heart rate; ' \
                       'AUC, area under the curve.} \\end{table}'

        mock_print.assert_has_calls([call(frontmatter),
                                     call(header_line_1),
                                     call(results_line_1),
                                     call(results_line_2),
                                     call(header_line_2),
                                     call(results_line_3),
                                     call(results_line_4),
                                     call(backmatter)])
    def test_plot_pr(self, mock_curve_performance_builder,
                     mock_feature_set_service, mock_plt):
        first_raw_performances = [
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([[0.2, 0.8],
                                                         [0.1, 0.9]]))
        ]

        second_raw_performances = [
            RawPerformance(true_labels=np.array([1, 1]),
                           class_probabilities=np.array([[0, 1], [1, 0]])),
            RawPerformance(true_labels=np.array([0, 0]),
                           class_probabilities=np.array([[0.2, 0.8],
                                                         [0.1, 0.9]]))
        ]

        performance_dictionary = {
            tuple([FeatureType.count, FeatureType.heart_rate]):
            first_raw_performances,
            tuple([FeatureType.count]): second_raw_performances
        }

        attributed_classifier = AttributedClassifier(
            name="Logistic Regression", classifier=LogisticRegression())
        classifier_summary = ClassifierSummary(
            attributed_classifier=attributed_classifier,
            performance_dictionary=performance_dictionary)

        first_pr_performance = PrecisionRecallPerformance(
            recalls=np.array([1]), precisions=np.array([.2]))
        second_pr_performance = PrecisionRecallPerformance(
            recalls=np.array([.3]), precisions=np.array([1]))

        first_label = 'Label 1'
        second_label = 'Label 2'
        first_color = '#ffffff'
        second_color = '#123456'

        mock_curve_performance_builder.build_precision_recall_from_raw.side_effect = [
            first_pr_performance, second_pr_performance
        ]
        mock_feature_set_service.get_label.side_effect = [
            first_label, second_label
        ]
        mock_feature_set_service.get_color.side_effect = [
            first_color, second_color
        ]

        CurvePlotBuilder.build_pr_plot(classifier_summary)

        mock_curve_performance_builder.build_precision_recall_from_raw.assert_has_calls(
            [call(first_raw_performances)])
        mock_curve_performance_builder.build_precision_recall_from_raw.assert_has_calls(
            [call(second_raw_performances)])

        mock_feature_set_service.get_label.assert_has_calls(
            [call([FeatureType.count, FeatureType.heart_rate])])

        mock_feature_set_service.get_color.assert_has_calls(
            [call([FeatureType.count, FeatureType.heart_rate])])

        mock_feature_set_service.get_label.assert_has_calls(
            [call([FeatureType.count])])

        mock_feature_set_service.get_color.assert_has_calls(
            [call([FeatureType.count])])

        mock_plt.plot.assert_has_calls([
            call(first_pr_performance.recalls,
                 first_pr_performance.precisions,
                 label=first_label,
                 color=first_color)
        ])
        mock_plt.plot.assert_has_calls([
            call(second_pr_performance.recalls,
                 second_pr_performance.precisions,
                 label=second_label,
                 color=second_color)
        ])
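
Each asserted plot call reduces to one labeled, colored matplotlib line; a sketch with made-up curve values (the label and color mirror the mocked ones above):

import numpy as np
import matplotlib.pyplot as plt

recalls = np.array([0.0, 0.5, 1.0])
precisions = np.array([1.0, 0.8, 0.6])

plt.plot(recalls, precisions, label='Label 2', color='#123456')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()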