def get_classifiers():
    """Build the list of named classifiers.

    Returns:
        list: Four ``AttributedClassifier`` objects wrapping, in order, a
        random forest, an L1-penalised logistic regression, a
        distance-weighted k-nearest-neighbours model and a multi-layer
        perceptron.
    """
    random_forest = RandomForestClassifier(
        n_estimators=100,
        max_features=1.0,
        max_depth=10,
        min_samples_split=10,
        min_samples_leaf=32,
        bootstrap=True,
    )
    logistic_regression = LogisticRegression(
        penalty='l1',
        solver='liblinear',
        verbose=0,
        multi_class='auto',
    )
    nearest_neighbors = KNeighborsClassifier(weights='distance')
    neural_net = MLPClassifier(
        activation='relu',
        hidden_layer_sizes=(15, 15, 15),
        max_iter=2000,
        alpha=0.01,
        solver='adam',
        verbose=False,
        n_iter_no_change=20,
    )

    # The display name is paired with its estimator; list order is the
    # order callers receive.
    labelled_estimators = (
        ('Random Forest', random_forest),
        ('Logistic Regression', logistic_regression),
        ('k-Nearest Neighbors', nearest_neighbors),
        ('Neural Net', neural_net),
    )
    return [
        AttributedClassifier(name=label, classifier=estimator)
        for label, estimator in labelled_estimators
    ]
def test_get_classifiers(self):
    """Compare the first four results of ``utils.get_classifiers()`` with references.

    Each returned classifier is checked, position by position, against a
    hand-built ``AttributedClassifier`` via ``TestHelper.assert_models_equal``.
    """
    # NOTE(review): which attributes assert_models_equal compares (names,
    # estimator types, hyperparameters) is not visible here -- confirm.
    returned_classifiers = utils.get_classifiers()

    reference_classifiers = [
        AttributedClassifier(
            name='Random Forest',
            classifier=RandomForestClassifier(
                n_estimators=500,
                max_features=1.0,
                max_depth=10,
                min_samples_split=10,
                min_samples_leaf=1,
            ),
        ),
        AttributedClassifier(
            name='Logistic Regression',
            classifier=LogisticRegression(
                penalty='l1',
                solver='liblinear',
                verbose=0,
            ),
        ),
        AttributedClassifier(
            name='k-Nearest Neighbors',
            classifier=KNeighborsClassifier(),
        ),
        AttributedClassifier(
            name='Neural Net',
            classifier=MLPClassifier(
                activation='relu',
                hidden_layer_sizes=(30, 30, 30),
                max_iter=1000,
                alpha=0.01,
            ),
        ),
    ]

    # Index explicitly (rather than zip) so a too-short result still fails
    # with an IndexError.
    for position, reference in enumerate(reference_classifiers):
        TestHelper.assert_models_equal(
            self, returned_classifiers[position], reference)
def test_make_pr_plot(self, mock_build_pr, mock_tidy_plot, mock_set_labels,
                      mock_plt):
    """Verify the calls ``CurvePlotBuilder.make_pr_sw`` makes on its collaborators.

    The mock arguments are presumably injected by patch decorators that sit
    outside this view. After one call to ``make_pr_sw`` the test expects the
    plot to be built from the summary, tidied, labelled, saved under the
    figure directory and closed, each exactly once.
    """
    feature_keys = (
        (FeatureType.count, FeatureType.heart_rate),
        (FeatureType.count,),
    )
    # A fresh placeholder list is created for every feature set.
    performance_dictionary = {
        key: ['placeholder', 'for', 'raw', 'performances']
        for key in feature_keys
    }
    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    classifier_summary = ClassifierSummary(
        attributed_classifier=attributed_classifier,
        performance_dictionary=performance_dictionary,
    )

    CurvePlotBuilder.make_pr_sw(classifier_summary)

    mock_build_pr.assert_called_once_with(classifier_summary)
    mock_tidy_plot.assert_called_once_with()
    mock_set_labels.assert_called_once_with(
        attributed_classifier,
        'Fraction of wake scored as wake',
        'Fraction of predicted wake correct',
        (0.5, 1.0),
    )

    expected_file_name = attributed_classifier.name + '_' + str(4) + '__sw_pr.png'
    expected_path = Constants.FIGURE_FILE_PATH.joinpath(expected_file_name)
    mock_plt.savefig.assert_called_once_with(str(expected_path))
    mock_plt.close.assert_called_once_with()
def test_properties(self):
    """Check that ``AttributedClassifier`` exposes the name and classifier it was given."""
    expected_name = "k-Nearest Neighbors"
    expected_classifier = KNeighborsClassifier()

    wrapped = AttributedClassifier(
        name=expected_name,
        classifier=expected_classifier,
    )

    self.assertEqual(expected_name, wrapped.name)
    self.assertEqual(expected_classifier, wrapped.classifier)
def test_leave_one_out(self, mock_train_test_splitter, mock_classifier_service,
                       mock_subject_builder):
    """Verify ``SleepWakeClassifierSummaryBuilder.build_leave_one_out`` wiring.

    The mocks (presumably injected by patch decorators outside this view)
    stand in for the subject builder, the train/test splitter and the
    classifier service. The test expects one ``run_sw`` call per feature
    set and a summary whose performance dictionary is keyed by the feature
    set converted to a tuple.
    """
    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    feature_sets = [
        [FeatureType.cosine, FeatureType.circadian_model],
        [FeatureType.count],
    ]

    # Canned collaborator responses.
    subject_ids = ["subjectA", "subjectB"]
    mock_subject_builder.get_all_subject_ids.return_value = subject_ids

    subject_dictionary = {"subjectA": [], "subjectB": []}
    mock_subject_builder.get_subject_dictionary.return_value = subject_dictionary

    expected_data_splits = [
        DataSplit(training_set="subjectA", testing_set="subjectB")
    ]
    mock_train_test_splitter.leave_one_out.return_value = expected_data_splits

    # One list of raw performances is handed back per run_sw call.
    raw_performance_arrays = [
        [
            RawPerformance(true_labels=np.array([1, 2]),
                           class_probabilities=np.array([3, 4])),
            RawPerformance(true_labels=np.array([0, 1]),
                           class_probabilities=np.array([2, 3])),
        ],
        [
            RawPerformance(true_labels=np.array([1, 1]),
                           class_probabilities=np.array([4, 4])),
            RawPerformance(true_labels=np.array([0, 0]),
                           class_probabilities=np.array([2, 2])),
        ],
    ]
    mock_classifier_service.run_sw.side_effect = raw_performance_arrays

    returned_summary = SleepWakeClassifierSummaryBuilder.build_leave_one_out(
        attributed_classifier, feature_sets)

    mock_subject_builder.get_all_subject_ids.assert_called_once_with()
    mock_subject_builder.get_subject_dictionary.assert_called_once_with()
    mock_train_test_splitter.leave_one_out.assert_called_once_with(subject_ids)
    mock_classifier_service.run_sw.assert_has_calls([
        call(expected_data_splits, attributed_classifier, subject_dictionary,
             feature_set)
        for feature_set in feature_sets
    ])

    self.assertEqual(returned_summary.attributed_classifier,
                     attributed_classifier)
    for feature_set, expected_performances in zip(feature_sets,
                                                  raw_performance_arrays):
        self.assertEqual(
            returned_summary.performance_dictionary[tuple(feature_set)],
            expected_performances)
def get_classifiers():
    """Build the list of named classifiers.

    Returns:
        list: Three ``AttributedClassifier`` objects wrapping, in order, a
        random forest, an L1-penalised logistic regression and a
        distance-weighted k-nearest-neighbours model.
    """
    classifiers = []

    classifiers.append(AttributedClassifier(
        name='Random Forest',
        classifier=RandomForestClassifier(
            n_estimators=100,
            max_features=1.0,
            max_depth=10,
            min_samples_split=10,
            min_samples_leaf=32,
            bootstrap=True,
        ),
    ))

    classifiers.append(AttributedClassifier(
        name='Logistic Regression',
        classifier=LogisticRegression(
            penalty='l1',
            solver='liblinear',
            verbose=0,
            multi_class='auto',
        ),
    ))

    classifiers.append(AttributedClassifier(
        name='k-Nearest Neighbors',
        classifier=KNeighborsClassifier(weights='distance'),
    ))

    return classifiers
def test_set_labels(self, mock_font_manager, mock_plt):
    """Verify the pyplot calls made by ``CurvePlotBuilder.set_labels``.

    A first call with a classifier named "Logistic Regression" must create
    one font-properties object and set the x label, y label and title once
    each. After a second call with a classifier named "Neural Net",
    ``legend`` must have been called exactly once overall.
    """
    font_name = "Arial"
    font_size = 14
    x_label = 'X Label Text'
    y_label = 'Y Label Text'
    legend_location = (1.0, 0.2)

    font_placeholder = 'FontPlaceholder'
    mock_font_manager.FontProperties.return_value = font_placeholder

    logistic_regression = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    CurvePlotBuilder.set_labels(logistic_regression, x_label, y_label,
                                legend_location)

    mock_font_manager.FontProperties.assert_called_once_with(
        family=font_name, style='normal', size=font_size)
    axis_label_style = {'fontsize': font_size, 'fontname': font_name}
    mock_plt.xlabel.assert_called_once_with(x_label, **axis_label_style)
    mock_plt.ylabel.assert_called_once_with(y_label, **axis_label_style)
    mock_plt.title.assert_called_once_with(
        logistic_regression.name,
        fontsize=18,
        fontname=font_name,
        fontweight='bold',
    )

    # The legend assertion runs only after this second call; the
    # once-only checks above must stay before it.
    neural_net = AttributedClassifier(
        name="Neural Net",
        classifier=LogisticRegression(),
    )
    CurvePlotBuilder.set_labels(neural_net, x_label, y_label,
                                legend_location)

    mock_plt.legend.assert_called_once_with(
        bbox_to_anchor=legend_location,
        borderaxespad=0.,
        prop=font_placeholder,
    )
def figures_leave_one_out_three_class_performance():
    """Build a leave-one-out three-class summary for a neural net and plot it.

    A multi-layer perceptron named "Neural Net" is summarised over the
    feature sets returned by ``utils.get_base_feature_sets()`` and the
    summary is passed to ``PerformancePlotBuilder.make_bland_altman``.
    A progress line is printed when ``Constants.VERBOSE`` is truthy.
    """
    neural_net = MLPClassifier(
        activation='relu',
        hidden_layer_sizes=(15, 15, 15),
        max_iter=1000,
        alpha=0.01,
        solver='lbfgs',
    )
    attributed_classifier = AttributedClassifier(
        name='Neural Net',
        classifier=neural_net,
    )
    feature_sets = utils.get_base_feature_sets()

    if Constants.VERBOSE:
        progress_message = 'Running ' + attributed_classifier.name + '...'
        print(progress_message)

    summary = ThreeClassClassifierSummaryBuilder.build_leave_one_out(
        attributed_classifier, feature_sets)
    PerformancePlotBuilder.make_bland_altman(summary)
def test_run_best_parameter_search(self, mock_grid_search):
    """Verify how ``ParameterSearch.run_search`` drives the grid search.

    ``mock_grid_search`` (presumably a patched grid-search class; the
    decorator is outside this view) returns a stand-in whose
    ``best_params_`` is a known dictionary. The test expects the search to
    be constructed with the classifier, the parameter range registered
    under the classifier's name, the requested scoring, ``iid=False`` and
    ``cv=3``; to be fitted on the training data; and ``run_search`` to
    return ``best_params_``.
    """
    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    training_x, training_y = np.array([1, 2, 3]), np.array([4, 5, 6])
    scoring = 'roc_auc'
    expected_parameter_range = ParameterSearch.parameter_dictionary[
        attributed_classifier.name]

    expected_parameters = {'parameter': 'value'}
    search_stand_in = MagicMock()
    search_stand_in.best_params_ = expected_parameters
    mock_grid_search.return_value = search_stand_in

    returned_parameters = ParameterSearch.run_search(
        attributed_classifier, training_x, training_y, scoring=scoring)

    mock_grid_search.assert_called_once_with(
        attributed_classifier.classifier,
        expected_parameter_range,
        scoring=scoring,
        iid=False,
        cv=3,
    )
    search_stand_in.fit.assert_called_once_with(training_x, training_y)
    self.assertDictEqual(expected_parameters, returned_parameters)
def figures_leave_one_out_sleep_wake_performance():
    """Build a leave-one-out sleep/wake summary for a neural net and plot it.

    A multi-layer perceptron named "Neural Net" is summarised over a single
    feature set (count, heart rate, circadian model). The summary is passed
    first to ``make_histogram_with_thresholds`` and then to
    ``make_single_threshold_histograms``. A progress line is printed when
    ``Constants.VERBOSE`` is truthy.
    """
    neural_net = MLPClassifier(
        activation='relu',
        hidden_layer_sizes=(15, 15, 15),
        max_iter=1000,
        alpha=0.01,
        solver='lbfgs',
    )
    attributed_classifier = AttributedClassifier(
        name='Neural Net',
        classifier=neural_net,
    )

    combined_features = [
        FeatureType.count,
        FeatureType.heart_rate,
        FeatureType.circadian_model,
    ]
    feature_sets = [combined_features]

    if Constants.VERBOSE:
        progress_message = 'Running ' + attributed_classifier.name + '...'
        print(progress_message)

    summary = SleepWakeClassifierSummaryBuilder.build_leave_one_out(
        attributed_classifier, feature_sets)
    PerformancePlotBuilder.make_histogram_with_thresholds(summary)
    PerformancePlotBuilder.make_single_threshold_histograms(summary)
def test_properties(self):
    """Check that ``ClassifierSummary`` exposes the values it was built from."""
    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )

    count_cosine_performance = RawPerformance(
        true_labels=np.array([1, 0, 1, 0]),
        class_probabilities=np.array([0.1, 0.9]),
    )
    count_performance = RawPerformance(
        true_labels=np.array([0, 0, 1, 0]),
        class_probabilities=np.array([0.9, 0.1]),
    )
    # The second key is a bare FeatureType rather than a tuple, exactly as
    # in the fixture this test has always used.
    performance_dictionary = {
        (FeatureType.count, FeatureType.cosine): [count_cosine_performance],
        FeatureType.count: [count_performance],
    }

    classifier_summary = ClassifierSummary(
        attributed_classifier=attributed_classifier,
        performance_dictionary=performance_dictionary,
    )

    TestHelper.assert_models_equal(
        self, attributed_classifier, classifier_summary.attributed_classifier)
    self.assertDictEqual(performance_dictionary,
                         classifier_summary.performance_dictionary)
def test_print_table_sw(self, mock_print, mock_summarize_thresholds):
    """Verify the LaTeX lines that ``TableBuilder.print_table_sw`` prints.

    ``mock_print`` and ``mock_summarize_thresholds`` are presumably injected
    by patch decorators outside this view. The summary holds two feature
    sets; ``mock_summarize_thresholds`` yields one (thresholds,
    performances) pair per call. The test expects ``print`` to receive, in
    one consecutive run: the table front matter, a header and two result
    rows for the first feature set, a header and two result rows for the
    second, and the back matter.
    """
    first_raw_performances = [
        RawPerformance(true_labels=np.array([0, 1]),
                       class_probabilities=np.array([[0, 1], [1, 0]])),
        RawPerformance(true_labels=np.array([0, 1]),
                       class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]]))]
    second_raw_performances = [
        RawPerformance(true_labels=np.array([1, 1]),
                       class_probabilities=np.array([[0, 1], [1, 0]])),
        RawPerformance(true_labels=np.array([0, 0]),
                       class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]]))]
    performance_dictionary = {tuple([FeatureType.count, FeatureType.heart_rate]): first_raw_performances,
                              tuple([FeatureType.count]): second_raw_performances}
    attributed_classifier = AttributedClassifier(name="Logistic Regression", classifier=LogisticRegression())
    classifier_summary = ClassifierSummary(attributed_classifier=attributed_classifier,
                                           performance_dictionary=performance_dictionary)

    # Four distinguishable performances so every printed row can be told apart.
    first_performance = SleepWakePerformance(accuracy=0, wake_correct=0, sleep_correct=0, kappa=0, auc=0,
                                             sleep_predictive_value=0, wake_predictive_value=0)
    second_performance = SleepWakePerformance(accuracy=1, wake_correct=1, sleep_correct=1, kappa=1, auc=1,
                                              sleep_predictive_value=1, wake_predictive_value=1)
    third_performance = SleepWakePerformance(accuracy=0.5, wake_correct=0.5, sleep_correct=0.5, kappa=0.5, auc=0.5,
                                             sleep_predictive_value=0.5, wake_predictive_value=0.5)
    fourth_performance = SleepWakePerformance(accuracy=0.2, wake_correct=0.2, sleep_correct=0.2, kappa=0.2, auc=0.2,
                                              sleep_predictive_value=0.2, wake_predictive_value=0.2)

    # Each side-effect entry is (thresholds, performances); one entry is
    # consumed per call to the mocked summarizer.
    mock_summarize_thresholds.side_effect = [([0.3, 0.7], [first_performance, second_performance]),
                                             ([0.2, 0.8], [third_performance, fourth_performance])]

    TableBuilder.print_table_sw(classifier_summary)

    # Expected output, assembled from adjacent string literals and '+'.
    frontmatter = '\\begin{table} \\caption{Sleep/wake differentiation performance by Logistic Regression ' \
                  + 'across different feature inputs in the Apple Watch (PPG, MEMS) dataset} ' \
                    '\\begin{tabular}{l*{5}{c}} & Accuracy & Wake correct (specificity) ' \
                    '& Sleep correct (sensitivity) & $\\kappa$ & AUC \\\\ '
    header_line_1 = '\\hline Motion, HR &'
    header_line_2 = '\\hline Motion only &'

    # NOTE(review): result lines 1-3 start with '& ' and leave the AUC cell
    # empty, while line 4 has no leading '& ' and includes the AUC -- confirm
    # this asymmetry matches what TableBuilder.print_table_sw really emits.
    results_line_1 = '& ' + str(first_performance.accuracy) + ' & ' + str(
        first_performance.wake_correct) + ' & ' + str(
        first_performance.sleep_correct) + ' & ' + str(first_performance.kappa) + ' & \\\\'
    results_line_2 = '& ' + str(second_performance.accuracy) + ' & ' + str(
        second_performance.wake_correct) + ' & ' + str(
        second_performance.sleep_correct) + ' & ' + str(second_performance.kappa) + ' & \\\\'
    results_line_3 = '& ' + str(third_performance.accuracy) + ' & ' + str(
        third_performance.wake_correct) + ' & ' + str(
        third_performance.sleep_correct) + ' & ' + str(third_performance.kappa) + ' & \\\\'
    results_line_4 = str(fourth_performance.accuracy) + ' & ' + str(
        fourth_performance.wake_correct) + ' & ' + str(
        fourth_performance.sleep_correct) + ' & ' + str(fourth_performance.kappa) + ' & ' + str(
        fourth_performance.auc) + ' \\\\'

    # The table label uses the first four characters of the classifier name.
    backmatter = '\\hline \\end{tabular} \\label{tab:' \
                 + attributed_classifier.name[0:4] \
                 + 'params} \\small \\vspace{.2cm} ' \
                   '\\caption*{Fraction of wake correct, fraction of sleep correct, accuracy, ' \
                   '$\\kappa$, and AUC for sleep-wake predictions of Logistic Regression' \
                   ' with use of motion, HR, clock proxy, or combination of features. PPG, ' \
                   'photoplethysmography; MEMS, microelectromechanical systems; HR, heart rate; ' \
                   'AUC, area under the curve.} \\end{table}'

    # assert_has_calls requires these to appear as one consecutive run.
    mock_print.assert_has_calls([call(frontmatter), call(header_line_1), call(results_line_1), call(results_line_2),
                                 call(header_line_2), call(results_line_3), call(results_line_4), call(backmatter)])
def test_plot_pr(self, mock_curve_performance_builder, mock_feature_set_service,
                 mock_plt):
    """Verify the collaborator calls made by ``CurvePlotBuilder.build_pr_plot``.

    The mocks are presumably injected by patch decorators outside this
    view. For each of the two feature sets in the summary the test expects
    a precision-recall curve to be built from that set's raw performances,
    a label and colour to be looked up for the feature list, and the curve
    to be plotted with that label and colour.
    """
    motion_hr_features = [FeatureType.count, FeatureType.heart_rate]
    motion_features = [FeatureType.count]

    first_raw_performances = [
        RawPerformance(true_labels=np.array([0, 1]),
                       class_probabilities=np.array([[0, 1], [1, 0]])),
        RawPerformance(true_labels=np.array([0, 1]),
                       class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]])),
    ]
    second_raw_performances = [
        RawPerformance(true_labels=np.array([1, 1]),
                       class_probabilities=np.array([[0, 1], [1, 0]])),
        RawPerformance(true_labels=np.array([0, 0]),
                       class_probabilities=np.array([[0.2, 0.8], [0.1, 0.9]])),
    ]
    performance_dictionary = {
        tuple(motion_hr_features): first_raw_performances,
        tuple(motion_features): second_raw_performances,
    }

    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    classifier_summary = ClassifierSummary(
        attributed_classifier=attributed_classifier,
        performance_dictionary=performance_dictionary,
    )

    first_pr_performance = PrecisionRecallPerformance(
        recalls=np.array([1]), precisions=np.array([.2]))
    second_pr_performance = PrecisionRecallPerformance(
        recalls=np.array([.3]), precisions=np.array([1]))
    first_label, second_label = 'Label 1', 'Label 2'
    first_color, second_color = '#ffffff', '#123456'

    # Canned answers, consumed one per call in order.
    mock_curve_performance_builder.build_precision_recall_from_raw.side_effect = [
        first_pr_performance,
        second_pr_performance,
    ]
    mock_feature_set_service.get_label.side_effect = [first_label, second_label]
    mock_feature_set_service.get_color.side_effect = [first_color, second_color]

    CurvePlotBuilder.build_pr_plot(classifier_summary)

    # Each expectation is checked on its own, so only presence of each
    # call is required, not adjacency between them.
    for raw_performances in (first_raw_performances, second_raw_performances):
        mock_curve_performance_builder.build_precision_recall_from_raw.assert_has_calls(
            [call(raw_performances)])

    for feature_list in (motion_hr_features, motion_features):
        mock_feature_set_service.get_label.assert_has_calls(
            [call(feature_list)])
        mock_feature_set_service.get_color.assert_has_calls(
            [call(feature_list)])

    expected_curves = (
        (first_pr_performance, first_label, first_color),
        (second_pr_performance, second_label, second_color),
    )
    for curve, label, color in expected_curves:
        mock_plt.plot.assert_has_calls([
            call(curve.recalls, curve.precisions, label=label, color=color)
        ])
def test_make_histogram_with_thresholds(self, mock_plt, mock_np,
                                        mock_performance_summarizer,
                                        mock_image, mock_image_draw,
                                        mock_image_font):
    """Verify ``PerformancePlotBuilder.make_histogram_with_thresholds``.

    The mocks are presumably injected by patch decorators outside this
    view. Two placeholder subjects are summarised at four thresholds each.
    The test expects a 4x2 grid of histograms (accuracy in the left column,
    wake-correct in the right), the figure to be saved and closed, and the
    saved image to be re-opened, pasted onto a wider white canvas,
    annotated with four "TPR = ..." labels and saved under a second name.
    """
    number_of_subjects = 2
    raw_performances = [
        'raw_performance_placeholder_1',
        'raw_performance_placeholder_2',
    ]
    attributed_classifier = AttributedClassifier(
        name="Logistic Regression",
        classifier=LogisticRegression(),
    )
    classifier_summary = ClassifierSummary(
        attributed_classifier=attributed_classifier,
        performance_dictionary={(FeatureType.count,): raw_performances},
    )

    # Four rows (thresholds) by two columns (accuracy, wake correct).
    mock_ax = [[MagicMock(), MagicMock()] for _ in range(4)]
    mock_plt.subplots.return_value = ('placeholder1', mock_ax)
    mock_np.zeros.side_effect = [
        np.zeros((number_of_subjects, 4)) for _ in range(2)
    ]
    dt = 0.02
    expected_range = np.arange(0, 1 + dt, dt)
    mock_np.arange.return_value = expected_range

    # (accuracy, wake_correct) per threshold, one inner list per subject.
    per_subject_values = [
        [(0, 1), (2, 3), (4, 5), (6, 7)],
        [(10, 11), (12, 13), (14, 15), (16, 17)],
    ]
    summarizer_results = []
    for subject_values in per_subject_values:
        threshold_performances = []
        for accuracy, wake_correct in subject_values:
            performance = MagicMock()
            performance.accuracy = accuracy
            performance.wake_correct = wake_correct
            threshold_performances.append(performance)
        summarizer_results.append(([1, 2, 3, 4], threshold_performances))
    mock_performance_summarizer.summarize_thresholds.side_effect = summarizer_results

    figure_directory = str(Constants.FIGURE_FILE_PATH)
    file_save_name = figure_directory + '/' + 'Motion only_Logistic Regression_histograms_with_thresholds.png'

    mock_opened_image = MagicMock()
    mock_opened_image.size = (100, 200)
    mock_image.open.return_value = mock_opened_image
    new_image = MagicMock()
    mock_image.new.return_value = new_image
    image_draw = MagicMock()
    mock_image_draw.Draw.return_value = image_draw
    font = "true type font"
    mock_image_font.truetype.return_value = font

    PerformancePlotBuilder.make_histogram_with_thresholds(classifier_summary)

    mock_plt.subplots.assert_called_once_with(
        nrows=4, ncols=2, figsize=(8, 8), sharex=True, sharey=True)
    mock_np.zeros.assert_has_calls([call((2, 4)), call((2, 4))])
    mock_performance_summarizer.summarize_thresholds.assert_has_calls(
        [call([raw_performance]) for raw_performance in raw_performances])

    # Every axis receives the values of both subjects for its threshold row.
    column_colors = ("skyblue", "lightsalmon")
    for row in range(4):
        for column, color in enumerate(column_colors):
            expected_values = [
                subject_values[row][column]
                for subject_values in per_subject_values
            ]
            mock_ax[row][column].hist.assert_called_once_with(
                expected_values, bins=expected_range, color=color, ec=color)

    bottom_left, bottom_right = mock_ax[3]
    bottom_left.set_xlabel.assert_called_once_with(
        'Accuracy', fontsize=16, fontname='Arial')
    bottom_right.set_xlabel.assert_called_once_with(
        'Wake correct', fontsize=16, fontname='Arial')
    bottom_left.set_ylabel.assert_called_once_with(
        'Count', fontsize=16, fontname='Arial')
    bottom_left.set_xlim.assert_called_once_with((0, 1))
    bottom_right.set_xlim.assert_called_once_with((0, 1))

    mock_plt.tight_layout.assert_called_once_with()
    mock_plt.savefig.assert_called_once_with(file_save_name, dpi=300)
    mock_plt.close.assert_called_once_with()

    # Post-processing of the saved figure: widen the canvas by 30% of the
    # opened image's width (100) and annotate each histogram row.
    mock_image.open.assert_called_once_with(file_save_name)
    mock_image.new.assert_called_once_with(
        'RGB', (int((1 + 0.3) * 100), 200), "white")
    new_image.paste.assert_called_once_with(
        mock_opened_image, (int(0.3 * 100), 0))
    mock_image_draw.Draw.assert_called_once_with(new_image)
    mock_image_font.truetype.assert_called_once_with(
        '/Library/Fonts/Arial Unicode.ttf', 75)

    row_annotations = (
        (0.125, "TPR = 0.8"),
        (0.375, "TPR = 0.9"),
        (0.625, "TPR = 0.93"),
        (0.875, "TPR = 0.95"),
    )
    image_draw.text.assert_has_calls([
        call((int(0.3 * 100 / 3), int((200 * 0.9) * vertical_fraction)),
             annotation, (0, 0, 0), font=font)
        for vertical_fraction, annotation in row_annotations
    ])
    new_image.save.assert_called_once_with(
        str(Constants.FIGURE_FILE_PATH) + '/' + 'figure_threshold_histogram.png')