def test_binned_counts__works(self):
    """Check ``binned_counts`` on six rows split evenly across two bins.

    Three rows fall in the ``(0.0, 0.5]`` bin with ``actual == 0`` and three
    in the ``(0.5, 1.0]`` bin with ``actual == 1``.  The result is compared
    against a two-row table holding one row per bin.
    """
    low_bin = pd.Interval(0.0, 0.5)
    high_bin = pd.Interval(0.5, 1.0)

    # Input columns: val, actual, pre_ind, pre_range
    rows = [[0.1, 0, 1, low_bin] for _ in range(3)]
    rows += [[0.9, 1, 2, high_bin] for _ in range(3)]
    df = pd.DataFrame(rows, columns=['val', 'actual', 'pre_ind', 'pre_range'])

    result = binned_counts(df, actual_col='actual', bin_col='pre_range')

    expected_columns = [
        'pre_range', 'actual', 'count', 'rate', 'expected', 'actual_rate',
    ]
    expected = pd.DataFrame(
        [
            [low_bin, 0, 3, 0.5, 1.5, 0.0],
            [high_bin, 3, 3, 1.0, 3.0, 1.0],
        ],
        columns=expected_columns,
    )

    # NOTE(review): assert_array_equal presumably compares the underlying
    # values only, so the column labels above are not verified - confirm.
    assert_array_equal(result, expected)
for prob_col in prediction_cols: add_binning_cols(pfa_vs_dnn_binned, prob_col=prob_col, prefix=prob_col, bins=bins, bin_labels=bin_labels) # prediction_cols = ['pfa_pred', 'dnn_d_pred', 'dnn_pred', 'pfa_d_pred'] pfa_vs_dnn_binned.to_csv(os.path.join( result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_validate.csv'), index=False) pfa_gb = binned_counts(pfa_vs_dnn_binned, actual_col='correct', bin_col='pfa_pred' + '_range') pfa_d_gb = binned_counts(pfa_vs_dnn_binned, actual_col='correct', bin_col='pfa_d_pred' + '_range') dnn_gb = binned_counts(pfa_vs_dnn_binned, actual_col='correct', bin_col='dnn_pred' + '_range') dnn_d_gb = binned_counts(pfa_vs_dnn_binned, actual_col='correct', bin_col='dnn_d_pred' + '_range') # ========= Plot # https://www.codespeedy.com/fill-area-with-color-in-matplotlib-with-python/ def f1(x):
# ---- Disabled variant: same analysis for the 'pfa_d_pred' bins ----
# pfa_vs_dnn_binned_s1_pfa_d = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
# pfa_vs_dnn_binned_s1_pfa_d.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_pfa_d.csv'), index=False)
# pfa_d_gb_s1 = binned_counts(pfa_vs_dnn_binned_s1_pfa_d, actual_col='correct', bin_col='pfa_d_pred' + '_range')
# c2_stats_pfa_d = stats.chisquare(f_obs=pfa_d_gb_s1.dropna()['actual'], f_exp=pfa_d_gb_s1.dropna()['count_expected'])
# with open(os.path.join(result_dir, f'pfa_d_gb_s1_chi.txt'), 'w+') as fileObject:
#     fileObject.write(str(c2_stats_pfa_d))
# print(f'c2_stats_pfa_d {c2_stats_pfa_d}')
# pfa_d_gb_s1.to_csv(os.path.join(result_dir, f'pfa_d_gb_s1.csv'), index=False)

# ---- Active analysis: 'dnn_pred' bins ----
# Take 100 rows from the `picked` selection of pfa_vs_dnn_binned_all_s1.
# NOTE(review): no random_state is passed to sample(), so the drawn rows
# presumably differ between runs - confirm this is intended.
pfa_vs_dnn_binned_s1_dnn = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
pfa_vs_dnn_binned_s1_dnn.to_csv(
    os.path.join(
        result_dir, 'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_dnn.csv'),
    index=False,
)

dnn_gb_s1 = binned_counts(
    pfa_vs_dnn_binned_s1_dnn,
    actual_col='correct',
    bin_col='dnn_pred_range',
)

# Drop rows with missing values once and reuse the result for both the
# observed ('actual') and expected ('count_expected') frequencies, so the
# two series are guaranteed to come from the same filtered frame.
dnn_gb_s1_complete = dnn_gb_s1.dropna()
c2_stats_dnn = stats.chisquare(
    f_obs=dnn_gb_s1_complete['actual'],
    f_exp=dnn_gb_s1_complete['count_expected'],
)

# The result is only written, never read back, so plain write mode suffices.
with open(os.path.join(result_dir, 'dnn_gb_s1_chi.txt'), 'w') as chi_file:
    chi_file.write(str(c2_stats_dnn))
print(f'c2_stats_dnn {c2_stats_dnn}')

# The unfiltered table (including any rows dropped above) is what is saved.
dnn_gb_s1.to_csv(os.path.join(result_dir, 'dnn_gb_s1.csv'), index=False)

# ---- Disabled variant: same analysis for the 'dnn_d_pred' bins ----
# pfa_vs_dnn_binned_s1_dnn_d = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
# pfa_vs_dnn_binned_s1_dnn_d.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_dnn_d.csv'), index=False)
# dnn_d_gb_s1 = binned_counts(pfa_vs_dnn_binned_s1_dnn_d, actual_col='correct', bin_col='dnn_d_pred' + '_range')
# c2_stats_dnn_d = stats.chisquare(f_obs=dnn_d_gb_s1.dropna()['actual'], f_exp=dnn_d_gb_s1.dropna()['count_expected'])
# print(f'c2_stats_dnn_d {c2_stats_dnn_d}')
# dnn_d_gb_s1.to_csv(os.path.join(result_dir, f'dnn_d_gb_s1.csv'), index=False)
# ====================================================================================================================================