def test_binned_counts__works(self):
        """Run binned_counts on six rows spread evenly over two interval bins.

        Three input rows fall in the (0.0, 0.5] bin with ``actual`` 0 and
        three in the (0.5, 1.0] bin with ``actual`` 1.  The returned frame is
        compared value-by-value against a hand-written two-row frame.
        """
        lower_bin = pd.Interval(0.0, 0.5)
        upper_bin = pd.Interval(0.5, 1.0)

        # Input: columns are val, actual, pre_ind, pre_range.
        input_columns = ['val', 'actual', 'pre_ind', 'pre_range']
        input_rows = [[0.1, 0, 1, lower_bin] for _ in range(3)]
        input_rows.extend([0.9, 1, 2, upper_bin] for _ in range(3))
        df = pd.DataFrame(input_rows, columns=input_columns)

        result = binned_counts(df, actual_col='actual', bin_col='pre_range')

        # One expected row per bin, in bin order.
        expected_columns = ['pre_range', 'actual', 'count', 'rate',
                            'expected', 'actual_rate']
        expected_rows = [
            [lower_bin, 0, 3, 0.5, 1.5, 0.0],
            [upper_bin, 3, 3, 1.0, 3.0, 1.0],
        ]
        expected = pd.DataFrame(expected_rows, columns=expected_columns)

        assert_array_equal(result, expected)
# Run add_binning_cols once per prediction column, using the column's own name
# as the prefix.  The return value is discarded, so the helper presumably
# extends pfa_vs_dnn_binned in place -- TODO confirm.
for prob_col in prediction_cols:
    add_binning_cols(
        pfa_vs_dnn_binned,
        prob_col=prob_col,
        prefix=prob_col,
        bins=bins,
        bin_labels=bin_labels,
    )

# NOTE(review): a commented-out assignment here listed prediction_cols as
# ['pfa_pred', 'dnn_d_pred', 'dnn_pred', 'pfa_d_pred'] -- confirm against the
# real definition.

# Dump the frame (without its index) into the result directory.
validate_csv_path = os.path.join(
    result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_validate.csv')
pfa_vs_dnn_binned.to_csv(validate_csv_path, index=False)

# Per-bin counts against the 'correct' column, one frame per '<pred>_range'
# bin column.  The generator is consumed left to right, so the calls run in
# the same order as the names on the left-hand side.
pfa_gb, pfa_d_gb, dnn_gb, dnn_d_gb = (
    binned_counts(pfa_vs_dnn_binned,
                  actual_col='correct',
                  bin_col=pred_name + '_range')
    for pred_name in ('pfa_pred', 'pfa_d_pred', 'dnn_pred', 'dnn_d_pred')
)


# ========= Plot
# https://www.codespeedy.com/fill-area-with-color-in-matplotlib-with-python/
def f1(x):
# Example #3
# 0
# pfa_vs_dnn_binned_s1_pfa_d = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
# pfa_vs_dnn_binned_s1_pfa_d.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_pfa_d.csv'), index=False)
# pfa_d_gb_s1 = binned_counts(pfa_vs_dnn_binned_s1_pfa_d, actual_col='correct', bin_col='pfa_d_pred' + '_range')
# c2_stats_pfa_d = stats.chisquare(f_obs=pfa_d_gb_s1.dropna()['actual'], f_exp=pfa_d_gb_s1.dropna()['count_expected'])
# with open(os.path.join(result_dir, f'pfa_d_gb_s1_chi.txt'), 'w+') as fileObject:
#     fileObject.write(str(c2_stats_pfa_d))
# print(f'c2_stats_pfa_d {c2_stats_pfa_d}')
# pfa_d_gb_s1.to_csv(os.path.join(result_dir, f'pfa_d_gb_s1.csv'), index=False)

# Take 100 sampled rows from pfa_vs_dnn_binned_all_s1 indexed by `picked`
# (no random_state is passed to sample) and save them without the index.
pfa_vs_dnn_binned_s1_dnn = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
s1_dnn_sample_csv = os.path.join(
    result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_dnn.csv')
pfa_vs_dnn_binned_s1_dnn.to_csv(s1_dnn_sample_csv, index=False)

# Per-bin counts of the sample, binned on the 'dnn_pred_range' column.
dnn_gb_s1 = binned_counts(
    pfa_vs_dnn_binned_s1_dnn,
    actual_col='correct',
    bin_col='dnn_pred' + '_range',
)

# Chi-square of the 'actual' column against 'count_expected', using only the
# rows that contain no missing values.
dnn_gb_s1_complete = dnn_gb_s1.dropna()
c2_stats_dnn = stats.chisquare(
    f_obs=dnn_gb_s1_complete['actual'],
    f_exp=dnn_gb_s1_complete['count_expected'],
)

# Persist the test result as text, echo it, then save the per-bin table.
chi_report_path = os.path.join(result_dir, f'dnn_gb_s1_chi.txt')
with open(chi_report_path, 'w+') as chi_file:
    chi_file.write(str(c2_stats_dnn))
print(f'c2_stats_dnn {c2_stats_dnn}')

dnn_gb_s1_csv = os.path.join(result_dir, f'dnn_gb_s1.csv')
dnn_gb_s1.to_csv(dnn_gb_s1_csv, index=False)

# pfa_vs_dnn_binned_s1_dnn_d = pfa_vs_dnn_binned_all_s1[picked].sample(n=100)
# pfa_vs_dnn_binned_s1_dnn_d.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_bin_test_s1_dnn_d.csv'), index=False)
# dnn_d_gb_s1 = binned_counts(pfa_vs_dnn_binned_s1_dnn_d, actual_col='correct', bin_col='dnn_d_pred' + '_range')
# c2_stats_dnn_d = stats.chisquare(f_obs=dnn_d_gb_s1.dropna()['actual'], f_exp=dnn_d_gb_s1.dropna()['count_expected'])
# print(f'c2_stats_dnn_d {c2_stats_dnn_d}')
# dnn_d_gb_s1.to_csv(os.path.join(result_dir, f'dnn_d_gb_s1.csv'), index=False)
# ====================================================================================================================================