def test_add_binning_cols__works(self):
    """Check that add_binning_cols yields the expected bin index and range columns.

    Six values in the 'val' column are binned with the edges produced by
    ``drange_inc(0, 2, '1')`` and the labels 1 and 2, using the prefix 'pre'.
    The result is compared against a frame holding 'val', 'pre_ind' (the bin
    label) and 'pre_range' (a ``pd.Interval`` per row).
    """
    values = [[0.3], [0.6], [1.0], [1.3], [1.6], [2.0]]
    frame = pd.DataFrame(values, columns=['val'])

    result = add_binning_cols(
        frame,
        prob_col='val',
        prefix='pre',
        bins=list(drange_inc(0, 2, '1')),
        bin_labels=list(range(1, 3)),
    )

    # The expected rows place the edge values 1.0 and 2.0 in the interval
    # that ends at them, not in the one that starts there.
    lower_bin = pd.Interval(0.0, 1.0)
    upper_bin = pd.Interval(1.0, 2.0)
    expected_rows = [
        [0.3, 1, lower_bin],
        [0.6, 1, lower_bin],
        [1.0, 1, lower_bin],
        [1.3, 2, upper_bin],
        [1.6, 2, upper_bin],
        [2.0, 2, upper_bin],
    ]
    expected = pd.DataFrame(expected_rows, columns=['val', 'pre_ind', 'pre_range'])

    assert_array_equal(result, expected)
def test_drange_inc__works(self):
    """Check that ``drange_inc(0, .1, '0.05')`` produces 0, 0.05 and 0.1."""
    # The expected sequence includes the upper bound 0.1 itself.
    expected_values = [0, 0.05, 0.1]
    actual_values = list(drange_inc(0, 0.1, '0.05'))
    assert_equal(actual_values, expected_values)
history_ids_validate, pfa_pred_validate, pfa_dash_pred_validate, dnn_pred_validate, dnn_dash_pred_validate ], axis=1) pfa_vs_dnn.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_validate.csv'), index=False) # Compute correlations pfa_vs_dnn_just_pred = pfa_vs_dnn.loc[:, [ 'pfa_pred', 'pfa_d_pred', 'dnn_pred', 'dnn_d_pred' ]] pfa_vs_dnn_just_pred.corr() bins = list(drange_inc(0, 1, '0.05')) # 5% point bin size bin_labels = list(range(1, 21)) base_cols = ['pfa', 'pfa_d', 'dnn', 'dnn_d'] correct_cols = [c + "_cor" for c in base_cols] prediction_cols = [c + "_pred" for c in base_cols] # correct_cols = ['pfa_cor', 'dnn_d_cor', 'dnn_cor', 'pfa_d_cor'] # prediction_cols = ['pfa_pred', 'dnn_d_pred', 'dnn_pred', 'pfa_d_pred'] pfa_vs_dnn_binned = pfa_vs_dnn.copy() pfa_vs_dnn_binned = pfa_vs_dnn_binned.drop(columns=correct_cols) # pfa_vs_dnn_binned.to_csv(os.path.join(result_dir, f'pfa_pred_vs_dnn_pred_w_dash_validate_no_cor.csv'), index=False) for prob_col in prediction_cols: add_binning_cols(pfa_vs_dnn_binned,