def compare_to(self, comp, frac_comp: bool = True) -> None:
    """Print the difference between these results and those of *comp*.

    Only metrics (and, for dict-valued metrics, sub-keys) that are present in
    both ``self.results`` and ``comp.results`` are compared. Every entry is
    read as a ``(value, uncertainty)`` pair through indices 0 and 1. Metrics
    are printed in reverse-sorted order, one ``print`` call per metric.

    Args:
        comp: Object exposing a ``results`` mapping laid out like
            ``self.results``; its values act as the reference.
        frac_comp: If True, report differences as a percentage of the
            reference value (suffix `` %``); otherwise report absolute
            differences.
    """
    def _difference(own, ref):
        # Return (difference, uncertainty) of the pair `own` relative to `ref`.
        diff = own[0] - ref[0]
        # Uncertainties of the two values are added in quadrature.
        diff_unc = np.sqrt(np.square(ref[1]) + np.square(own[1]))
        if frac_comp:
            scale = 0.01 * ref[0]  # still fails for a zero reference value
            diff = diff / scale
            # Uncertainty of 100 * (own - ref) / ref, i.e.
            # |pct| * sqrt((s_diff / (own - ref))^2 + (s_ref / ref)^2),
            # written with the first term expanded so that the absolute
            # uncertainty is converted to percent like `diff` itself and no
            # division by the difference is needed (it may be exactly zero).
            diff_unc = np.sqrt(
                np.square(diff_unc / scale)
                + np.square(diff * ref[1] / ref[0]))
        return diff, diff_unc

    suffix = ' %' if frac_comp else ''
    for r in sorted(self.results, reverse=True):
        if r not in comp.results:
            continue
        res = f'{r}:'
        if isinstance(self.results[r], dict):
            # Dict-valued metric: one indented line per shared sub-key.
            for d in sorted(self.results[r]):
                if d not in comp.results[r]:
                    continue
                val = uncert_round(
                    *_difference(self.results[r][d], comp.results[r][d]))
                res += f'\n\t{d}\t{val[0]}±{val[1]}{suffix}'
        else:
            val = uncert_round(*_difference(self.results[r], comp.results[r]))
            res += f'\t{val[0]}±{val[1]}{suffix}'
        print(res)
def score_test_data_per_fold(test_fy, cut, pred_name='pred',
                             zero_preds=None, one_preds=None):
    """Compute the public and private AMS on each test fold and average them.

    For every fold a DataFrame is assembled from the prediction, weight,
    target and private-flag columns of *test_fy*. The weights of the private
    and public rows are rescaled by ``total_count / fold_count`` of the
    respective subset, and ``calc_ams`` is evaluated on the summed signal and
    background weights of the rows with ``pred >= cut``.

    Args:
        test_fy: Fold container providing ``get_column`` and ``n_folds``.
        cut: Threshold; rows with ``pred >= cut`` are accepted.
        pred_name: Name of the prediction column requested from *test_fy*.
        zero_preds: Column names forwarded to ``to_binary_class`` when the
            prediction has more than one dimension. Defaults to
            ``['pred_0', 'pred_1', 'pred_2']``.
        one_preds: Column names forwarded to ``to_binary_class`` in the same
            case. Defaults to ``['pred_3']``.

    Returns:
        ``((public_mean, public_err), (private_mean, private_err))`` where
        each error is the sample standard deviation (``ddof=1``) of the
        per-fold AMS divided by ``sqrt(n_folds)``.
    """
    # Fresh lists per call: the defaults are handed to external code, so they
    # must not be shared between invocations.
    if zero_preds is None:
        zero_preds = ['pred_0', 'pred_1', 'pred_2']
    if one_preds is None:
        one_preds = ['pred_3']

    private_flags = test_fy.get_column('private')
    n_tot_pub = len(private_flags[private_flags == 0])
    n_tot_pri = len(private_flags[private_flags == 1])

    public_ams, private_ams = [], []
    for i in range(test_fy.n_folds):
        data = pandas.DataFrame()
        pred = test_fy.get_column(pred_name, 1, i)
        if len(pred.shape) > 1:
            # Multi-column prediction: store one column per output, then let
            # to_binary_class process them (presumably it adds the 'pred'
            # column used below - confirm against its definition).
            for p in range(pred.shape[-1]):
                data[f'pred_{p}'] = pred[:, p]
            to_binary_class(data, zero_preds, one_preds)
        else:
            data['pred'] = pred
        data['gen_weight'] = test_fy.get_column('weights', 1, i)
        data['gen_target'] = test_fy.get_column('targets', 1, i)
        data['private'] = test_fy.get_column('private', 1, i)

        is_public = (data.private == 0)
        is_private = (data.private == 1)
        # Scale the fold's weights by total count / fold count per subset.
        data.loc[is_private, 'gen_weight'] *= n_tot_pri / len(data[is_private])
        data.loc[is_public, 'gen_weight'] *= n_tot_pub / len(data[is_public])

        accept = (data.pred >= cut)
        signal = (data.gen_target == 1)
        bkg = (data.gen_target == 0)
        public_ams.append(
            calc_ams(np.sum(data.loc[accept & is_public & signal, 'gen_weight']),
                     np.sum(data.loc[accept & is_public & bkg, 'gen_weight'])))
        private_ams.append(
            calc_ams(np.sum(data.loc[accept & is_private & signal, 'gen_weight']),
                     np.sum(data.loc[accept & is_private & bkg, 'gen_weight'])))

    # Mean over folds and its standard error.
    public_mean = np.mean(public_ams)
    public_std = np.std(public_ams, ddof=1) / np.sqrt(test_fy.n_folds)
    private_mean = np.mean(private_ams)
    private_std = np.std(private_ams, ddof=1) / np.sqrt(test_fy.n_folds)

    public_rounded = uncert_round(public_mean, public_std)
    private_rounded = uncert_round(private_mean, private_std)
    print(
        f"Mean Public:Private AMS: {public_rounded[0]}±{public_rounded[1]} : {private_rounded[0]}±{private_rounded[1]}"
    )
    return (public_mean, public_std), (private_mean, private_std)
def print_results(self) -> None:
    """Print every entry of ``self.results`` as ``value±uncertainty``.

    Metrics are visited in reverse-sorted order and each one is emitted with
    its own ``print`` call. A dict-valued metric is expanded into one
    tab-indented line per sub-key (sorted); any other metric is printed on
    the same line as its name. Values are rounded with ``uncert_round``.
    """
    for name in sorted(self.results, reverse=True):
        entry = self.results[name]
        pieces = [f'{name}:']
        if not isinstance(entry, dict):
            # Scalar metric: (value, uncertainty) pair on the header line.
            rounded = uncert_round(entry[0], entry[1])
            pieces.append(f'\t{rounded[0]}±{rounded[1]}')
        else:
            for key in sorted(entry):
                rounded = uncert_round(entry[key][0], entry[key][1])
                pieces.append(f'\n\t{key}\t{rounded[0]}±{rounded[1]}')
        print(''.join(pieces))
def print_table_row(self, base_result) -> None:
    """Print one LaTeX table row summarising these results.

    The row has five ``$value\\pm uncertainty$`` cells separated by `` & ``
    and terminated by ``\\\\``: the three AMS metrics as stored, followed by
    the mean fractional change of ``train_time`` and ``test_time`` relative
    to *base_result*.

    Args:
        base_result: Object whose ``results`` mapping supplies the reference
            timings; it must contain every sub-key present in this object's
            ``train_time`` and ``test_time`` entries.
    """
    cells = []

    for metric in ('val_ams_max', 'val_ams_smooth', 'test_public_ams_mean'):
        entry = self.results[metric]
        rounded = uncert_round(entry[0], entry[1])
        cells.append(fr'${rounded[0]}\pm{rounded[1]}$')

    for timing in ('train_time', 'test_time'):
        keys = sorted(self.results[timing])
        own = np.array([self.results[timing][k][0] for k in keys])
        ref = np.array([base_result.results[timing][k][0] for k in keys])
        # Fractional change per sub-key, then mean and its standard error.
        rel_change = (own - ref) / ref
        rounded = uncert_round(
            np.mean(rel_change),
            np.std(rel_change, ddof=1) / np.sqrt(len(rel_change)))
        cells.append(fr'${rounded[0]}\pm{rounded[1]}$')

    print(' & '.join(cells) + r'\\')
def bootstrap_score_test_data(test_fy, cut, n, pred_name='pred',
                              zero_preds=None, one_preds=None):
    """Score the public and private test subsets via ``mp_run`` and ``bs_ams``.

    A single DataFrame is assembled from the prediction, weight, target and
    private-flag columns of *test_fy*. The public (``private == 0``) and
    private (``private == 1``) rows are dispatched as two jobs to
    ``mp_run(..., bs_ams)``; the mean and sample standard deviation
    (``ddof=1``) of the returned ``'public_ams'`` and ``'private_ams'``
    entries are rounded with ``uncert_round``, printed and returned.

    Args:
        test_fy: Data container providing ``get_column``.
        cut: Value passed to each job under the key ``'cut'``.
        n: Value passed to each job under the key ``'n'`` (presumably the
            number of bootstrap resamples - confirm against ``bs_ams``).
        pred_name: Name of the prediction column requested from *test_fy*.
        zero_preds: Column names forwarded to ``to_binary_class`` when the
            prediction has more than one dimension. Defaults to
            ``['pred_0', 'pred_1', 'pred_2']``.
        one_preds: Column names forwarded to ``to_binary_class`` in the same
            case. Defaults to ``['pred_3']``.

    Returns:
        ``(public, private)``: the ``uncert_round`` results for the public
        and private subsets.
    """
    # Fresh lists per call: the defaults are handed to external code, so they
    # must not be shared between invocations.
    if zero_preds is None:
        zero_preds = ['pred_0', 'pred_1', 'pred_2']
    if one_preds is None:
        one_preds = ['pred_3']

    data = pandas.DataFrame()
    pred = test_fy.get_column(pred_name)
    if len(pred.shape) > 1:
        # Multi-column prediction: store one column per output, then let
        # to_binary_class process them (presumably it adds the 'pred' column
        # that the scoring relies on - confirm against its definition).
        for p in range(pred.shape[-1]):
            data[f'pred_{p}'] = pred[:, p]
        to_binary_class(data, zero_preds, one_preds)
    else:
        data['pred'] = pred
    data['gen_weight'] = test_fy.get_column('weights')
    data['gen_target'] = test_fy.get_column('targets')
    data['private'] = test_fy.get_column('private')

    jobs = [
        {'n': n, 'df': data[(data.private == 0)], 'cut': cut, 'name': 'public'},
        {'n': n, 'df': data[(data.private == 1)], 'cut': cut, 'name': 'private'},
    ]
    amss = mp_run(jobs, bs_ams)

    public = uncert_round(np.mean(amss['public_ams']),
                          np.std(amss['public_ams'], ddof=1))
    private = uncert_round(np.mean(amss['private_ams']),
                           np.std(amss['private_ams'], ddof=1))
    print(
        f"Public:Private AMS: {public[0]}±{public[1]} : {private[0]}±{private[1]}"
    )
    return public, private