def plot_fit(suffix=None, wsp=None):
    """Draw the fit model against the selected data into a multi-page PDF.

    For each of the two mass accessors (``m`` and ``dtf_dm``) two pages are
    requested from ``roofit_to_matplotlib.plot_fit``: one with its default
    options and one with ``do_pulls=False``.

    Args:
        suffix: Optional string inserted into the output file name
            (``fits<suffix>.pdf``). ``None`` means no suffix.
        wsp: Workspace to plot. When ``None`` the stored workspace of the
            current mode is loaded through ``fit_config.load_workspace``.
    """
    from . import roofit_to_matplotlib
    from . import fit_config

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    mode = gcm()
    if wsp is None:
        wsp = fit_config.load_workspace(mode)
    passed = selection.get_final_selection()

    candidates = mode.get_data([dtf_dm(), m(mode.D0)])[passed]
    data = fit_config.pandas_to_roodataset(candidates, wsp.set('datavars'))
    fit_config.WS_DMASS_NAME = dtf_dm()
    fit_config.WS_MASS_NAME = m(mode.D0)

    tag = '' if suffix is None else suffix
    outfile = mode.get_output_path('sweight_fit') + 'fits{}.pdf'.format(tag)

    # First variant uses the plotting defaults, second one switches pulls off.
    variants = ({}, {'do_pulls': False})
    with PdfPages(outfile) as pdf:
        for func in (m, dtf_dm):
            for extra_options in variants:
                roofit_to_matplotlib.plot_fit(
                    mode.D0, wsp, func, data=data, pdf=pdf,
                    do_comb_bkg=mode.mode in config.twotag_modes,
                    **extra_options)
def mass_fiducial_selection(df):
    """Return a boolean mask for candidates inside the fit ranges.

    A candidate passes when its D0 mass lies in ``[1810, 1920)`` and its
    delta mass lies in ``[140.5, 160.5)``.

    Args:
        df: Data indexed by the column names returned by ``m(gcm().D0)``
            and ``dtf_dm()``.

    Returns:
        The element-wise AND of the four boundary comparisons.
    """
    windows = (
        (m(gcm().D0), 1810., 1920.),
        (dtf_dm(), 140.5, 160.5),
    )
    mask = True
    for column, lower, upper in windows:
        mask &= (df[column] >= lower)
        mask &= (df[column] < upper)
    return mask
def rand_spi_sideband_region(df):
    """Select the D0 signal peak together with the delta mass sidebands.

    Intended to give a sample enriched in random slow pions: the D0 mass
    has to be within 18 of ``config.PDG_MASSES['D0']`` while the delta mass
    has to be more than 2.3 away from ``config.PDG_MASSES['delta']``.

    Args:
        df: Data holding the D0 mass and delta mass columns.

    Returns:
        Boolean mask with one entry per row of ``df``.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset > 2.3
    return mask
def mass_signal_region(df):
    """Select the signal peak in both the D0 mass and the delta mass.

    Intended to give a signal enriched sample: the D0 mass has to be within
    18 of ``config.PDG_MASSES['D0']`` and the delta mass within 0.5 of
    ``config.PDG_MASSES['delta']``.

    Args:
        df: Data holding the D0 mass and delta mass columns.

    Returns:
        Boolean mask with one entry per row of ``df``.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset < 0.5
    return mask
def fit():
    """Run the mass fit for the current mode and store the workspace.

    A fresh workspace is built with ``setup_workspace``, the start values
    are plotted (``fits_start_values.pdf`` via ``plot_fit``), the ``total``
    model is fitted to the selected data and the workspace is dumped with
    ``fit_config.dump_workspace``. A fit failing
    ``helpers.check_fit_result`` is reported through ``log.error`` but the
    workspace is written regardless.
    """
    # Get the data
    # TODO: rewrite selection to use gcm itself
    mode = gcm()
    passed = selection.get_final_selection()
    candidates = mode.get_data([dtf_dm(), m(mode.D0)])[passed]

    # Imported only after the data has been read, as before.
    from . import fit_config
    from ROOT import RooFit as RF
    from .fit_setup import setup_workspace

    wsp, _ = setup_workspace()
    data = fit_config.pandas_to_roodataset(candidates, wsp.set('datavars'))
    model = wsp.pdf('total')

    plot_fit('_start_values', wsp=wsp)

    fit_options = (RF.NumCPU(4), RF.Save(True), RF.Strategy(2),
                   RF.Extended(True))
    result = model.fitTo(data, *fit_options)

    fit_is_good = helpers.check_fit_result(result, log)
    if not fit_is_good:
        log.error('Bad fit quality')

    fit_config.dump_workspace(mode, wsp)
def setup_workspace():
    """Create the fit workspace for the current mode.

    Declares the D0 mass (range 1810-1920) and delta mass (range
    140.5-160.5) variables, attaches a narrower ``'plotting'`` range to
    each, groups both in the ``'datavars'`` set and lets ``setup_pdf``
    populate the workspace.

    Returns:
        Tuple ``(wsp, vs)`` of the workspace and whatever ``setup_pdf``
        returned for it.
    """
    mode = modes.gcm()
    wsp = ROOT.RooWorkspace(mode.mode, mode.mode)

    dmass_name = dtf_dm()
    mass_name = m(mode.D0)
    fit_config.WS_DMASS_NAME = dmass_name
    fit_config.WS_MASS_NAME = mass_name

    # Factory syntax: name[lower,upper]
    declaration = '{}[{},{}]'
    wsp.factory(declaration.format(mass_name, 1810., 1920.))
    wsp.factory(declaration.format(dmass_name, 140.5, 160.5))

    wsp.var(dmass_name).setRange('plotting', 140.5, 152.5)
    wsp.var(mass_name).setRange('plotting', 1820, 1910)
    wsp.defineSet('datavars', '{},{}'.format(dmass_name, mass_name))

    pdf_vars = setup_pdf(wsp)
    return wsp, pdf_vars
def _dstp_slowpi_angle(df):
    """Build the ``dstp_slowpi_angle`` column from D0 and slow pion inputs.

    ``compute_delta_angle`` is fed pt, eta, phi and mass of the D0 followed
    by pt, eta and phi of the slow pion and the pion mass from
    ``config.PDG_MASSES``.

    Args:
        df: Data holding the kinematic columns of both particles.

    Returns:
        ``1`` when ``is_dummy_run(df)`` is true, otherwise a ``pd.Series``
        named ``'dstp_slowpi_angle'`` sharing the index of ``df``.
    """
    mode = gcm()
    d0, slow_pion = mode.D0, mode.Pislow

    d0_inputs = [df[accessor(d0)]
                 for accessor in (vars.pt, vars.eta, vars.phi, vars.m)]
    pion_inputs = [df[accessor(slow_pion)]
                   for accessor in (vars.pt, vars.eta, vars.phi)]
    pion_mass = config.PDG_MASSES[config.pion]

    angle = compute_delta_angle(*(d0_inputs + pion_inputs + [pion_mass]))

    # The computation above runs in dummy mode as well; only the result is
    # replaced by the placeholder.
    if is_dummy_run(df):
        return 1
    return pd.Series(angle, name='dstp_slowpi_angle', index=df.index)
def double_misid_d0(df):
    """Return the D0 mass with swapped kaon and same-sign pion hypotheses.

    The kaon candidate is given the pion mass and the same-sign pion the
    kaon mass; both opposite-sign pions keep the pion mass.

    Args:
        df: Data holding the DTF pt, eta and phi columns of the four D0
            daughters.

    Returns:
        ``1`` when ``is_dummy_run(df)`` is true, otherwise a ``pd.Series``
        named like the D0 mass column and sharing the index of ``df``.
    """
    mode = gcm()
    # (particle, key of the mass hypothesis assigned to it)
    hypotheses = (
        (mode.K, 'Pi'),
        (mode.Pi_SS, 'K'),
        (mode.Pi_OS1, 'Pi'),
        (mode.Pi_OS2, 'Pi'),
    )

    arguments = []
    for particle, mass_key in hypotheses:
        arguments.append(df[vars.dtf_pt(particle)])
        arguments.append(df[vars.dtf_eta(particle)])
        arguments.append(df[vars.dtf_phi(particle)])
        arguments.append(config.PDG_MASSES[mass_key])
    val = double_misid_d0_mass(*arguments)

    if is_dummy_run(df):
        return 1
    return pd.Series(val, name=vars.m(gcm().D0), index=df.index)
def run_spearmint_fit(spearmint_selection=None, metric='punzi'):
    """Evaluate a figure of merit for an optional additional selection.

    The stored workspace of the current mode is loaded and the data is
    reduced to the final selection, ANDed with ``spearmint_selection`` when
    one is given. In that case the ``total`` model is refitted, trying the
    fit strategies 2, 1 and 0 in turn until ``helpers.check_fit_result``
    accepts the result. The workspace is not written back.

    Args:
        spearmint_selection: Optional boolean mask combined with the final
            selection. ``None`` skips the refit and evaluates the metric on
            the workspace as loaded.
        metric: Name handed to ``get_metric`` to look up the figure of
            merit.

    Returns:
        The value returned by the metric, or ``0.0`` if a refit was
        requested and none of the strategies gave an acceptable fit.
    """
    from . import fit_config
    from ROOT import RooFit as RF

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    mode = gcm()
    wsp = fit_config.load_workspace(mode)
    sel = selection.get_final_selection()

    # Get the data
    df = mode.get_data([dtf_dm(), m(mode.D0)])
    if spearmint_selection is not None:
        sel = sel & spearmint_selection
    df = df[sel]

    data = fit_config.pandas_to_roodataset(df, wsp.set('datavars'))
    model = wsp.pdf('total')

    # Constructed before any refit, exactly where the original built it; kept
    # under its own name instead of rebinding the ``metric`` argument.
    evaluate_metric = get_metric(metric)(wsp)

    if spearmint_selection is not None:
        for strategy in (2, 1, 0):
            result = model.fitTo(data, RF.NumCPU(4), RF.Save(True),
                                 RF.Strategy(strategy), RF.Extended(True))
            if helpers.check_fit_result(result, log):
                break
        else:
            # No strategy produced an acceptable fit.
            log.warning('Bad fit quality')
            return 0.0

    return evaluate_metric()
def get_sweights(do_comb_bkg=False):
    """Compute sWeights for the selected candidates of the current mode.

    The ``signal`` and ``random`` PDFs of the stored workspace (plus
    ``combinatorial`` for two-tag modes) are evaluated per candidate, scaled
    by the fitted yields ``NSig``, ``NSPi`` and ``NBkg``, normalised per
    candidate and passed to ``hep_ml.splot.compute_sweights``.

    Args:
        do_comb_bkg: Currently without effect - the value is replaced by
            ``gcm().mode in config.twotag_modes`` inside the function.

    Returns:
        The sWeights indexed like the selected data. A ``'comb'`` column
        filled with ``0.0`` is added when the combinatorial component was
        not used.
    """
    helpers.allow_root()
    df = gcm().get_data([m(gcm().D0), dtf_dm()])

    from . import fit_config
    from hep_ml import splot

    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)

    wsp = fit_config.load_workspace(gcm())
    sel = selection.get_final_selection()
    do_comb_bkg = gcm().mode in config.twotag_modes
    df = df[sel]

    sig_pdf = wsp.pdf('signal')
    rnd_pdf = wsp.pdf('random')
    comb_pdf = wsp.pdf('combinatorial')

    # Evaluate every PDF first, then weight by the fitted yields.
    sig_prob = call_after_set(sig_pdf, wsp, **df)
    rnd_prob = call_after_set(rnd_pdf, wsp, **df)
    comb_prob = call_after_set(comb_pdf, wsp, **df) if do_comb_bkg else None

    weighted = dict(sig=sig_prob*wsp.var('NSig').getVal(),
                    rnd=rnd_prob*wsp.var('NSPi').getVal())
    if do_comb_bkg:
        weighted['comb'] = comb_prob*wsp.var('NBkg').getVal()

    probs = pd.DataFrame(weighted, index=df.index)
    row_totals = probs.sum(axis=1)
    probs = probs.div(row_totals, axis=0)

    sweights = splot.compute_sweights(probs)
    sweights.index = probs.index
    if not do_comb_bkg:
        sweights['comb'] = 0.0

    return sweights
def misid_plots():
    """Write the mis-identification control plots for the current mode.

    Two PDF files are produced in the ``'misid'`` output directory:

    * ``overview.pdf`` - one histogram per entry of ``double_misid_pc``; for
      every even-indexed entry an additional histogram restricted to
      candidates whose following entry is below 147.5; finally the delta
      mass of the candidates kept and removed by
      ``misid_selection.misid_cut``.
    * ``wrong_spi.pdf`` - the D0 mass built with the other slow pion and
      the corresponding delta mass.

    All histograms use the candidates passing the final selection.
    """
    # Get the necessary information from the current mode
    if gcm().mode in config.wrong_sign_modes:
        wrong_spi = add_variables.other_slowpi_ws()
    else:
        wrong_spi = add_variables.other_slowpi()

    dst_mass = gcm().get_data([vars.m(gcm().head)])[vars.m(gcm().head)]
    sel = final_selection.get_final_selection()

    # Only the range of the D0 mass binning is used; the number of bins is
    # fixed to a coarser value. Dedicated names keep these ranges from being
    # overwritten by the per-plot binnings below, which previously made the
    # wrong slow pion D0 mass plot use the delta mass range.
    _, mass_min, mass_max = gcm().mass_var.binning
    dmass_bins, dmass_min, dmass_max = gcm().dmass_var.binning
    mass_bins = 30

    misid = add_variables.double_misid()
    data = gcm().get_data([vars.dtf_dm(), vars.m(gcm().D0)])

    outfile = gcm().get_output_path('misid') + 'overview.pdf'
    with PdfPages(outfile) as pdf:
        for i, pc in enumerate(double_misid_pc):
            nbins, lo, hi = pc.binning

            fig, ax = plt.subplots(figsize=(10, 10))
            ax.hist(misid[sel][pc.var], bins=nbins, range=(lo, hi))
            ax.set_xlabel(pc.xlabel)
            ax.set_ylabel('Candidates')
            ax.set_xlim((lo, hi))
            pdf.savefig(fig)
            plt.close(fig)

            if i % 2 == 0:
                # Same variable again, restricted by the entry following it.
                cutvar = double_misid_pc[i+1].var
                narrow = misid[cutvar] < 147.5

                fig, ax = plt.subplots(figsize=(10, 10))
                ax.hist(misid[sel & narrow][pc.var], bins=nbins,
                        range=(lo, hi))
                ax.set_xlabel(pc.xlabel)
                ax.set_ylabel(r'Candidates with $\Delta m <147.5$')
                ax.set_xlim((lo, hi))
                pdf.savefig(fig)
                plt.close(fig)

        cut = misid_selection.misid_cut()
        dm = gcm().dmass_var
        nbins, lo, hi = dm.binning

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(data[dm.var][sel & cut], bins=nbins, color='#D3EFFB',
                range=(lo, hi), label='Kept', edgecolor='#D3EFFB')
        ax.hist(data[dm.var][sel & ~cut], bins=nbins, range=(lo, hi),
                label='Removed', color='#006EB6', edgecolor='#006EB6')
        ax.set_xlim((lo, hi))
        ax.set_xlabel(dm.xlabel)
        ax.set_ylabel('Candidates')
        ax.legend()
        pdf.savefig(fig)
        # Close instead of clear so the figure does not stay alive.
        plt.close(fig)

    outfile = gcm().get_output_path('misid') + 'wrong_spi.pdf'
    with PdfPages(outfile) as pdf:
        fig, ax = plt.subplots(figsize=(10, 10))
        # NOTE(review): `normed` was removed from newer matplotlib releases
        # in favour of `density` - switch once the supported matplotlib
        # version is confirmed.
        ax.hist(wrong_spi[sel], bins=mass_bins, range=(mass_min, mass_max),
                normed=True, color='#006EB6', edgecolor='#006EB6')
        ax.set_xlabel(gcm().mass_var.xlabel)
        ax.set_xlim((mass_min, mass_max))
        ax.set_ylabel('Arbitrary units')
        pdf.savefig(fig)
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist((dst_mass - wrong_spi)[sel], bins=dmass_bins,
                range=(dmass_min, dmass_max),
                color='#006EB6', edgecolor='#006EB6')
        ax.set_xlabel(gcm().dmass_var.xlabel)
        ax.set_xlim((dmass_min, dmass_max))
        pdf.savefig(fig)
        plt.close(fig)
def _draw_mass_region_page(pdf, values, xlabel, unit, xmin, xmax, peak,
                           signal_half_width, sideband_offset, nbins):
    """Add one page showing ``values`` with signal and sideband regions shaded."""
    fig, ax = plt.subplots(figsize=(10, 10))

    # Signal window boundaries
    sw_lo = peak - signal_half_width
    sw_hi = peak + signal_half_width
    # Lower sideband boundaries
    sb_lo_lo = xmin
    sb_lo_hi = peak - sideband_offset
    # Upper sideband boundaries
    sb_hi_lo = peak + sideband_offset
    sb_hi_hi = xmax

    # Bars are specified through their centres and widths.
    bkg = np.array([(sb_lo_hi + sb_lo_lo) / 2., (sb_hi_hi + sb_hi_lo) / 2.])
    bkgw = np.array([(sb_lo_hi - sb_lo_lo), (sb_hi_hi - sb_hi_lo)])
    sig = np.array([(sw_lo + sw_hi) / 2.])
    sigw = np.array([(sw_hi - sw_lo)])

    h_vals, edges = np.histogram(values, bins=nbins, range=(xmin, xmax))
    h_errorbars = np.sqrt(h_vals)
    x_ctr = (edges[1:] + edges[:-1]) / 2.
    x_err = (edges[1:] - edges[:-1]) / 2.

    dt_options = dict(fmt='o', markersize=5, capthick=1, capsize=0,
                      elinewidth=2, color='#000000',
                      markeredgecolor='#000000')
    ax.errorbar(x_ctr, h_vals, xerr=x_err, yerr=h_errorbars, **dt_options)

    # Shaded regions reach 10% above the highest bin.
    hmax = np.max(h_vals)
    ax.bar(sig, 1.10 * hmax, sigw, color='#D3EFFB', edgecolor='#D3EFFB',
           label='Signal', alpha=0.5)
    ax.bar(bkg, 1.10 * np.ones(len(bkg)) * hmax, bkgw, label='Background',
           color='#006EB6', edgecolor='#006EB6', alpha=0.5)

    ax.set_xlabel(xlabel)
    # Float arithmetic so the bin width is not truncated by integer division.
    bin_width = (float(xmax) - float(xmin)) / nbins
    unit_label = r'{} {}'.format(bin_width, unit)
    ax.set_ylabel(r'Candidates / ({0})'.format(unit_label))
    ax.legend()
    ax.set_xlim(xmin, 0.9999 * xmax)
    plot_utils.y_margin_scaler(ax, lf=0, la=True)
    pdf.savefig(fig)
    # Close rather than clear so the figure is released.
    plt.close(fig)


def plot_mass_regions():
    """Plot the D0 mass and delta mass with the signal/sideband regions.

    Writes ``mass_regions[_opt][_cand].pdf`` into the ``'selection'`` output
    directory of the current mode. The suffixes follow
    ``config.optimised_selection`` and ``config.candidates_selection``. The
    first page shows the D0 mass in 1810-1920 with a signal window of +-18
    and sidebands beyond +-30 around ``config.PDG_MASSES['D0']``; the second
    page shows the delta mass in 140.5-152.5 with a signal window of +-0.5
    and sidebands beyond +-2.3 around ``config.PDG_MASSES['delta']``.
    """
    sel = get_final_selection()
    df = gcm().get_data([vars.m(gcm().D0), vars.dtf_dm()])
    selected = df[sel]

    nbins = 100
    name = 'mass_regions'
    if config.optimised_selection:
        name += '_opt'
    if config.candidates_selection:
        name += '_cand'

    outfile = gcm().get_output_path('selection') + name + '.pdf'
    with PdfPages(outfile) as pdf:
        # Doing D0 mass first
        _draw_mass_region_page(
            pdf, selected[vars.m(gcm().D0)],
            xlabel=vars.m.latex((gcm().D0), with_unit=True),
            unit=vars.m.unit,
            xmin=1810, xmax=1920,
            peak=config.PDG_MASSES['D0'],
            signal_half_width=18., sideband_offset=30.,
            nbins=nbins)

        # Now delta mass
        _draw_mass_region_page(
            pdf, selected[vars.dtf_dm()],
            xlabel=vars.dtf_dm.latex(with_unit=True),
            unit=vars.dtf_dm.unit,
            xmin=140.5, xmax=152.5,
            peak=config.PDG_MASSES['delta'],
            signal_half_width=0.5, sideband_offset=2.3,
            nbins=nbins)
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Skim the input ROOT files of a mode into its bcolz store.

    The input files are processed by ``treesplitter`` in chunks of 25 files
    (in parallel on ``njobs`` workers), the resulting temporary ROOT files
    are read back with ``root_pandas`` and appended to a fresh bcolz table.
    Existing data at the target location is removed first, and cached
    buffers of the mode are dropped at the end.

    Args:
        modename: Mode name forwarded to ``get_mode``.
        polarity: Polarity forwarded to ``get_mode``.
        year: Year forwarded to ``get_mode``.
        full: Fraction of the data to keep. Unless it equals 1 (or ``mc``
            is given) only events whose event number is divisible by
            ``int(1 / full)`` are selected.
        test: If true, only the first four input files are used and the
            tree is read back in chunks of 100 rows instead of 500000.
        mc: Forwarded to ``get_mode``; simulation is always downloaded in
            full. The value ``'mc'`` additionally requests ``Dstp_BKGCAT``.
        njobs: Number of worker processes.
    """
    import root_pandas

    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)

    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    needs_dst_rename = modename == 'WS' and year == 2016
    if needs_dst_rename:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dst'

    try:
        sel = get_root_preselection.get(mode)

        # Always download the entire MC
        if full != 1 and mc is None:
            ctr = int(1./float(full))
            sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
            log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))

        input_files = mode.get_file_list()
        if test:
            input_files = input_files[:4]
        chunked = list(helpers.chunks(input_files, 25))

        # While the code is in development, just get any variables we can
        # access
        for part in mode.head.all_mothers() + mode.head.all_daughters():
            for func in variables.__all__:
                try:
                    getattr(variables, func)(part)
                except variables.AccessorUsage:
                    pass

        # Make some sorted variables. Saves the hassle when later training
        # BDTs
        arg_sorted_ip = '{},{},{},{}'.format(
            *[ipchi2(p) for p in mode.D0.all_daughters()])
        arg_sorted_pt = '{},{},{},{}'.format(
            *[pt(p) for p in mode.D0.all_daughters()])

        add_vars = {
            'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
            'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp),
                                            dtf_m(mode.D0)),
            'ltime_ratio': '{} / {}'.format(ltime(mode.D0),
                                            config.Dz_ltime),
            'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
            'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
            'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
            'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
            'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
            'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
            'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
            'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
        }
        variables_needed = list(variables.all_ever_used)

        if mc == 'mc':
            variables_needed.append('Dstp_BKGCAT')

        def run_splitter(fns):
            """Skim one chunk of files into a temporary ROOT file."""
            # NOTE(review): tempfile.mktemp only reserves a name and is
            # documented as race-prone; kept because it is unclear whether
            # treesplitter accepts a pre-existing output file.
            temp_file = tempfile.mktemp('.root')
            treesplitter(files=fns, treename=mode.get_tree_name(),
                         output=temp_file, variables=variables_needed,
                         selection=sel, addvariables=add_vars)
            return temp_file

        pool = ProcessingPool(njobs)
        temp_files = list(tqdm.tqdm(pool.uimap(run_splitter, chunked),
                                    leave=True, total=len(chunked),
                                    smoothing=0))

        log.info('Created {} temporary files.'.format(len(temp_files)))

        bcolz_folder = config.bcolz_locations.format(mode.get_store_name())

        try:
            log.info('Removing already existing data at {}'.format(
                bcolz_folder))
            shutil.rmtree(bcolz_folder)
        except OSError:
            log.info('No previous data found. Nothing to delete.')

        # Use the function argument rather than a module-level `args`
        # object, so the function also works when called programmatically.
        chunksize = 100 if test else 500000
        df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                       chunksize=chunksize)

        # New storage using bcolz because better
        ctuple = None

        for df in df_gen:
            log.info('Adding {} events of {} to store {}.'.format(
                len(df), mode.get_tree_name(), bcolz_folder))
            if needs_dst_rename:
                new_names = {
                    old: old.replace('Dst', 'Dstp')
                    for old in df.columns if 'Dst' in old
                }
                df = df.rename(index=str, columns=new_names)
            if ctuple is None:
                ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
            else:
                ctuple.append(df.to_records(index=False))

        for f in temp_files:
            os.remove(f)

        # Loop and delete everything in the datastore that needs to be
        # recached
        remove_buffer_for_mode(mode.mode)
    finally:
        # Undo the naming hack even when the download fails, so the mode
        # object is not left with the wrong particle name.
        if needs_dst_rename:
            mode.Dstp.name = 'Dstp'
def comb_bkg_sideband_region(df):
    """Select the D0 mass sidebands for a comb background enriched sample.

    The D0 mass has to be more than 30 away from
    ``config.PDG_MASSES['D0']``; in addition the delta mass has to be more
    than 2.3 away from ``config.PDG_MASSES['delta']``.

    Args:
        df: Data holding the D0 mass and delta mass columns.

    Returns:
        Boolean mask with one entry per row of ``df``.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = d0_offset > 30.
    mask &= dm_offset > 2.3
    return mask