def d0_selection(df):
    """Build the D0 candidate selection mask for the current mode.

    The D0 particle is taken from ``gcm()``. For modes that are not listed
    in ``config.twotag_modes`` the log(IPchi2) < 1 and pT > 4000 requirements
    are applied; the vertex chi2 < 4 and max DOCA < 0.2 requirements are
    applied to every mode.

    :df: object indexable by the column names returned by the accessors
    :returns: the combined result of and-ing all requirements
    """
    current = gcm()
    d0 = current.D0

    requirements = []
    if current.mode not in config.twotag_modes:
        requirements.append(np.log(df[ipchi2(d0)]) < 1.)
        requirements.append(df[pt(d0)] > 4000.)
    requirements.append(df[vchi2(d0)] < 4.)
    requirements.append(df[maxdoca(d0)] < .2)

    # Start from a plain True so the first requirement defines the result.
    passed = True
    for requirement in requirements:
        passed &= requirement
    return passed
def get(mode):
    """Get the preselection ROOT information from the mode

    Builds one ROOT selection expression out of the individual cuts on the
    decay described by ``mode``. Particle identification cuts on kaons,
    pions and the slow pion are only added when ``mode.mc`` is None.

    :mode: mode object; ``mode.D0``, ``mode.head``, ``mode.Pislow``,
           ``mode.mc`` and ``mode.mode`` are read
    :returns: str with every cut wrapped in parentheses and joined by ' && '
    """
    cuts = []
    is_data = mode.mc is None

    # NOTE: a D0 mass window cut is currently disabled:
    # cuts += ['fabs(' + m(mode.D0) +
    #          ' - {}) < 60.'.format(config.PDG_MASSES[config.Dz])]

    cuts.append(build_step_cuts(ipchi2, mode.D0.all_daughters(),
                                [4, 4, 4, 4]))

    # Momentum window on every daughter of the decay head. This used to be
    # emitted twice; the second copy was redundant in the '&&' chain.
    for daug in mode.head.all_daughters():
        momentum = p(daug)
        cuts.append(momentum + ' >= 3000.')
        cuts.append(momentum + ' < 100000.')

    cuts.append(dtf_chi2(mode.head) + ' > 0.')
    cuts.append(vdchi2(mode.D0) + ' > 0.')
    cuts.append(maxdoca(mode.D0) + ' > 0.')
    cuts.append(mindoca(mode.D0) + ' > 0.')
    cuts.append(ltime(mode.D0) + ' > -10000.')

    for kaon in mode.head.all_pid(config.kaon):
        if is_data:
            cuts.append(probnnk(kaon) + ' > 0.3')
            cuts.append(probnnpi(kaon) + ' < 0.7')

    for pion in mode.head.all_pid(config.pion):
        if is_data:
            cuts.append(probnnpi(pion) + ' > 0.3')
            cuts.append(probnnk(pion) + ' < 0.7')

    # The loop variable is deliberately unused: the cuts are always written
    # for mode.Pislow, once per entry returned by all_pid(config.slowpion).
    for _ in mode.head.all_pid(config.slowpion):
        cuts.append(probnnghost(mode.Pislow) + ' < 0.3')
        if is_data:
            cuts.append(probnnpi(mode.Pislow) + ' > 0.3')
            cuts.append(probnnk(mode.Pislow) + ' < 0.7')

    if mode.mode in config.twotag_modes:
        cuts.append(pt(mode.D0) + ' >= 1800.')
    else:
        cuts.append(pt(mode.D0) + ' >= 4000.')
        cuts.append('TMath::Log(' + ipchi2(mode.D0) + ') < 1.')

    return ' && '.join('({})'.format(cut) for cut in cuts)
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Download the preselected data of one mode into its bcolz store.

    The input files of the mode are split into chunks of 25 files, each
    chunk is reduced with ``treesplitter`` into a temporary ROOT file (using
    a ``ProcessingPool`` created with ``njobs``), and the temporary files are
    then read back with ``root_pandas`` and written to a bcolz ctable. Any
    data already stored at the target location is removed first. Afterwards
    ``remove_buffer_for_mode`` is called for the mode.

    :modename: mode name, forwarded to ``get_mode``
    :polarity: forwarded to ``get_mode``
    :year: forwarded to ``get_mode``
    :full: if not 1 and ``mc`` is None, only events whose event number is
           divisible by ``int(1. / full)`` are kept
    :test: if true, only the first four input files are processed and the
           ROOT files are read back in chunks of 100 instead of 500000
    :mc: forwarded to ``get_mode``; None enables the ``full`` prescale and
         'mc' additionally stores ``Dstp_BKGCAT``
    :njobs: argument handed to ``ProcessingPool``
    :returns: None
    """
    import root_pandas

    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)

    # HACK: the p in Dstp was accidentally forgotten for this one sample, so
    # the particle is temporarily renamed in the mode. The name is restored
    # in the ``finally`` below, also when the download fails half way.
    rename_dst = modename == 'WS' and year == 2016
    if rename_dst:
        mode.Dstp.name = 'Dst'

    try:
        sel = get_root_preselection.get(mode)

        # Always download the entire MC
        if full != 1 and mc is None:
            ctr = int(1./float(full))
            prescale = '({} % {} == 0)'.format(evt_num(), ctr)
            sel = prescale + ' && ' + sel
            log.info('Using ' + prescale)

        input_files = mode.get_file_list()
        if test:
            input_files = input_files[:4]
        chunked = list(helpers.chunks(input_files, 25))

        # While the code is in development, just get any variables we can
        # access
        for part in mode.head.all_mothers() + mode.head.all_daughters():
            for func in variables.__all__:
                try:
                    getattr(variables, func)(part)
                except variables.AccessorUsage:
                    # Accessor is not usable for this particle; skip it.
                    pass

        # Make some sorted variables. Saves the hassle when later training
        # BDTs
        arg_sorted_ip = '{},{},{},{}'.format(
            *[ipchi2(daug) for daug in mode.D0.all_daughters()])
        arg_sorted_pt = '{},{},{},{}'.format(
            *[pt(daug) for daug in mode.D0.all_daughters()])

        add_vars = {
            'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
            'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp),
                                            dtf_m(mode.D0)),
            'ltime_ratio': '{} / {}'.format(ltime(mode.D0),
                                            config.Dz_ltime),
            'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
            'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
            'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
            'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
            'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
            'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
            'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
            'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
        }

        variables_needed = list(variables.all_ever_used)
        if mc == 'mc':
            variables_needed.append('Dstp_BKGCAT')

        def run_splitter(fns):
            # NOTE(review): mktemp only picks a name and is documented as
            # race-prone; kept because the output file is handed to
            # treesplitter by name - confirm before switching to mkstemp.
            temp_file = tempfile.mktemp('.root')
            treesplitter(files=fns, treename=mode.get_tree_name(),
                         output=temp_file, variables=variables_needed,
                         selection=sel, addvariables=add_vars)
            return temp_file

        pool = ProcessingPool(njobs)
        temp_files = []
        try:
            for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                               leave=True, total=len(chunked), smoothing=0):
                temp_files.append(r)
            log.info('Created {} temporary files.'.format(len(temp_files)))

            bcolz_folder = config.bcolz_locations.format(
                mode.get_store_name())
            try:
                log.info('Removing already existing data at {}'.format(
                    bcolz_folder))
                shutil.rmtree(bcolz_folder)
            except OSError:
                log.info('No previous data found. Nothing to delete.')

            # Use the ``test`` argument of this function rather than a
            # module level ``args`` object, so the function also works
            # when it is not driven by the command line parser.
            df_gen = root_pandas.read_root(
                temp_files, mode.get_tree_name(),
                chunksize=100 if test else 500000)

            # New storage using bcolz because better
            ctuple = None
            for df in df_gen:
                log.info('Adding {} events of {} to store {}.'.format(
                    len(df), mode.get_tree_name(), bcolz_folder))
                if rename_dst:
                    # Undo the naming hack in the stored column names.
                    new_names = {
                        old: old.replace('Dst', 'Dstp')
                        for old in df.columns if 'Dst' in old
                    }
                    df = df.rename(index=str, columns=new_names)
                if ctuple is None:
                    ctuple = bcolz.ctable.fromdataframe(
                        df, rootdir=bcolz_folder)
                else:
                    ctuple.append(df.to_records(index=False))
        finally:
            # Do not leave the temporary ROOT files behind on failure.
            for f in temp_files:
                os.remove(f)

        # Loop and delete everything in the datastore that needs to be
        # recached
        remove_buffer_for_mode(mode.mode)
    finally:
        if rename_dst:
            # Give the particle its regular name back.
            mode.Dstp.name = 'Dstp'
def remove_secondary(df):
    """Select entries whose log(IPchi2) of the current mode's D0 is below 1.

    :df: object indexable by the column name returned by ``ipchi2``
    :returns: result of comparing ``np.log`` of that column with 1
    """
    d0 = gcm().D0
    log_ipchi2 = np.log(df[ipchi2(d0)])
    return log_ipchi2 < 1.
def _dtf_ip_diff(df):
    """Return the DTF chi2 of the head minus the IPchi2 of the D0.

    Both particles are taken from the current mode (``gcm()``).

    :df: object indexable by the column names returned by the accessors
    :returns: difference of the two columns
    """
    current = gcm()
    head_dtf_chi2 = df[vars.dtf_chi2(current.head)]
    d0_ipchi2 = df[vars.ipchi2(current.D0)]
    return head_dtf_chi2 - d0_ipchi2