def load_reweighter():
    mode = gcm()
    # Hard-coded check: use the RS mode if WS is supplied. Also recreate the
    # mode object here to remove potential MC flags.
    mode = get_mode(mode.polarity, mode.year, mode.mode_short)
    if mode.mode == config.D0ToKpipipi_WS:
        mode = get_mode(mode.polarity, mode.year, 'RS')
    if mode.mode == config.D0ToKpipipi_2tag_WS:
        mode = get_mode(mode.polarity, mode.year, '2tag_RS')
    outfile = mode.get_output_path('effs') + 'reweighter.p'
    return helpers.load(outfile)
def __init__(self, function, allow_for=None):
    if allow_for is not None:
        self.allow_for = allow_for
    else:
        self.allow_for = [None, 'mc', 'gen']
    self.requested_columns = {}
    self._wants_mode = 'mode' in inspect.getargspec(function).args
    self.log = get_logger(function.__name__)
    for m, mc in product(config.all_modes_short, self.allow_for):
        d = DefaultOrderedDict(lambda: 1)
        # Dummy-call the selection function with the mode classes to record
        # the different variables it needs.
        if self._wants_mode:
            mode_cls = get_mode('MagDown', 2015, m, mc)
            function(d, mode_cls)
        else:
            with modes.MODE('MagDown', 2015, m, mc):
                function(d)
        look_up = m
        if mc is not None:
            look_up += mc
        self.requested_columns[look_up] = list(d.keys())
        for n in d.keys():
            accumulated_per_mode[look_up].add(n)
    self._func = function
    self._func_name = function.__name__
    self.__name__ = function.__name__
    self.__doc__ = function.__doc__
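# Illustrative, self-contained sketch (not part of the original module) of
# the introspection pattern used in __init__ above: probe whether a wrapped
# selection function asks for a `mode` argument and call it accordingly with
# a counting dict that records which columns it touches. The names
# `probe_columns` and `dummy_selection` are hypothetical, and
# `getfullargspec` stands in for the `getargspec` call used above.
import inspect
from collections import defaultdict


def probe_columns(function, mode_obj=None):
    # Dummy dict: any column the selection reads gets recorded as a key.
    d = defaultdict(lambda: 1)
    wants_mode = 'mode' in inspect.getfullargspec(function).args
    if wants_mode:
        function(d, mode_obj)
    else:
        function(d)
    return list(d.keys())


def dummy_selection(df, mode):
    # A toy selection that only reads the D0 transverse momentum column.
    return df['D0_PT'] > 2000.


# probe_columns(dummy_selection) returns ['D0_PT'], the column the
# selection requested.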
def load_classifiers(comb_bkg=False):
    mode = gcm()
    if comb_bkg:
        bdt_folder = 'bdt_comb_bkg'
    else:
        bdt_folder = 'bdt_rand_spi'
    # Hard-coded check: use the RS mode if WS is supplied. Also recreate the
    # mode object here to remove potential MC flags.
    mode = get_mode(mode.polarity, mode.year, mode.mode_short)
    if mode.mode == config.D0ToKpipipi_WS:
        mode = get_mode(mode.polarity, mode.year, 'RS')
    if mode.mode == config.D0ToKpipipi_2tag_WS:
        mode = get_mode(mode.polarity, mode.year, '2tag_RS')
    outfile = mode.get_output_path(bdt_folder) + 'classifiers.p'
    return helpers.load(outfile)
def get_luminosity(mode, polarity, year):
    mode = get_mode(polarity, year, mode)
    # For a yet to be determined reason, some files do not contain a
    # LumiTuple, so sort those ones out.
    infiles = []
    for f in mode.files:
        fl = ROOT.TFile.Open(f)
        if fl.Get('GetIntegratedLuminosity/LumiTuple'):
            infiles.append(f)
        fl.Close()
    # Get the files and stuff them into a dataframe
    df = root_pandas.read_root(
        infiles, key='GetIntegratedLuminosity/LumiTuple')
    log.info('Luminosity {} {}: {} +- {}'.format(
        year, polarity, df.sum().IntegratedLuminosity,
        df.sum().IntegratedLuminosityErr))
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))
    mode = get_mode(polarity, year, modename, mc)
    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    if modename == 'WS' and year == 2016:
        # As this is the start, hack the name of the particle in the mode.
        mode.Dstp.name = 'Dst'
    sel = get_root_preselection.get(mode)

    # Always download the entire MC
    if full != 1 and mc is None:
        ctr = int(1./float(full))
        sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
        log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))

    input_files = mode.get_file_list()
    if test:
        input_files = input_files[:4]
    chunked = list(helpers.chunks(input_files, 25))
    length = len(chunked)

    # While the code is in development, just get any variables we can access
    for part in mode.head.all_mothers() + mode.head.all_daughters():
        for func in variables.__all__:
            try:
                getattr(variables, func)(part)
            except variables.AccessorUsage:
                pass

    # Make some sorted variables. Saves the hassle when later training BDTs
    arg_sorted_ip = '{},{},{},{}'.format(
        *[ipchi2(p) for p in mode.D0.all_daughters()])
    arg_sorted_pt = '{},{},{},{}'.format(
        *[pt(p) for p in mode.D0.all_daughters()])

    add_vars = {
        'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
        'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp), dtf_m(mode.D0)),
        'ltime_ratio': '{} / {}'.format(ltime(mode.D0), config.Dz_ltime),
        'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
        'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
        'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
        'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
        'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
        'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
        'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
        'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
    }

    variables_needed = list(variables.all_ever_used)
    if mc == 'mc':
        variables_needed.append('Dstp_BKGCAT')

    def run_splitter(fns):
        temp_file = tempfile.mktemp('.root')
        treesplitter(files=fns, treename=mode.get_tree_name(),
                     output=temp_file, variables=variables_needed,
                     selection=sel, addvariables=add_vars)
        return temp_file

    # Split and skim the input files in parallel, one temporary ROOT file
    # per chunk of 25 inputs.
    pool = ProcessingPool(njobs)
    temp_files = []
    for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                       leave=True, total=length, smoothing=0):
        temp_files.append(r)
    log.info('Created {} temporary files.'.format(len(temp_files)))

    bcolz_folder = config.bcolz_locations.format(mode.get_store_name())
    try:
        log.info('Removing already existing data at {}'.format(
            bcolz_folder))
        shutil.rmtree(bcolz_folder)
    except OSError:
        log.info('No previous data found. Nothing to delete.')

    df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                   chunksize=[500000, 100][test])

    # New storage using bcolz because better
    ctuple = None
    for df in df_gen:
        log.info('Adding {} events of {} to store {}.'.format(
            len(df), mode.get_tree_name(), bcolz_folder))
        if modename == 'WS' and year == 2016:
            new_names = {
                old: old.replace('Dst', 'Dstp')
                for old in df.columns if 'Dst' in old
            }
            df = df.rename(index=str, columns=new_names)
        if ctuple is None:
            ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
        else:
            ctuple.append(df.to_records(index=False))

    for f in temp_files:
        os.remove(f)

    # Loop and delete everything in the datastore that needs to be recached
    remove_buffer_for_mode(mode.mode)
    if modename == 'WS' and year == 2016:
        # Undo the hack: restore the particle name in the mode.
        mode.Dstp.name = 'Dstp'
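# Illustrative sketch (not part of the original module) of the chunked bcolz
# storage pattern used in `download` above: the first DataFrame chunk creates
# the on-disk ctable, later chunks are appended as records. `store_chunks`
# and the column/path names are made up for the example; it assumes `bcolz`
# and `pandas` are installed.
import bcolz
import pandas as pd


def store_chunks(df_chunks, rootdir):
    ctuple = None
    for df in df_chunks:
        if ctuple is None:
            # The first chunk defines the schema and creates the store.
            ctuple = bcolz.ctable.fromdataframe(df, rootdir=rootdir)
        else:
            # Later chunks must match the schema of the first chunk.
            ctuple.append(df.to_records(index=False))
    return ctuple


# Example usage with two small chunks:
# chunks = [pd.DataFrame({'delta_m': [145.4, 145.5]}),
#           pd.DataFrame({'delta_m': [146.0, 144.9]})]
# store_chunks(chunks, '/tmp/example_store')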