예제 #1
def load_reweighter():
    """Load the pickled reweighter belonging to the mode returned by gcm().

    The mode is first rebuilt from its polarity, year and short name only,
    which drops possible MC flags. Hard coded: if the rebuilt mode is one of
    the WS modes, the matching RS mode is used instead.

    Returns the result of ``helpers.load`` for the file ``reweighter.p``
    appended to the mode's 'effs' output path.
    """
    current = gcm()
    # Recreate the mode to get rid of potential MC flags.
    clean = get_mode(current.polarity, current.year, current.mode_short)

    # WS modes are mapped onto their RS counterparts; checked in sequence.
    ws_to_rs = (
        (config.D0ToKpipipi_WS, 'RS'),
        (config.D0ToKpipipi_2tag_WS, '2tag_RS'),
    )
    for ws_mode, rs_short in ws_to_rs:
        if clean.mode == ws_mode:
            clean = get_mode(clean.polarity, clean.year, rs_short)

    return helpers.load(clean.get_output_path('effs') + 'reweighter.p')
예제 #2
 def __init__(self, function, allow_for=None):
     """Wrap ``function`` and record which columns it accesses per mode.

     ``function`` is dummy-called once for every combination of
     ``config.all_modes_short`` and ``self.allow_for`` with a
     ``DefaultOrderedDict``; the keys of that dict afterwards are stored
     in ``self.requested_columns`` under the key ``m`` (plus ``mc`` when
     it is not None).

     NOTE(review): this copy of the method appears to be missing lines
     (see the notes below) and does not parse as shown.

     :param function: the callable to wrap; its name and docstring are
         copied onto this object.
     :param allow_for: optional list of MC flags to iterate over.
     """
     # NOTE(review): both assignments sit in the same branch, so a supplied
     # allow_for is immediately overwritten by the default list, and nothing
     # is assigned when allow_for is None. An `else:` before the second
     # assignment looks to be missing -- confirm against the original.
     if allow_for is not None:
         self.allow_for = allow_for
         self.allow_for = [None, 'mc', 'gen']
     self.requested_columns = {}
     # True when the wrapped function declares a parameter named 'mode'.
     self._wants_mode = 'mode' in inspect.getargspec(function).args
     self.log = get_logger(function.__name__)
     for m, mc in product(config.all_modes_short, self.allow_for):
         # Every key looked up during the dummy call gets the default 1.
         d = DefaultOrderedDict(lambda: 1)
         # Dummy call the selection classes with the mode classes to get
         # the different variables needed.
         if self._wants_mode:
             mode_cls = get_mode('MagDown', 2015, m, mc)
             function(d, mode_cls)
             # NOTE(review): the body of this `with` statement is not
             # present. Presumably it belongs to an `else:` branch that
             # calls the function without a mode argument -- confirm.
             with modes.MODE('MagDown', 2015, m, mc):
         # Key under which the columns are stored: the short mode name,
         # followed by the MC flag if there is one.
         look_up = m
         if mc is not None:
             look_up += mc
         self.requested_columns[look_up] = d.keys()
         # accumulated_per_mode is not defined in this method; presumably a
         # module-level mapping of sets -- verify. The list built here is
         # discarded, only the add() side effect matters.
         [accumulated_per_mode[look_up].add(n) for n in d.keys()]
     # Same assignment as before the loop; repeated here.
     self._wants_mode = 'mode' in inspect.getargspec(function).args
     self._func = function
     # Copy name and docstring of the wrapped function onto this object.
     self._func_name = function.__name__
     self.__name__ = function.__name__
     self.__doc__ = function.__doc__
예제 #3
def load_classifiers(comb_bkg=False):
    """Load the pickled classifiers belonging to the mode returned by gcm().

    The mode is first rebuilt from its polarity, year and short name only,
    which drops possible MC flags. Hard coded: if the rebuilt mode is one of
    the WS modes, the matching RS mode is used instead.

    :param comb_bkg: if true, load from the 'bdt_comb_bkg' output folder,
        otherwise from 'bdt_rand_spi'.
    :returns: the result of ``helpers.load`` for the file ``classifiers.p``
        appended to the mode's output path for the chosen folder.
    """
    mode = gcm()
    # Pick exactly one folder. Previously both assignments sat inside the
    # `if`, so the flag had no effect and the name was unbound when the flag
    # was false.
    bdt_folder = 'bdt_comb_bkg' if comb_bkg else 'bdt_rand_spi'
    # Hard coded check here: Use the RS mode if WS is supplied.
    # Recreate the mode first to get rid of potential MC flags.
    mode = get_mode(mode.polarity, mode.year, mode.mode_short)
    if mode.mode == config.D0ToKpipipi_WS:
        mode = get_mode(mode.polarity, mode.year, 'RS')
    if mode.mode == config.D0ToKpipipi_2tag_WS:
        mode = get_mode(mode.polarity, mode.year, '2tag_RS')
    outfile = mode.get_output_path(bdt_folder) + 'classifiers.p'
    return helpers.load(outfile)
예제 #4
def get_luminosity(mode, polarity, year):
    """Log the integrated luminosity summed over the files of one mode.

    Only files that contain the tree 'GetIntegratedLuminosity/LumiTuple'
    are read; the others are skipped.

    :param mode: short mode name handed to ``get_mode``.
    :param polarity: polarity handed to ``get_mode``; also logged.
    :param year: year handed to ``get_mode``; also logged.
    """
    mode = get_mode(polarity, year, mode)

    lumi_key = 'GetIntegratedLuminosity/LumiTuple'

    # For a yet to be determined reason, some files do not contain a
    # LumiTuple, so keep only the ones that do.
    infiles = []
    for f in mode.files:
        fl = ROOT.TFile.Open(f)
        # Check for the tree before closing: the object returned by Get
        # belongs to the file.
        has_lumi = bool(fl.Get(lumi_key))
        # Close right away so file handles do not pile up over the loop.
        fl.Close()
        if has_lumi:
            infiles.append(f)

    # Get the files and stuff them into a dataframe
    df = root_pandas.read_root(infiles, key=lumi_key)

    # Sum once and reuse the result for both logged values.
    totals = df.sum()
    log.info('Luminosity {} {}: {} +- {}'.format(
        year, polarity,
        totals.IntegratedLuminosity, totals.IntegratedLuminosityErr))
예제 #5
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Fetch the data of one mode, preselect it and store it with bcolz.

    NOTE(review): several lines of this function appear to be missing from
    this copy (empty ``if``/``for`` bodies, ``except`` clauses without a
    ``try``, unclosed brackets), so it does not parse as shown. The
    comments below only describe the statements that are visible.

    :param modename: short mode name handed to ``get_mode``. The value
        'WS' together with ``year == 2016`` triggers a renaming hack.
    :param polarity: polarity handed to ``get_mode``.
    :param year: year handed to ``get_mode``.
    :param full: fraction of the data to keep. Unless it equals 1 or
        ``mc`` is given, the selection additionally requires
        ``evt_num() % int(1 / full) == 0``.
    :param test: if true, only the first four input files are used.
    :param mc: MC flag handed to ``get_mode``; when it is not None the
        prescale derived from ``full`` is skipped.
    :param njobs: number of workers handed to ``ProcessingPool``.
    """
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)
    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    if modename == 'WS' and year == 2016:
        # As this is the start, hack name of the particle in the mode.
        # The name is set back to 'Dstp' at the end of the function.
        mode.Dstp.name = 'Dst'

    sel = get_root_preselection.get(mode)

    # Always download the entire MC
    if full != 1 and mc is None:
        # Keep only every ctr-th event number by prepending a modulo cut.
        ctr = int(1./float(full))
        sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
        log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))


    input_files = mode.get_file_list()
    if test:
        input_files = input_files[:4]
    # Process the input files in groups of 25.
    chunked = list(helpers.chunks(input_files, 25))
    length = len(list(chunked))

    # While the code is in development, just get any variables we can
    # access
    for part in mode.head.all_mothers() + mode.head.all_daughters():
        for func in variables.__all__:
                # NOTE(review): the `try:` line matching the `except` below
                # and the body of that `except` are not present here.
                getattr(variables, func)(part)
            except variables.AccessorUsage:

    # Make some sorted variables. Saves the hassle when later training BDTs
    arg_sorted_ip = '{},{},{},{}'.format(
        *[ipchi2(p) for p in mode.D0.all_daughters()])
    arg_sorted_pt = '{},{},{},{}'.format(
        *[pt(p) for p in mode.D0.all_daughters()])

    # Derived variables, given as expression strings keyed by the new name.
    # NOTE(review): the closing brace of this dict is not present.
    add_vars = {
        'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
        'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp), dtf_m(mode.D0)),
        'ltime_ratio': '{} / {}'.format(ltime(mode.D0), config.Dz_ltime),
        'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
        'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
        'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
        'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
        'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
        'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
        'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
        'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
    variables_needed = list(variables.all_ever_used)

    # NOTE(review): the body of this `if` is not present.
    if mc == 'mc':

    def run_splitter(fns):
        # Run treesplitter on one group of files, writing to a temporary
        # ROOT file whose path is returned.
        # NOTE(review): tempfile.mktemp only returns a name and is
        # documented as deprecated; the treesplitter call below is also
        # missing its remaining arguments and closing parenthesis.
        temp_file = tempfile.mktemp('.root')
        treesplitter(files=fns, treename=mode.get_tree_name(), output=temp_file,
                     variables=variables_needed, selection=sel,
        return temp_file

    pool = ProcessingPool(njobs)
    temp_files = []
    # NOTE(review): the loop body is not present; presumably it collects
    # the returned paths in temp_files -- confirm.
    for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                       leave=True, total=length, smoothing=0):

    log.info('Created {} temporary files.'.format(len(temp_files)))
    bcolz_folder = config.bcolz_locations.format(mode.get_store_name())

        # NOTE(review): the `try:` line, the end of this log call and the
        # statement removing the existing data are not present.
        log.info('Removing already existing data at {}'.format(
    except OSError:
        log.info('No previous data found. Nothing to delete.')

    # Read the temporary files back in chunks of dataframes.
    # NOTE(review): the chunk size is picked with `args.test`, not the
    # `test` parameter; `args` is not defined inside this function.
    df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                   chunksize=[500000, 100][args.test])

    # New storage using bcolz because better
    ctuple = None

    for df in df_gen:
        log.info('Adding {} events of {} to store {}.'.format(
            len(df), mode.get_tree_name(), bcolz_folder))
        if modename == 'WS' and year == 2016:
            # Undo the 'Dst' naming hack in the column names.
            # NOTE(review): the closing brace of this dict comprehension
            # is not present.
            new_names = {
                old: old.replace('Dst', 'Dstp')
                for old in df.columns if 'Dst' in old
            df = df.rename(index=str, columns=new_names)
        # The first chunk creates the on-disk table.
        # NOTE(review): no handling of later chunks is visible here.
        if ctuple is None:
            ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)

    # NOTE(review): the body of this loop is not present.
    for f in temp_files:
    # Loop and delete everything in the datastore that needs to be recached
    if modename == 'WS' and year == 2016:
        # Restore the particle name that was changed at the start.
        mode.Dstp.name = 'Dstp'