Exemple #1
0
def d0_selection(df):
    """Combine the D0 requirements into a single mask for ``df``.

    The requirements on the columns named by ``vchi2`` and ``maxdoca`` are
    always applied. When the current mode is not listed in
    ``config.twotag_modes``, the requirements on the columns named by
    ``ipchi2`` (through its logarithm) and ``pt`` are applied as well.

    :df: table indexable by the names the variable accessors return
    :returns: the ``&``-combination of all applied comparisons

    """
    current_mode = gcm()
    d0 = current_mode.D0

    masks = []
    if current_mode.mode not in config.twotag_modes:
        masks.append(np.log(df[ipchi2(d0)]) < 1.)
        masks.append(df[pt(d0)] > 4000.)
    masks.append(df[vchi2(d0)] < 4.)
    masks.append(df[maxdoca(d0)] < .2)

    # Start from True so the first mask is combined exactly like the rest.
    selected = True
    for mask in masks:
        selected &= mask
    return selected
def get(mode):
    """Build the ROOT preselection string for ``mode``.

    Each requirement is wrapped in parentheses and all requirements are
    joined with ``&&``.

    :mode: object providing ``D0``, ``head``, ``Pislow``, ``mc`` and
        ``mode``; the PID requirements on kaons and pions are only added
        when ``mode.mc`` is None
    :returns: the selection expression as a single string

    """
    _cuts = []
    # Currently disabled D0 mass window requirement:
    # _cuts += ['fabs(' +
    # m(mode.D0) +
    # ' - {}) < 60.'.format(config.PDG_MASSES[config.Dz])]
    _cuts.append(
        build_step_cuts(ipchi2, mode.D0.all_daughters(), [4, 4, 4, 4]))

    # Momentum window for every daughter. This used to be appended twice,
    # which only duplicated identical terms in the selection string.
    for daug in mode.head.all_daughters():
        _cuts.append(p(daug) + ' >= 3000.')
        _cuts.append(p(daug) + ' < 100000.')

    _cuts.append(dtf_chi2(mode.head) + ' > 0.')
    _cuts.append(vdchi2(mode.D0) + ' > 0.')
    _cuts.append(maxdoca(mode.D0) + ' > 0.')
    _cuts.append(mindoca(mode.D0) + ' > 0.')
    _cuts.append(ltime(mode.D0) + ' > -10000.')

    # PID requirements are skipped whenever mode.mc is set.
    apply_pid = mode.mc is None

    for kaon in mode.head.all_pid(config.kaon):
        if apply_pid:
            _cuts.append(probnnk(kaon) + ' > 0.3')
            _cuts.append(probnnpi(kaon) + ' < 0.7')
    for pion in mode.head.all_pid(config.pion):
        if apply_pid:
            _cuts.append(probnnpi(pion) + ' > 0.3')
            _cuts.append(probnnk(pion) + ' < 0.7')
    # NOTE(review): the requirements below use mode.Pislow rather than the
    # loop variable; presumably both refer to the same particle -- confirm.
    for _ in mode.head.all_pid(config.slowpion):
        _cuts.append(probnnghost(mode.Pislow) + ' < 0.3')
        if apply_pid:
            _cuts.append(probnnpi(mode.Pislow) + ' > 0.3')
            _cuts.append(probnnk(mode.Pislow) + ' < 0.7')

    if mode.mode in config.twotag_modes:
        _cuts.append(pt(mode.D0) + ' >= 1800.')
    else:
        _cuts.append(pt(mode.D0) + ' >= 4000.')
        _cuts.append('TMath::Log(' + ipchi2(mode.D0) + ') < 1.')

    return ' && '.join('({})'.format(cut) for cut in _cuts)
Exemple #3
0
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Skim the input ROOT files of one mode into a fresh bcolz store.

    The input files are processed in chunks of 25 by ``treesplitter``, which
    applies the preselection and writes temporary ROOT files. Those are then
    read back and stored at ``config.bcolz_locations``; data already present
    at that location is removed first. Finally the temporary files are
    deleted and ``remove_buffer_for_mode`` is called for the mode.

    :modename: mode name, forwarded to ``get_mode``
    :polarity: forwarded to ``get_mode``
    :year: forwarded to ``get_mode``
    :full: if not 1 (and ``mc`` is None), only events whose event number is
        divisible by ``int(1 / full)`` are kept
    :test: if true, only the first four input files are used and the data
        is read back in chunks of 100 rows instead of 500000
    :mc: forwarded to ``get_mode``; ``'mc'`` additionally requests the
        ``Dstp_BKGCAT`` variable
    :njobs: size of the processing pool running ``treesplitter``
    :returns: None

    """
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)
    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    needs_rename = modename == 'WS' and year == 2016
    if needs_rename:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dst'

    try:
        sel = get_root_preselection.get(mode)

        # Always download the entire MC
        if full != 1 and mc is None:
            ctr = int(1./float(full))
            sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
            log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))

        input_files = mode.get_file_list()
        if test:
            input_files = input_files[:4]
        chunked = list(helpers.chunks(input_files, 25))
        length = len(chunked)

        # While the code is in development, just get any variables we can
        # access
        for part in mode.head.all_mothers() + mode.head.all_daughters():
            for func in variables.__all__:
                try:
                    getattr(variables, func)(part)
                except variables.AccessorUsage:
                    pass

        # Make some sorted variables. Saves the hassle when later training
        # BDTs
        arg_sorted_ip = '{},{},{},{}'.format(
            *[ipchi2(p) for p in mode.D0.all_daughters()])
        arg_sorted_pt = '{},{},{},{}'.format(
            *[pt(p) for p in mode.D0.all_daughters()])

        add_vars = {
            'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
            'delta_m_dtf': '{} - {}'.format(
                dtf_m(mode.Dstp), dtf_m(mode.D0)),
            'ltime_ratio': '{} / {}'.format(
                ltime(mode.D0), config.Dz_ltime),
            'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
            'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
            'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
            'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
            'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
            'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
            'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
            'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
        }
        variables_needed = list(variables.all_ever_used)

        if mc == 'mc':
            variables_needed.append('Dstp_BKGCAT')

        def run_splitter(fns):
            # NOTE(review): tempfile.mktemp is deprecated because another
            # process can claim the name before the file is created.
            temp_file = tempfile.mktemp('.root')
            treesplitter(files=fns, treename=mode.get_tree_name(),
                         output=temp_file, variables=variables_needed,
                         selection=sel, addvariables=add_vars)
            return temp_file

        pool = ProcessingPool(njobs)
        temp_files = list(tqdm.tqdm(pool.uimap(run_splitter, chunked),
                                    leave=True, total=length, smoothing=0))

        log.info('Created {} temporary files.'.format(len(temp_files)))
        bcolz_folder = config.bcolz_locations.format(mode.get_store_name())

        try:
            log.info('Removing already existing data at {}'.format(
                bcolz_folder))
            shutil.rmtree(bcolz_folder)
        except OSError:
            log.info('No previous data found. Nothing to delete.')

        # Use the ``test`` argument here; this used to read a global
        # ``args`` object that is not defined by this function.
        df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                       chunksize=100 if test else 500000)

        # New storage using bcolz because better
        ctuple = None

        for df in df_gen:
            log.info('Adding {} events of {} to store {}.'.format(
                len(df), mode.get_tree_name(), bcolz_folder))
            if needs_rename:
                new_names = {
                    old: old.replace('Dst', 'Dstp')
                    for old in df.columns if 'Dst' in old
                }
                df = df.rename(index=str, columns=new_names)
            if ctuple is None:
                ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
            else:
                ctuple.append(df.to_records(index=False))

        for f in temp_files:
            os.remove(f)
        # Loop and delete everything in the datastore that needs to be
        # recached
        remove_buffer_for_mode(mode.mode)
    finally:
        if needs_rename:
            # Undo the name hack even if something above failed, so the
            # mode object is not left with the wrong particle name.
            mode.Dstp.name = 'Dstp'
Exemple #4
0
def remove_secondary(df):
    """Return the mask ``log(x) < 1`` for the D0 ``ipchi2`` column of ``df``.

    :df: table indexable by the name that ``ipchi2`` returns for the D0 of
        the current mode
    :returns: the element-wise comparison result

    """
    d0_ipchi2 = df[ipchi2(gcm().D0)]
    log_ipchi2 = np.log(d0_ipchi2)
    return log_ipchi2 < 1.
Exemple #5
0
def _dtf_ip_diff(df):
    """Subtract the D0 ``ipchi2`` column from the head ``dtf_chi2`` column.

    :df: table indexable by the names the ``vars`` accessors return
    :returns: ``df[dtf_chi2(head)] - df[ipchi2(D0)]`` for the current mode

    """
    dtf_values = df[vars.dtf_chi2(gcm().head)]
    ip_values = df[vars.ipchi2(gcm().D0)]
    return dtf_values - ip_values