Beispiel #1
0
def phsp_goofit(flat_ltime=False):
    """Read the phase-space MC ntuple and return it as a prepared DataFrame.

    At most 15000000 entries of the ``events`` tree are read. The generator
    level columns are renamed to the names produced by the ``vars``
    accessors, the two mass columns are multiplied by 1000, and the
    ``D0_Loki_BPVLTIME`` column is filled with freshly generated values.

    :flat_ltime: if true, draw the decay time uniformly between 0.0001725
        and 0.00326; otherwise use ``two_parts_generate(turn=0.55, ...)``
        divided by 1000.
    :returns: the modified DataFrame.

    """
    import root_pandas
    # Alternative input that was used previously:
    # path = 'root://eoslhcb.cern.ch//eos/lhcb/user/d/dmuller/K3Pi/RS_with_weight.root'
    path = 'root://eoslhcb.cern.ch//eos/lhcb/user/d/dmuller/K3Pi/phsp_mc.root'
    df = root_pandas.read_root(path, 'events', stop=15000000)

    # Map the column names found in the file onto the analysis names.
    new_names = {
        'c12': vars.cos1(),
        'c34': vars.cos2(),
        'dtime': vars.ltime(mode_config.D0),
        'phi': vars.phi1(),
        'm12': vars.m12(),
        'm34': vars.m34()
    }
    df.rename(columns=new_names, inplace=True)

    # Scale both mass columns by a factor of 1000.
    for mass_column in (vars.m12(), vars.m34()):
        df[mass_column] = df[mass_column] * 1000.

    n_events = df.index.size
    if flat_ltime:
        decay_times = np.random.uniform(0.0001725, 0.00326, size=n_events)
    else:
        decay_times = two_parts_generate(turn=0.55, size=n_events) / 1000.
    df['D0_Loki_BPVLTIME'] = decay_times

    return df
Beispiel #2
0
def _ltime_ratio(df):
    """Return the D0 decay time column of ``df`` divided by ``config.Dz_ltime``.

    For a dummy run (as decided by ``is_dummy_run``) the scalar ``1`` is
    returned instead of a Series named ``ltime_ratio``.

    """
    current_mode = gcm()
    # The ratio is evaluated before the dummy check on purpose: this keeps
    # the column lookup happening for every input.
    ratio = df[vars.ltime(current_mode.D0)] / config.Dz_ltime
    if not is_dummy_run(df):
        return pd.Series(ratio, name='ltime_ratio', index=df.index)
    return 1
Beispiel #3
0
def get_model_ws_alt(redo=False):
    """Return the alternative generated WS model sample as a DataFrame.

    :redo: if true, any existing bcolz cache folder is deleted, the ROOT
        input is read again, renamed, rescaled, restricted to the decay
        time window and written to the bcolz cache before it is returned.
        If false, the existing bcolz cache is opened and converted.
    :returns: pandas DataFrame with the model events.

    """
    files = [
        '/afs/cern.ch/user/c/chasse/public/forDominik/Sig_49_61_WSNR_Smaller.root'
    ]  # NOQA

    bcolz_folder = config.bcolz_locations.format('generated_model_ws_alt')
    if not redo:
        return bcolz.open(bcolz_folder).todataframe()

    try:
        shutil.rmtree(bcolz_folder)
    except OSError:
        # Best effort: there may simply be no previous cache to remove.
        # Only filesystem errors are ignored; anything else propagates.
        pass

    helpers.allow_root()
    import root_pandas
    df = root_pandas.read_root(files, 'events')

    # Now rename stuff and fix units to MeV and ns.
    # Ugly hardcoded for now.
    ltime_column = vars.ltime(mode_config.D0)
    df.rename(columns={
        'c12': vars.cos1(),
        'c34': vars.cos2(),
        'dtime': ltime_column,
        'phi': vars.phi1(),
        'm12': vars.m12(),
        'm34': vars.m34()
    },
              inplace=True)
    df[vars.m12()] = df[vars.m12()] * 1000.
    df[vars.m34()] = df[vars.m34()] * 1000.
    df[ltime_column] = df[ltime_column] / 1000.

    # Keep only events inside the decay time window.
    df = df.query('{} > 0.0001725'.format(ltime_column))
    df = df.query('{} < 0.003256'.format(ltime_column))

    bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
    return df
Beispiel #4
0
def get_model(redo=False):
    """Return the generated model sample as a DataFrame.

    The input files are taken from ``filelists.Generated.paths``.

    :redo: if true, any existing bcolz cache folder is deleted, the ROOT
        input is read again, renamed, rescaled, restricted to the decay
        time window and written to the bcolz cache before it is returned.
        If false, the existing bcolz cache is opened and converted.
    :returns: pandas DataFrame with the model events.

    """
    files = filelists.Generated.paths

    bcolz_folder = config.bcolz_locations.format('generated_model')
    if not redo:
        return bcolz.open(bcolz_folder).todataframe()

    try:
        shutil.rmtree(bcolz_folder)
    except OSError:
        # Best effort: there may simply be no previous cache to remove.
        # Only filesystem errors are ignored; anything else propagates.
        pass

    helpers.allow_root()
    import root_pandas
    df = root_pandas.read_root(files, 'events')

    # Now rename stuff and fix units to MeV and ns.
    # Ugly hardcoded for now.
    ltime_column = vars.ltime(mode_config.D0)
    df.rename(columns={
        'c12': vars.cos1(),
        'c34': vars.cos2(),
        'dtime': ltime_column,
        'phi': vars.phi1(),
        'm12': vars.m12(),
        'm34': vars.m34()
    },
              inplace=True)
    df[vars.m12()] = df[vars.m12()] * 1000.
    df[vars.m34()] = df[vars.m34()] * 1000.
    df[ltime_column] = df[ltime_column] / 1000.

    # Keep only events inside the decay time window.
    df = df.query('{} > 0.0001725'.format(ltime_column))
    df = df.query('{} < 0.003256'.format(ltime_column))

    bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
    return df
Beispiel #5
0
def phsp_goofit_alt():
    """Read the ``RS_with_weight_dtime`` ntuple and return it as a DataFrame.

    The ``events`` tree is read completely. Columns are renamed to the
    names produced by the ``vars`` accessors, the two mass columns are
    multiplied by 1000 and ``D0_Loki_BPVLTIME`` is divided by 1000.

    :returns: the modified DataFrame.

    """
    import root_pandas
    path = 'root://eoslhcb.cern.ch//eos/lhcb/user/d/dmuller/K3Pi/RS_with_weight_dtime.root'
    df = root_pandas.read_root(path, 'events')

    # Map the column names found in the file onto the analysis names.
    new_names = {
        'c12': vars.cos1(),
        'c34': vars.cos2(),
        'dtime': vars.ltime(mode_config.D0),
        'phi': vars.phi1(),
        'm12': vars.m12(),
        'm34': vars.m34()
    }
    df.rename(columns=new_names, inplace=True)

    # Scale both mass columns up and the decay time down by 1000.
    for mass_column in (vars.m12(), vars.m34()):
        df[mass_column] = df[mass_column] * 1000.
    df['D0_Loki_BPVLTIME'] = df['D0_Loki_BPVLTIME'] / 1000.

    return df
def get(mode):
    """Build the ROOT preselection string for the given mode.

    :mode: mode object; this function reads ``mode.D0``, ``mode.head``,
        ``mode.Pislow``, ``mode.mc`` and ``mode.mode``.
    :returns: a single string in which every cut is wrapped in parentheses
        and all cuts are joined with ``' && '``.

    """
    cuts = []
    # A D0 mass window cut used to live here but is disabled:
    # 'fabs(' + m(mode.D0) + ' - {}) < 60.'.format(config.PDG_MASSES[config.Dz])
    cuts.append(
        build_step_cuts(ipchi2, mode.D0.all_daughters(), [4, 4, 4, 4]))

    # Momentum window for every daughter of the head particle. These cuts
    # are emitted once; listing them twice would not change the selection.
    for daug in mode.head.all_daughters():
        cuts.append(p(daug) + ' >= 3000.')
        cuts.append(p(daug) + ' < 100000.')

    cuts.append(dtf_chi2(mode.head) + ' > 0.')
    cuts.append(vdchi2(mode.D0) + ' > 0.')
    cuts.append(maxdoca(mode.D0) + ' > 0.')
    cuts.append(mindoca(mode.D0) + ' > 0.')
    cuts.append(ltime(mode.D0) + ' > -10000.')

    # The ProbNN K/pi requirements are only applied when mode.mc is None.
    apply_pid = mode.mc is None

    for kaon in mode.head.all_pid(config.kaon):
        if apply_pid:
            cuts.append(probnnk(kaon) + ' > 0.3')
            cuts.append(probnnpi(kaon) + ' < 0.7')
    for pion in mode.head.all_pid(config.pion):
        if apply_pid:
            cuts.append(probnnpi(pion) + ' > 0.3')
            cuts.append(probnnk(pion) + ' < 0.7')
    # NOTE(review): the cuts below use mode.Pislow rather than the loop
    # variable; presumably the loop only guards on a slow pion being
    # present -- confirm before changing.
    for _ in mode.head.all_pid(config.slowpion):
        cuts.append(probnnghost(mode.Pislow) + ' < 0.3')
        if apply_pid:
            cuts.append(probnnpi(mode.Pislow) + ' > 0.3')
            cuts.append(probnnk(mode.Pislow) + ' < 0.7')

    if mode.mode in config.twotag_modes:
        cuts.append(pt(mode.D0) + ' >= 1800.')
    else:
        cuts.append(pt(mode.D0) + ' >= 4000.')
        cuts.append('TMath::Log(' + ipchi2(mode.D0) + ') < 1.')

    return ' && '.join('({})'.format(cut) for cut in cuts)
Beispiel #7
0
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Skim the ROOT input of one mode and store the result as a bcolz table.

    The input files of the mode are processed in chunks of 25 files by
    ``treesplitter`` (in a pool of ``njobs`` workers), which applies the
    preselection and adds derived variables. The resulting temporary ROOT
    files are read back in chunks and written to the bcolz folder of the
    mode; any data already stored there is removed first. Finally the
    buffers cached for this mode are removed.

    :modename: name of the mode, passed to ``get_mode``.
    :polarity: polarity, passed to ``get_mode``.
    :year: data taking year, passed to ``get_mode``.
    :full: fraction of events to keep. Unless it is 1 (or ``mc`` is set)
        only events whose event number is divisible by ``int(1/full)``
        are selected.
    :test: if true, only the first four input files are used and the
        temporary files are read back in chunks of 100 rows instead of
        500000.
    :mc: MC flag passed to ``get_mode``; for ``'mc'`` the ``Dstp_BKGCAT``
        variable is stored in addition.
    :njobs: number of worker processes.

    """
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)
    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    needs_dst_rename = modename == 'WS' and year == 2016
    if needs_dst_rename:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dst'

    try:
        sel = get_root_preselection.get(mode)

        # Always download the entire MC
        if full != 1 and mc is None:
            ctr = int(1./float(full))
            sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
            log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))

        input_files = mode.get_file_list()
        if test:
            input_files = input_files[:4]
        chunked = list(helpers.chunks(input_files, 25))

        # While the code is in development, just get any variables we can
        # access
        for part in mode.head.all_mothers() + mode.head.all_daughters():
            for func in variables.__all__:
                try:
                    getattr(variables, func)(part)
                except variables.AccessorUsage:
                    pass

        # Make some sorted variables. Saves the hassle when later training
        # BDTs
        arg_sorted_ip = '{},{},{},{}'.format(
            *[ipchi2(p) for p in mode.D0.all_daughters()])
        arg_sorted_pt = '{},{},{},{}'.format(
            *[pt(p) for p in mode.D0.all_daughters()])

        add_vars = {
            'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
            'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp),
                                            dtf_m(mode.D0)),
            'ltime_ratio': '{} / {}'.format(ltime(mode.D0),
                                            config.Dz_ltime),
            'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
            'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
            'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
            'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
            'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
            'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
            'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
            'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
        }
        variables_needed = list(variables.all_ever_used)

        if mc == 'mc':
            variables_needed.append('Dstp_BKGCAT')

        def run_splitter(fns):
            # NOTE(review): tempfile.mktemp only reserves a name and is
            # documented as race-prone; kept because treesplitter creates
            # the output file itself.
            temp_file = tempfile.mktemp('.root')
            treesplitter(files=fns, treename=mode.get_tree_name(),
                         output=temp_file, variables=variables_needed,
                         selection=sel, addvariables=add_vars)
            return temp_file

        temp_files = []
        try:
            pool = ProcessingPool(njobs)
            for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                               leave=True, total=len(chunked), smoothing=0):
                temp_files.append(r)

            log.info('Created {} temporary files.'.format(len(temp_files)))
            bcolz_folder = config.bcolz_locations.format(
                mode.get_store_name())

            try:
                log.info('Removing already existing data at {}'.format(
                    bcolz_folder))
                shutil.rmtree(bcolz_folder)
            except OSError:
                log.info('No previous data found. Nothing to delete.')

            # Use the ``test`` argument for the chunk size so the function
            # does not depend on a module-level ``args`` object.
            df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                           chunksize=100 if test else 500000)

            # New storage using bcolz because better
            ctuple = None

            for df in df_gen:
                log.info('Adding {} events of {} to store {}.'.format(
                    len(df), mode.get_tree_name(), bcolz_folder))
                if needs_dst_rename:
                    new_names = {
                        old: old.replace('Dst', 'Dstp')
                        for old in df.columns if 'Dst' in old
                    }
                    df = df.rename(index=str, columns=new_names)
                if ctuple is None:
                    ctuple = bcolz.ctable.fromdataframe(
                        df, rootdir=bcolz_folder)
                else:
                    ctuple.append(df.to_records(index=False))
        finally:
            # Remove the temporary files even if reading or storing failed.
            for f in temp_files:
                os.remove(f)

        # Loop and delete everything in the datastore that needs to be
        # recached
        remove_buffer_for_mode(mode.mode)
    finally:
        if needs_dst_rename:
            # Undo the name hack from the start, also when an error occurred.
            mode.Dstp.name = 'Dstp'
Beispiel #8
0
def train_bdts(sw=False, comb_bkg=False):
    """Train the three BDT flavours for the current mode and pickle them.

    Three classifiers are configured with the same tree settings: a
    ``GradientBoostingClassifier`` with exponential loss and two
    ``UGradientBoostingClassifier`` instances using the kNN and the binned
    flatness loss in the D0 decay time. They are fitted on the training
    sample, dumped via ``bdt_utils.dump_classifiers`` and the buffer of
    ``get_bdt_discriminant`` is removed afterwards.

    :sw: forwarded to ``bdt_data.prep_data_for_sklearn``; also selects the
        minimum number of samples per leaf (2000 if true, otherwise 10).
    :comb_bkg: forwarded as ``comb_data`` to the data preparation and to
        ``bdt_utils.dump_classifiers``.

    """
    log.info('Training BDTs for {} {} {}'.format(gcm().mode,
                                                 gcm().polarity,
                                                 gcm().year))
    samples, features, spectators = bdt_data.prep_data_for_sklearn(
        sw=sw, same_weight=True, comb_data=comb_bkg)  # NOQA
    train, test, train_lbl, test_lbl = samples

    uniform_features = [vars.ltime(gcm().D0)]

    log.info('Configuring classifiers')

    # Tree settings shared by all classifiers. The learning rate is 0.1
    # both with and without combinatorial background.
    tree_settings = dict(
        max_depth=3,
        n_estimators=400,
        learning_rate=0.1,
        min_samples_leaf=2000 if sw else 10,
    )

    classifiers = {}
    classifiers['Exponential'] = GradientBoostingClassifier(
        loss='exponential', **tree_settings)

    knn_loss = ugb.KnnFlatnessLossFunction(uniform_features,
                                           fl_coefficient=3.,
                                           power=1.3,
                                           uniform_label=1,
                                           max_groups=2000,
                                           n_neighbours=300)
    classifiers['KnnFlatness'] = ugb.UGradientBoostingClassifier(
        loss=knn_loss, train_features=features, **tree_settings)

    bin_loss = ugb.BinFlatnessLossFunction(uniform_features,
                                           fl_coefficient=3.,
                                           power=2.0,
                                           uniform_label=1,
                                           n_bins=15)
    classifiers['BinFlatness'] = ugb.UGradientBoostingClassifier(
        loss=bin_loss, train_features=features, **tree_settings)

    log.info('Fitting classifiers')

    # The flatness classifiers additionally need the uniform feature column
    # in their training input.
    fit_plan = [
        ('Exponential', features),
        ('KnnFlatness', features + uniform_features),
        ('BinFlatness', features + uniform_features),
    ]
    for name, columns in fit_plan:
        classifiers[name].fit(train[columns],
                              train_lbl,
                              sample_weight=train.weights)

    log.info('Pickling the thing')
    bdt_utils.dump_classifiers(classifiers, comb_bkg=comb_bkg)
    buffer.remove_buffer_for_function(get_bdt_discriminant)
Beispiel #9
0
def d0_lifetime_permille(df):
    """Return a boolean mask selecting the D0 decay time window.

    An entry is true where the D0 decay time column of ``df`` is strictly
    greater than 0.0001725 and strictly smaller than 0.00326.

    """
    above_lower_edge = df[ltime(gcm().D0)] > 0.0001725
    below_upper_edge = df[ltime(gcm().D0)] < 0.00326
    return above_lower_edge & below_upper_edge