Ejemplo n.º 1
0
def plot_fit(suffix=None, wsp=None):
    """Write the fit plots of the current mode into a single PDF file.

    For each of the accessors ``m`` and ``dtf_dm``,
    ``roofit_to_matplotlib.plot_fit`` is called twice: once with its default
    settings and once with ``do_pulls=False``.

    Args:
        suffix: Optional string inserted into the output file name
            (``fits<suffix>.pdf``). ``None`` is treated as an empty string.
        wsp: Workspace to plot. When ``None`` it is loaded through
            ``fit_config.load_workspace`` for the current mode.
    """
    from . import roofit_to_matplotlib
    from . import fit_config
    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)
    mode = gcm()
    if wsp is None:
        wsp = fit_config.load_workspace(mode)
    final_sel = selection.get_final_selection()

    selected = mode.get_data([dtf_dm(), m(mode.D0)])[final_sel]
    data = fit_config.pandas_to_roodataset(selected, wsp.set('datavars'))
    fit_config.WS_DMASS_NAME = dtf_dm()
    fit_config.WS_MASS_NAME = m(mode.D0)

    name_tag = '' if suffix is None else suffix
    outfile = (mode.get_output_path('sweight_fit')
               + 'fits{}.pdf'.format(name_tag))
    with PdfPages(outfile) as pdf:
        for func in [m, dtf_dm]:
            # First pass uses the plotting defaults, second disables pulls.
            for extra_options in ({}, {'do_pulls': False}):
                roofit_to_matplotlib.plot_fit(
                    mode.D0, wsp, func, data=data, pdf=pdf,
                    do_comb_bkg=mode.mode in config.twotag_modes,
                    **extra_options)
Ejemplo n.º 2
0
def mass_fiducial_selection(df):
    """Return the mask of entries inside the fiducial mass window.

    An entry passes when its D0 mass column lies in [1810, 1920) and its
    delta mass column lies in [140.5, 160.5).

    Args:
        df: Container indexed by the column names returned by ``m`` and
            ``dtf_dm``.
    """
    d0_mass = df[m(gcm().D0)]
    delta_mass = df[dtf_dm()]

    mask = True
    mask &= (d0_mass >= 1810.)
    mask &= (d0_mass < 1920.)
    mask &= (delta_mass >= 140.5)
    mask &= (delta_mass < 160.5)
    return mask
Ejemplo n.º 3
0
def rand_spi_sideband_region(df):
    """Return the mask for a sample enriched in random slow pions.

    Keeps entries within 18 of the PDG D0 mass whose delta mass is more than
    2.3 away from the PDG delta mass (i.e. the delta mass sidebands).
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset > 2.3
    return mask
Ejemplo n.º 4
0
def mass_signal_region(df):
    """Return the mask for a signal-enriched sample.

    Keeps entries within 18 of the PDG D0 mass and within 0.5 of the PDG
    delta mass.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    mask = True
    mask &= d0_offset < 18.
    mask &= dm_offset < 0.5
    return mask
Ejemplo n.º 5
0
def fit():
    """Run the mass fit for the current mode and dump the workspace.

    The two mass columns of the current mode are loaded and reduced with the
    final selection, a fresh workspace is built with ``setup_workspace``,
    the starting values are plotted (``plot_fit('_start_values', ...)``) and
    the ``total`` pdf is fitted to the data. A failed quality check is only
    logged as an error; the workspace is dumped in either case.
    """
    # TODO: rewrite selection to use gcm itself
    mode = gcm()
    final_sel = selection.get_final_selection()

    columns = [dtf_dm(), m(mode.D0)]
    selected = mode.get_data(columns)[final_sel]

    from . import fit_config
    from ROOT import RooFit as RF
    from .fit_setup import setup_workspace

    wsp, _unused_vars = setup_workspace()
    dataset = fit_config.pandas_to_roodataset(selected, wsp.set('datavars'))
    total_pdf = wsp.pdf('total')

    plot_fit('_start_values', wsp=wsp)
    fit_options = (RF.NumCPU(4), RF.Save(True), RF.Strategy(2),
                   RF.Extended(True))
    fit_result = total_pdf.fitTo(dataset, *fit_options)

    fit_is_good = helpers.check_fit_result(fit_result, log)
    if not fit_is_good:
        log.error('Bad fit quality')
    fit_config.dump_workspace(mode, wsp)
Ejemplo n.º 6
0
def setup_workspace():
    """Create the fit workspace for the current mode.

    Declares the D0 mass and delta mass variables with their full ranges,
    attaches a narrower ``plotting`` range to each, groups both in the
    ``datavars`` set and lets ``setup_pdf`` build the model. The names of the
    two variables are also published on ``fit_config``.

    Returns:
        Tuple ``(workspace, value returned by setup_pdf)``.
    """
    mode = modes.gcm()
    workspace = ROOT.RooWorkspace(mode.mode, mode.mode)

    dmass_name = dtf_dm()
    mass_name = m(mode.D0)
    fit_config.WS_DMASS_NAME = dmass_name
    fit_config.WS_MASS_NAME = mass_name

    # Full fit ranges of the two observables.
    workspace.factory('{}[{},{}]'.format(mass_name, 1810., 1920.))
    workspace.factory('{}[{},{}]'.format(dmass_name, 140.5, 160.5))

    # Narrower named ranges stored under the label 'plotting'.
    workspace.var(dmass_name).setRange('plotting', 140.5, 152.5)
    workspace.var(mass_name).setRange('plotting', 1820, 1910)

    workspace.defineSet('datavars', '{},{}'.format(dmass_name, mass_name))

    pdf_vars = setup_pdf(workspace)
    return workspace, pdf_vars
Ejemplo n.º 7
0
def _dstp_slowpi_angle(df):
    """Compute the ``dstp_slowpi_angle`` variable from D0 and slow pion columns.

    ``compute_delta_angle`` receives pt, eta, phi and mass of the D0, then
    pt, eta and phi of the slow pion together with the PDG pion mass.

    Returns:
        ``1`` when ``is_dummy_run(df)`` is true, otherwise a ``pd.Series``
        named ``dstp_slowpi_angle`` sharing the index of ``df``.
    """
    mode = gcm()
    d0, slow_pi = mode.D0, mode.Pislow

    d0_inputs = [df[vars.pt(d0)], df[vars.eta(d0)],
                 df[vars.phi(d0)], df[vars.m(d0)]]
    slow_pi_inputs = [df[vars.pt(slow_pi)], df[vars.eta(slow_pi)],
                      df[vars.phi(slow_pi)],
                      config.PDG_MASSES[config.pion]]

    # The columns are read and the angle evaluated before the dummy-run
    # check, exactly as the call order requires.
    angle = compute_delta_angle(*(d0_inputs + slow_pi_inputs))
    if is_dummy_run(df):
        return 1
    return pd.Series(angle, name='dstp_slowpi_angle', index=df.index)
Ejemplo n.º 8
0
def double_misid_d0(df):
    """Return the D0 mass with swapped kaon and same-sign pion mass hypotheses.

    The kaon is given the PDG pion mass and the ``Pi_SS`` pion the PDG kaon
    mass; both ``Pi_OS`` pions keep the pion mass.

    Returns:
        ``1`` when ``is_dummy_run(df)`` is true, otherwise a ``pd.Series``
        named after the D0 mass column and sharing the index of ``df``.
    """
    mode = gcm()
    pion_mass = config.PDG_MASSES['Pi']
    kaon_mass = config.PDG_MASSES['K']

    # (particle, mass hypothesis) in the order expected by the calculation.
    hypotheses = [
        (mode.K, pion_mass),
        (mode.Pi_SS, kaon_mass),
        (mode.Pi_OS1, pion_mass),
        (mode.Pi_OS2, pion_mass),
    ]
    inputs = []
    for part, mass in hypotheses:
        inputs.extend([df[vars.dtf_pt(part)], df[vars.dtf_eta(part)],
                       df[vars.dtf_phi(part)], mass])

    val = double_misid_d0_mass(*inputs)
    if is_dummy_run(df):
        return 1
    return pd.Series(val, name=vars.m(gcm().D0), index=df.index)
Ejemplo n.º 9
0
def run_spearmint_fit(spearmint_selection=None, metric='punzi'):
    """Evaluate a selection metric on the stored workspace of the current mode.

    The workspace is loaded from disk and never written back. When a
    ``spearmint_selection`` is supplied, the ``total`` pdf is refitted to the
    data passing the final selection AND the supplied selection before the
    metric is evaluated. The fit is attempted with minimiser strategies 2, 1
    and 0 in turn, stopping at the first attempt that passes
    ``helpers.check_fit_result``.

    Args:
        spearmint_selection: Optional mask combined with the final selection
            via ``&``. When ``None`` no fit is performed.
        metric: Name handed to ``get_metric`` to look up the metric factory.

    Returns:
        The value of the metric, or ``0.0`` when none of the fit attempts
        passes the quality check.
    """
    from . import fit_config
    from ROOT import RooFit as RF
    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)
    mode = gcm()
    wsp = fit_config.load_workspace(mode)
    sel = selection.get_final_selection()

    # Get the data
    df = mode.get_data([dtf_dm(), m(mode.D0)])
    if spearmint_selection is not None:
        sel = sel & spearmint_selection
    df = df[sel]

    data = fit_config.pandas_to_roodataset(df, wsp.set('datavars'))
    model = wsp.pdf('total')

    # Bound to its own name so the ``metric`` argument is not shadowed.
    evaluate_metric = get_metric(metric)(wsp)

    if spearmint_selection is not None:
        # Fall back to progressively simpler strategies until one fit passes.
        for strategy in (2, 1, 0):
            result = model.fitTo(data, RF.NumCPU(4), RF.Save(True),
                                 RF.Strategy(strategy), RF.Extended(True))
            if helpers.check_fit_result(result, log):
                break
        else:
            log.warning('Bad fit quality')
            return 0.0

    return evaluate_metric()
Ejemplo n.º 10
0
def get_sweights(do_comb_bkg=False):
    """Compute sWeights for the current mode from the stored fit workspace.

    The ``signal``, ``random`` and (for two-tag modes) ``combinatorial`` pdfs
    are evaluated per candidate, scaled by the fitted yields ``NSig``,
    ``NSPi`` and ``NBkg``, normalised per candidate and passed to
    ``hep_ml.splot.compute_sweights``.

    Args:
        do_comb_bkg: NOTE(review): the value passed in is overwritten below
            by ``mode in config.twotag_modes`` and therefore has no effect.

    Returns:
        The sWeights table, indexed like the selected data. A ``comb`` column
        of zeros is added when the combinatorial component is not used.
    """
    helpers.allow_root()
    mode = gcm()
    df = mode.get_data([m(mode.D0), dtf_dm()])
    from . import fit_config
    from hep_ml import splot
    for shape_name in ('RooCruijff', 'RooJohnsonSU', 'RooBackground'):
        shapes.load_shape_class(shape_name)
    wsp = fit_config.load_workspace(mode)

    final_sel = selection.get_final_selection()
    do_comb_bkg = mode.mode in config.twotag_modes

    df = df[final_sel]

    sig_pdf = wsp.pdf('signal')
    rnd_pdf = wsp.pdf('random')
    comb_pdf = wsp.pdf('combinatorial')

    sig_prob = call_after_set(sig_pdf, wsp, **df)
    rnd_prob = call_after_set(rnd_pdf, wsp, **df)
    comb_prob = call_after_set(comb_pdf, wsp, **df) if do_comb_bkg else None

    # Per-candidate pdf values weighted with the fitted yields.
    weighted = dict(sig=sig_prob*wsp.var('NSig').getVal(),
                    rnd=rnd_prob*wsp.var('NSPi').getVal())
    if do_comb_bkg:
        weighted['comb'] = comb_prob*wsp.var('NBkg').getVal()

    probs = pd.DataFrame(weighted, index=df.index)
    row_totals = probs.sum(axis=1)
    probs = probs.div(row_totals, axis=0)

    sweights = splot.compute_sweights(probs)
    sweights.index = probs.index
    if not do_comb_bkg:
        sweights['comb'] = 0.0

    return sweights
Ejemplo n.º 11
0
def misid_plots():
    """Produce the control plots of the mis-ID study for the current mode.

    Two PDF files are written below the ``misid`` output path:

    * ``overview.pdf``: one histogram per entry of ``double_misid_pc``; for
      every even-indexed entry additionally the same histogram restricted to
      candidates whose following entry's variable is below 147.5; finally
      the delta mass distribution split into candidates kept and removed by
      ``misid_selection.misid_cut()``.
    * ``wrong_spi.pdf``: the ``wrong_spi`` variable in the D0 mass range and
      the difference between the head-particle mass and ``wrong_spi`` in the
      delta mass range.

    All histograms only contain candidates passing the final selection.
    """
    mode = gcm()
    # Get the necessary information from the current mode
    if mode.mode in config.wrong_sign_modes:
        wrong_spi = add_variables.other_slowpi_ws()
    else:
        wrong_spi = add_variables.other_slowpi()

    head_mass_name = vars.m(mode.head)
    dst_mass = mode.get_data([head_mass_name])[head_mass_name]
    sel = final_selection.get_final_selection()

    # Keep the mass/delta-mass ranges under dedicated names: the loops below
    # unpack other binnings and must not overwrite them.
    _, mass_min, mass_max = mode.mass_var.binning
    dm_bins, dm_min, dm_max = mode.dmass_var.binning
    mass_bins = 30  # fixed binning, used instead of the mass_var bin count

    misid = add_variables.double_misid()
    data = mode.get_data([vars.dtf_dm(), vars.m(mode.D0)])
    outfile = mode.get_output_path('misid') + 'overview.pdf'
    with PdfPages(outfile) as pdf:
        for i, pc in enumerate(double_misid_pc):
            nbins, xmin, xmax = pc.binning

            fig, ax = plt.subplots(figsize=(10, 10))
            ax.hist(misid[sel][pc.var], bins=nbins, range=(xmin, xmax))
            ax.set_xlabel(pc.xlabel)
            ax.set_ylabel('Candidates')
            ax.set_xlim((xmin, xmax))
            pdf.savefig(fig)
            plt.close(fig)

            if i % 2 == 0:
                # NOTE(review): presumably the entries come in pairs and the
                # odd-indexed one is the matching delta mass -- confirm.
                cutvar = double_misid_pc[i+1].var
                narrow = misid[cutvar] < 147.5

                fig, ax = plt.subplots(figsize=(10, 10))
                ax.hist(misid[sel & narrow][pc.var], bins=nbins,
                        range=(xmin, xmax))
                ax.set_xlabel(pc.xlabel)
                ax.set_ylabel(r'Candidates with $\Delta m <147.5$')
                ax.set_xlim((xmin, xmax))
                pdf.savefig(fig)
                plt.close(fig)

        cut = misid_selection.misid_cut()
        dm = mode.dmass_var
        nbins, xmin, xmax = dm.binning

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist(data[dm.var][sel & cut], bins=nbins, color='#D3EFFB',
                range=(xmin, xmax), label='Kept', edgecolor='#D3EFFB')
        ax.hist(data[dm.var][sel & ~cut], bins=nbins,
                range=(xmin, xmax), label='Removed', color='#006EB6',
                edgecolor='#006EB6')
        ax.set_xlim((xmin, xmax))
        ax.set_xlabel(dm.xlabel)
        ax.set_ylabel('Candidates')
        ax.legend()
        pdf.savefig(fig)
        plt.close(fig)

    outfile = mode.get_output_path('misid') + 'wrong_spi.pdf'
    # Context manager guarantees the file is finalised even on errors.
    with PdfPages(outfile) as pdf:
        fig, ax = plt.subplots(figsize=(10, 10))
        # NOTE(review): `normed` is the legacy spelling of `density` and is
        # rejected by recent matplotlib releases -- confirm target version.
        ax.hist(wrong_spi[sel], bins=mass_bins, range=(mass_min, mass_max),
                normed=True, color='#006EB6', edgecolor='#006EB6')
        ax.set_xlabel(mode.mass_var.xlabel)
        ax.set_xlim((mass_min, mass_max))
        ax.set_ylabel('Arbitrary units')
        pdf.savefig(fig)
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.hist((dst_mass - wrong_spi)[sel], bins=dm_bins,
                range=(dm_min, dm_max), color='#006EB6',
                edgecolor='#006EB6')
        ax.set_xlabel(mode.dmass_var.xlabel)
        ax.set_xlim((dm_min, dm_max))
        pdf.savefig(fig)
        plt.close(fig)
0
def _draw_mass_regions(pdf, values, xmin, xmax, nbins, centre,
                       signal_halfwidth, sideband_offset, xlabel, unit_name):
    """Draw one histogram with signal and sideband bands and save it to pdf.

    Args:
        pdf: Open ``PdfPages`` object receiving the figure.
        values: Data to histogram.
        xmin, xmax: Histogram range; also the outer sideband edges.
        nbins: Number of histogram bins.
        centre: Centre of the signal window.
        signal_halfwidth: Half width of the signal window around ``centre``.
        sideband_offset: Distance from ``centre`` at which the lower sideband
            ends and the upper sideband starts.
        xlabel: Label of the x axis.
        unit_name: Unit text appended to the bin width in the y-axis label.
    """
    fig, ax = plt.subplots(figsize=(10, 10))

    # Signal window boundaries
    sw_lo = centre - signal_halfwidth
    sw_hi = centre + signal_halfwidth
    # Lower sideband boundaries
    sb_lo_lo = xmin
    sb_lo_hi = centre - sideband_offset
    # Upper sideband boundaries
    sb_hi_lo = centre + sideband_offset
    sb_hi_hi = xmax

    # Bar centres and widths of the shaded bands.
    bkg = np.array([(sb_lo_hi + sb_lo_lo) / 2.,
                    (sb_hi_hi + sb_hi_lo) / 2.])
    bkgw = np.array([(sb_lo_hi - sb_lo_lo), (sb_hi_hi - sb_hi_lo)])
    sig = np.array([(sw_lo + sw_hi) / 2.])
    sigw = np.array([(sw_hi - sw_lo)])

    h_vals, edges = np.histogram(values, bins=nbins, range=(xmin, xmax))
    h_errorbars = np.sqrt(h_vals)

    x_ctr = (edges[1:] + edges[:-1]) / 2.
    x_err = (edges[1:] - edges[:-1]) / 2.

    ax.errorbar(x_ctr, h_vals, xerr=x_err, yerr=h_errorbars,
                fmt='o',
                markersize=5,
                capthick=1,
                capsize=0,
                elinewidth=2,
                color='#000000',
                markeredgecolor='#000000')

    # Height of the tallest data point; the bands extend 10% above it.
    hmax = np.max(ax.lines[0].get_ydata())

    ax.bar(sig,
           1.10 * np.array(hmax),
           sigw,
           color='#D3EFFB',
           edgecolor='#D3EFFB',
           label='Signal',
           alpha=0.5)
    ax.bar(bkg,
           1.10 * np.ones(len(bkg)) * hmax,
           bkgw,
           label='Background',
           color='#006EB6',
           edgecolor='#006EB6',
           alpha=0.5)
    ax.set_xlabel(xlabel)

    # float() keeps the bin width exact for integer ranges on any Python.
    unit = r'{} {}'.format(float(xmax - xmin) / nbins, unit_name)
    ylabel = r'Candidates / ({0})'.format(unit)
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.set_xlim(xmin, 0.9999 * xmax)

    plot_utils.y_margin_scaler(ax, lf=0, la=True)
    pdf.savefig(fig)
    # Close instead of clear so the figure does not stay alive in pyplot.
    plt.close(fig)


def plot_mass_regions():
    """Plot the D0 mass and delta mass with signal and sideband regions.

    Both distributions are drawn for candidates passing the final selection
    and written as two pages to ``mass_regions[_opt][_cand].pdf`` below the
    ``selection`` output path. The ``_opt`` and ``_cand`` tags are added when
    ``config.optimised_selection`` and ``config.candidates_selection`` are
    set, respectively.
    """
    sel = get_final_selection()
    mode = gcm()
    df = mode.get_data([vars.m(mode.D0), vars.dtf_dm()])

    selected = df[sel]

    nbins = 100
    name = 'mass_regions'
    if config.optimised_selection:
        name += '_opt'
    if config.candidates_selection:
        name += '_cand'
    outfile = mode.get_output_path('selection') + name + '.pdf'
    with PdfPages(outfile) as pdf:
        # Doing D0 mass first
        _draw_mass_regions(
            pdf, selected[vars.m(mode.D0)],
            xmin=1810, xmax=1920, nbins=nbins,
            centre=config.PDG_MASSES['D0'],
            signal_halfwidth=18., sideband_offset=30.,
            xlabel=vars.m.latex((mode.D0), with_unit=True),
            unit_name=vars.m.unit)

        # Now delta mass
        _draw_mass_regions(
            pdf, selected[vars.dtf_dm()],
            xmin=140.5, xmax=152.5, nbins=nbins,
            centre=config.PDG_MASSES['delta'],
            signal_halfwidth=0.5, sideband_offset=2.3,
            xlabel=vars.dtf_dm.latex(with_unit=True),
            unit_name=vars.dtf_dm.unit)
Ejemplo n.º 13
0
def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    """Skim the input files of a mode and store the result as a bcolz table.

    The input files are processed in chunks of 25 by ``treesplitter`` (run in
    a pool of ``njobs`` workers), each chunk producing a temporary ROOT file.
    The temporary files are then read back in chunks and appended to a bcolz
    ctable at ``config.bcolz_locations``; existing data at that location is
    removed first. Finally the temporary files are deleted and the cached
    buffers of the mode are invalidated.

    Args:
        modename: Name of the mode, forwarded to ``get_mode``.
        polarity: Polarity, forwarded to ``get_mode``.
        year: Data-taking year, forwarded to ``get_mode``.
        full: Fraction selector. When it differs from 1 and ``mc`` is
            ``None``, only events whose event number is divisible by
            ``int(1 / full)`` are kept.
        test: When true, only the first four input files are used and the
            read-back chunk size is reduced to 100 rows.
        mc: Forwarded to ``get_mode``; any value other than ``None`` disables
            the ``full`` prescale, ``'mc'`` additionally requests
            ``Dstp_BKGCAT``.
        njobs: Number of worker processes.
    """
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)
    # I accidentally forgot the p in Dstp. Got to rename everything now for
    # this one exception. Hack incoming
    needs_dst_rename = modename == 'WS' and year == 2016
    if needs_dst_rename:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dst'

    try:
        sel = get_root_preselection.get(mode)

        # Always download the entire MC
        if full != 1 and mc is None:
            ctr = int(1./float(full))
            sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
            log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))

        input_files = mode.get_file_list()
        if test:
            input_files = input_files[:4]
        chunked = list(helpers.chunks(input_files, 25))
        length = len(chunked)

        # While the code is in developement, just get any variables we can
        # access
        for part in mode.head.all_mothers() + mode.head.all_daughters():
            for func in variables.__all__:
                try:
                    getattr(variables, func)(part)
                except variables.AccessorUsage:
                    pass

        # Make some sorted variables. Saves the hassle when later training
        # BDTs
        arg_sorted_ip = '{},{},{},{}'.format(
            *[ipchi2(p) for p in mode.D0.all_daughters()])
        arg_sorted_pt = '{},{},{},{}'.format(
            *[pt(p) for p in mode.D0.all_daughters()])

        add_vars = {
            'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
            'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp),
                                            dtf_m(mode.D0)),
            'ltime_ratio': '{} / {}'.format(ltime(mode.D0),
                                            config.Dz_ltime),
            'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
            'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
            'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
            'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
            'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
            'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
            'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
            'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
        }
        variables_needed = list(variables.all_ever_used)

        if mc == 'mc':
            variables_needed.append('Dstp_BKGCAT')

        def run_splitter(fns):
            # NOTE(review): mktemp only reserves a name and is race-prone;
            # kept because the output file is created by treesplitter.
            temp_file = tempfile.mktemp('.root')
            treesplitter(files=fns, treename=mode.get_tree_name(),
                         output=temp_file, variables=variables_needed,
                         selection=sel, addvariables=add_vars)
            return temp_file

        pool = ProcessingPool(njobs)
        temp_files = []
        for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                           leave=True, total=length, smoothing=0):
            temp_files.append(r)

        log.info('Created {} temporary files.'.format(len(temp_files)))
        bcolz_folder = config.bcolz_locations.format(mode.get_store_name())

        try:
            log.info('Removing already existing data at {}'.format(
                bcolz_folder))
            shutil.rmtree(bcolz_folder)
        except OSError:
            log.info('No previous data found. Nothing to delete.')

        # Use the function's own ``test`` flag for the reduced chunk size.
        df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                       chunksize=100 if test else 500000)

        # New storage using bcolz because better
        ctuple = None

        for df in df_gen:
            log.info('Adding {} events of {} to store {}.'.format(
                len(df), mode.get_tree_name(), bcolz_folder))
            if needs_dst_rename:
                new_names = {
                    old: old.replace('Dst', 'Dstp')
                    for old in df.columns if 'Dst' in old
                }
                df = df.rename(index=str, columns=new_names)
            if ctuple is None:
                ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
            else:
                ctuple.append(df.to_records(index=False))

        for f in temp_files:
            os.remove(f)
        # Loop and delete everything in the datastore that needs to be
        # recached
        remove_buffer_for_mode(mode.mode)
    finally:
        if needs_dst_rename:
            # Undo the particle-name hack even when the download failed.
            mode.Dstp.name = 'Dstp'
Ejemplo n.º 14
0
def comb_bkg_sideband_region(df):
    """Return the mask for a sample enriched in combinatorial background.

    Keeps entries more than 30 away from the PDG D0 mass whose delta mass is
    also more than 2.3 away from the PDG delta mass.
    """
    d0_offset = np.abs(df[m(gcm().D0)] - config.PDG_MASSES['D0'])
    dm_offset = np.abs(df[dtf_dm()] - config.PDG_MASSES['delta'])

    in_sidebands = d0_offset > 30.
    in_sidebands &= dm_offset > 2.3
    return in_sidebands