Exemple #1
0
def train_reweighter():
    extra_vars = [
        gcm().ltime_var
    ]
    all_vars = gcm().phsp_vars + extra_vars
    columns = [v.var for v in all_vars if 'phi' not in v.var]
    columns += ['cosphi', 'sinphi']

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    data['cosphi'] = np.cos(data.phi1)
    data['sinphi'] = np.sin(data.phi1)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()
    gen['cosphi'] = np.cos(gen.phi1)
    gen['sinphi'] = np.sin(gen.phi1)

    limits = {v.var: v.binning[1:] for v in all_vars}
    limits['cosphi'] = (-1., 1)
    limits['sinphi'] = (-1., 1)
    for c in columns:
        mi, ma = limits[c]
        data[c] = (data[c] - mi) / (ma - mi) + 2.
        gen[c] = (gen[c] - mi) / (ma - mi) + 2.

    log.info('Training BDT reweighter for {}'.format(', '.join(columns)))
    reweighter = GBReweighter(n_estimators=300, max_depth=5, learning_rate=0.2)

    reweighter.fit(original=gen[columns].sample(n=250000),
                   target=data[columns][df_sel].sample(n=250000))
    bdt_utils.dump_reweighter(reweighter)
Exemple #2
0
def plot_comparison_test():

    extra_vars = [
        gcm().ltime_var
    ]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()
    gen['weight'] = get_efficiency_gen()

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Comp_weights.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = gen[pc.var]
            filled_weight = gen['weight']
            errorbars = data[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)
            ax = comparison.plot_comparison(
                pc, filled, errorbars, 'Model', 'Data',
                filled_weight=filled_weight)
            ax.set_xlabel(pc.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()
Exemple #3
0
def lifetime_study(correct_efficiencies=False):
    # Current mode stuff
    data = gcm().get_data([gcm().ltime_var.var])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()
    data['weight'] = 1.

    if correct_efficiencies:
        outfile = gcm().get_output_path('effs') + 'DATA_ltime_dep_effs.pdf'
    else:
        outfile = gcm().get_output_path('effs') + 'DATA_ltime_dep.pdf'
    percentiles = np.arange(0, 1.1, 0.2)
    boundaries = helpers.weighted_quantile(
        data[gcm().ltime_var.var][df_sel], percentiles)
    if correct_efficiencies:
        data['weight'] = 1./get_efficiency()
        boundaries = boundaries[1:]
    with PdfPages(outfile) as pdf:
        for var in gcm().phsp_vars:
            fig, ax = plt.subplots(figsize=(10, 10))
            for low, high in zip(boundaries[:-1], boundaries[1:]):
                sel = (data[gcm().ltime_var.var] > low) & (data[gcm().ltime_var.var] < high)  # NOQA

                df = data[var.var][df_sel & sel]
                weight = data['weight'][df_sel & sel]

                rlow, prec = helpers.rounder(low*1000, [low*1000, high*1000])
                rhigh, _ = helpers.rounder(high*1000, [low*1000, high*1000])

                spec = '{{:.{}f}}'.format(prec)
                label = r'${} < \tau \mathrm{{ [ps]}}  < {}$'.format(
                    spec.format(rlow), spec.format(rhigh))

                values, edges = np.histogram(df, bins=int(var.binning[0]/5.), range=var.binning[1:], weights=weight)  # NOQA
                err, edges = np.histogram(df, bins=int(var.binning[0]/5.), range=var.binning[1:], weights=weight**2)  # NOQA
                norm = np.sum(values)
                values = values/norm
                err = np.sqrt(err)/norm
                x_ctr = (edges[1:] + edges[:-1])/2.
                width = (edges[1:] - edges[:-1])
                x_err = width/2.

                options = dict(
                    fmt='o', markersize=5, capthick=1, capsize=0, elinewidth=2,
                    alpha=1)

                ax.errorbar(x_ctr, values, err, x_err, label=label, **options)
            ax.set_xlabel(var.xlabel)
            ax.yaxis.set_visible(False)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()
Exemple #4
0
def simple_phsp_efficiencies():

    extra_vars = [
        gcm().ltime_var
    ]

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()

    outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            denominator = gen[pc.var]
            numerator = data[pc.var][df_sel]
            weight_d = np.ones(denominator.index.size)*1./denominator.index.size  # NOQA
            weight_n = np.ones(numerator.index.size)*1./numerator.index.size
            fig, ax = plt.subplots(figsize=(10, 10))
            if pc.convert is not None:
                numerator = pc.convert(numerator)
                denominator = pc.convert(denominator)
            x, y, x_err, y_err = helpers.make_efficiency(
                numerator, denominator, 100, weight_n, weight_d, independent=True)  # NOQA
            options = dict(
                fmt='o', markersize=5, capthick=1, capsize=0, elinewidth=2,
                alpha=1)

            ax.errorbar(x, y, y_err, x_err, **options)
            ax.set_xlabel(pc.xlabel)
            ax.set_ylabel('Relative efficiency')
            pdf.savefig(plt.gcf())
            plt.close()
Exemple #5
0
def phsp_comparison_plots():
    """Plots the mode sidebands and the opposite mode signal region phsp
    distributions. Only really meaningful if executed for the WS events.
    Opposite mode is plotted as solid, with the uncertainty propagated to
    the mode error plot.
    """
    # Beside phase space, also plot D0 momentum and flight distance
    extra_vars = [
        gcm().ltime_var,
        PlotConfig(vars.pt,
                   gcm().D0, (100, 0, 15000)),
        PlotConfig(vars.vdchi2,
                   gcm().D0, (100, 0, 10), np.log,
                   r'$\ln(\text{{{}}})$'),  # NOQA
    ]
    # opposite_mode
    with opposite_mode():
        OS = gcm().get_data([f.var for f in extra_vars])
        add_variables.append_phsp(OS)
        os_sel = final_selection.get_final_selection()
        os_sel &= selection.delta_mass_signal_region()

        OS_weight = erf(OS[gcm().ltime_var.var] * 1600) / 24. + 0.038 + OS[
            gcm().ltime_var.var] * 4  # NOQA

    # Current mode stuff
    DF = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(DF)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.mass_sideband_region()

    outfile = gcm().get_output_path('selection') + 'phsp_comp.pdf'
    with PdfPages(outfile) as pdf:
        for pc in gcm().phsp_vars + extra_vars:
            log.info('Plotting {}'.format(pc.var))
            filled = OS[pc.var][os_sel]
            filled_weights = OS_weight[os_sel]
            errorbars = DF[pc.var][df_sel]
            if pc.convert is not None:
                filled = pc.convert(filled)
                errorbars = pc.convert(errorbars)
            ax = comparison.plot_comparison(pc,
                                            filled,
                                            errorbars,
                                            'RS signal',
                                            'WS background',
                                            normed_max=True)
            ax.set_xlabel(pc.xlabel)
            plot_utils.y_margin_scaler(ax, lf=0, la=True)
            ax.set_ylabel('Arbitrary units')
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.clf()
            ax = comparison.plot_comparison(pc,
                                            filled,
                                            errorbars,
                                            'RS signal',
                                            'WS background',
                                            filled_weight=filled_weights,
                                            normed_max=True)
            ax.set_xlabel(pc.xlabel)
            plot_utils.y_margin_scaler(ax, lf=0, la=True)
            ax.set_ylabel('Arbitrary units')
            ax.legend()
            pdf.savefig(plt.gcf())
Exemple #6
0
def dependence_study(use_efficiencies=False):

    extra_vars = [
        gcm().ltime_var
    ]
    all_vars = gcm().phsp_vars + extra_vars

    # Current mode stuff
    data = gcm().get_data([f.var for f in extra_vars])
    add_variables.append_phsp(data)
    df_sel = final_selection.get_final_selection()
    df_sel &= selection.delta_mass_signal_region()

    gen = get_model()

    if use_efficiencies:
        outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff_dep_eff.pdf'
        gen['weight'] = get_efficiency_gen()
    else:
        outfile = gcm().get_output_path('effs') + 'Gen_DATA_Eff_dep.pdf'
        gen['weight'] = 1.

    lim_file = gcm().get_output_path('effs') + 'limits_for_eff.p'
    with PdfPages(outfile) as pdf:
        for selected, plotted in permutations(all_vars, 2):
            log.info('Plotting {} in intervals of {}'.format(
                plotted.var, selected.var))
            percentiles = np.arange(0, 1.1, 0.2)
            boundaries = helpers.weighted_quantile(
                data[selected.var][df_sel], percentiles)
            fig, ax = plt.subplots(figsize=(10, 10))
            for low, high in zip(boundaries[:-1], boundaries[1:]):
                num_sel = (data[selected.var] > low) & (data[selected.var] < high)  # NOQA
                den_sel = (gen[selected.var] > low) & (gen[selected.var] < high)

                denominator = gen[plotted.var][den_sel]
                numerator = data[plotted.var][df_sel & num_sel]

                weight_d = gen['weight'][den_sel]
                weight_d /= np.sum(weight_d)
                weight_n = np.ones(numerator.index.size)*1./numerator.index.size  # NOQA

                x, y, x_err, y_err = helpers.make_efficiency(
                    numerator, denominator, 50, weight_n, weight_d, independent=True)  # NOQA
                options = dict(
                    fmt='o', markersize=5, capthick=1, capsize=0, elinewidth=2,
                    alpha=1)

                rlow, prec = helpers.rounder(low, boundaries)
                rhigh, _ = helpers.rounder(high, boundaries)

                spec = '{{:.{}f}}'.format(prec)
                label = r'${} <$ {} $ < {}$'.format(
                    spec.format(rlow), selected.xlabel, spec.format(rhigh))

                ax.errorbar(x, y, y_err, x_err, label=label, **options)
            ax.set_xlabel(plotted.xlabel)
            ax.set_ylabel('Relative efficiency')
            try:
                limits = load(lim_file)
            except:
                log.info('Creating new limits file')
                limits = {}
            if limits is None:
                log.info('Creating new limits file')
                limits = {}

            if (plotted.var, selected.var) not in limits or use_efficiencies is False:  # NOQA
                plot_utils.y_margin_scaler(ax, hf=0.4)
                limits[(plotted.var, selected.var)] = ax.get_ylim()
            else:
                log.info('Applying limits')
                lim = limits[(plotted.var, selected.var)]
                ax.set_ylim(lim)
            dump(limits, lim_file)
            ax.legend()
            pdf.savefig(plt.gcf())
            plt.close()