Example #1
0
def main():
    """Produce the mass-scan averaged transfer-factor (TF) background estimate.

    Steps, in order:
      1. Load either data or inclusive MC (chosen by ``args.data``) plus the
         W/Z MC samples, and scale MC event weights by the per-DSID value from
         the cross-section table and by ``tf.config['lumi']``.
      2. For every mass point in the scan, fit the TF profile and evaluate the
         nominal/up/down TF weights for all jets failing the selection.
      3. For each failing jet, average the TF weights over those mass points
         whose +/- 20% mass window does *not* contain the jet.
      4. Write the TF-weighted failing jets to
         ``output/objdef_GBS{MC}_{DSID}.root`` (one pair of trees per TF
         variation) and dump the configuration to a JSON log file.

    Returns ``None``; exits early (after a warning) on inconsistent
    command-line options or when no input files are found.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Check(s)
    if (not args.data) and args.subtractWZMC:
        warning(
            "Requesting to subtract W/Z MC from MC background which contains no contamination. Exiting."
        )
        return

    if (not args.data) and args.subtractWZdata:
        warning(
            "Requesting to subtract W/Z data from MC background which contains no contamination. Exiting."
        )
        return

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    base_path = tf.config['base_path']
    if args.data:
        files_data = glob.glob(base_path + 'objdef_data_*.root')
    else:
        files_data = glob.glob(base_path + 'objdef_MC_3610*.root')
    files_WZ = (glob.glob(base_path + 'objdef_MC_30543*.root') +
                glob.glob(base_path + 'objdef_MC_30544*.root'))

    if not files_data:
        warning("No files found. Try to run:")
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    if not args.data:
        # Only MC is rescaled and tagged; for real data the DSID field is
        # left at zero and the weights are untouched.
        for idx, file_id in enumerate(info_data['id']):
            # Mask of all 'data' entries with same id, i.e. from same file
            msk = (data['id'] == file_id)
            tmp_DSID = info_data['DSID'][idx]  # Get DSID for this file
            # Scale by cross section x filter eff. for this DSID
            data['weight'][msk] *= xsec[tmp_DSID]
            data['DSID'][msk] = tmp_DSID  # Store DSID
        # Scale all events (MC) by luminosity
        data['weight'] *= tf.config['lumi']

    WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
    # Use the *position* of each file id to look up its DSID (as done for
    # 'data' above); the id value itself is not guaranteed to be a valid
    # index into the info array.
    for idx, file_id in enumerate(info_WZ['id']):
        # Mask of all 'WZ' entries with same id, i.e. from same file
        msk = (WZ['id'] == file_id)
        tmp_DSID = info_WZ['DSID'][idx]  # Get DSID for this file
        # Scale by cross section x filter eff. for this DSID
        WZ['weight'][msk] *= xsec[tmp_DSID]
        WZ['DSID'][msk] = tmp_DSID  # Store DSID
    WZ['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if
                         (args.subtractWZMC and args.data) else None)

    # GBS mass bins
    masses = np.linspace(100, 270, 34 + 1, endpoint=True)

    # Weight and counter arrays, one entry per failing jet
    num_fail = np.sum(msk_fail)
    weights_bkg_nom = np.zeros((num_fail, ), dtype=float)
    weights_bkg_up = np.zeros((num_fail, ), dtype=float)
    weights_bkg_down = np.zeros((num_fail, ), dtype=float)
    counter_bkg = np.zeros((num_fail, ), dtype=float)

    num_WZ_fail = np.sum(msk_WZ_fail)
    weights_WZ_nom = np.zeros((num_WZ_fail, ), dtype=float)
    weights_WZ_up = np.zeros((num_WZ_fail, ), dtype=float)
    weights_WZ_down = np.zeros((num_WZ_fail, ), dtype=float)
    counter_WZ = np.zeros((num_WZ_fail, ), dtype=float)

    # The failing subsets do not change between mass points
    data_fail = data[msk_fail]
    WZ_fail = WZ[msk_WZ_fail]

    for mass in masses:
        print(" -- %s" % mass)

        # Fit TF profile
        calc.mass = mass
        calc.fullfit()

        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_gbs_%s_%dGeV_' %
                      ('data' if args.data else 'MC', mass),
                      MC=not args.data)

        # Get TF weights
        w_nom, w_up, w_down = calc.fullweights(data_fail)
        w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ_fail)

        # Compute mask for which jets to use in GBS computation: those lying
        # outside the +/- 20% window around the current mass point
        msk_gbs = ~(np.abs(data_fail['m'] - mass) < 0.2 * mass)
        msk_gbs_WZ = ~(np.abs(WZ_fail['m'] - mass) < 0.2 * mass)

        # Store weights and increment counter for masked jets
        weights_bkg_nom[msk_gbs] += w_nom[msk_gbs]
        weights_bkg_up[msk_gbs] += w_up[msk_gbs]
        weights_bkg_down[msk_gbs] += w_down[msk_gbs]
        counter_bkg[msk_gbs] += 1.

        weights_WZ_nom[msk_gbs_WZ] += w_WZ_nom[msk_gbs_WZ]
        weights_WZ_up[msk_gbs_WZ] += w_WZ_up[msk_gbs_WZ]
        weights_WZ_down[msk_gbs_WZ] += w_WZ_down[msk_gbs_WZ]
        counter_WZ[msk_gbs_WZ] += 1.

    # Average the accumulated weights over the mass points that contributed;
    # jets that never contributed keep a weight of zero.
    msk = (counter_bkg > 0)
    weights_bkg_nom[msk] /= counter_bkg[msk]
    weights_bkg_up[msk] /= counter_bkg[msk]
    weights_bkg_down[msk] /= counter_bkg[msk]

    msk = (counter_WZ > 0)
    weights_WZ_nom[msk] /= counter_WZ[msk]
    weights_WZ_up[msk] /= counter_WZ[msk]
    weights_WZ_down[msk] /= counter_WZ[msk]

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    check_make_dir('output')

    DSID = 400000 if args.data else 400001

    # Write TF-scaled failing data to file
    output = ROOT.TFile(
        'output/objdef_GBS{MC}_{DSID}.root'.format(
            DSID=DSID, MC='' if args.data else 'MC'), 'RECREATE')

    variations = zip(
        ['Nominal', 'TF_UP', 'TF_DOWN'],
        [weights_bkg_nom, weights_bkg_up, weights_bkg_down],
        [weights_WZ_nom, weights_WZ_up, weights_WZ_down])

    for var_name, w, w_WZ in variations:

        # -- Prepare mass- and weight vectors
        vector_m = data['m'][msk_fail]
        vector_w = data['weight'][msk_fail] * w
        if args.subtractWZdata and args.data:
            if WZ is not None and WZ.size > 0:
                print("  Subtracting TF-scaled W/Z MC from background estimate")
                # Append the W/Z jets with negative weights, so that filling
                # a histogram performs the subtraction
                vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                vector_w = np.concatenate(
                    (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
            else:
                warning(
                    "  Could not subtract failed, TF-scale W/Z MC component")
        # Note: Don't subtract the signal component; that's output as a
        # separate histogram to be used in the simultaneous fit

        # -- Prepare DSID and isMC vectors
        vector_DSID = np.ones_like(vector_w) * DSID
        vector_isMC = np.ones_like(vector_w).astype(bool)

        # list(...) keeps this working where zip() returns an iterator
        array1 = np.array(list(zip(vector_m, vector_w)),
                          dtype=[(tf.config['prefix'] + 'm', np.float64),
                                 ('weight', np.float64)])

        array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                          dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

        # Mass and weight branch
        print("  Writing arrays to file: %s" % var_name)
        treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                              'Jet_tau21DDT').replace(
                                                  'Nominal', var_name)
        make_directories('/'.join(treename1.split('/')[:-1]), fromDir=output)
        tree1 = ROOT.TTree(treename1.split('/')[-1], "")
        array2tree(array1, tree=tree1)

        # outputTree
        treename2 = tf.config['outputtree'].replace('Nominal', var_name)
        make_directories('/'.join(treename2.split('/')[:-1]), fromDir=output)
        tree2 = ROOT.TTree(treename2.split('/')[-1], "")
        array2tree(array2, tree=tree2)

        output.Write()

    output.Close()

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    log_path = 'logs/gbs_config_%s_%d.log' % ('data' if args.data else 'MC',
                                              DSID)
    # Context manager guarantees the log is flushed and closed
    with open(log_path, 'w') as log_file:
        json.dump([cfg, vars(args)], log_file)

    return
Example #2
0
def main():
    """Run the signal-injection closure test for a single mass point.

    Steps, in order:
      1. Load the inclusive MC background together with the signal MC sample
         selected by ``get_signal_DSID(args.mass, tolerance=10)``, and scale
         event weights by the per-DSID cross-section value and luminosity.
      2. Keep the signal in the pseudo-data only if ``args.inject`` is set.
      3. If ``args.toys`` is set, replace the pseudo-data by toys generated
         from smoothed (rhoDDT, logpt) histograms of the pass/fail samples.
      4. Fit the transfer factor (TF) at ``args.mass`` and compute TF weights
         for the failing pseudo-data and the failing signal jets.
      5. Draw pre-fit plots for mu = 0 and mu = 1, fit the signal strength
         for the down/nominal/up TF variations, and draw the post-fit plot.

    Returns ``None``; exits early (after a warning) if no signal sample or no
    input data is available.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        warning("No signal file was found")
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root') + [
        tf.config['base_path'] + sig_file
    ]

    # NOTE(review): the signal path is always appended above, so this list is
    # never empty and the guard below cannot fire as written.
    if not files:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    # Use the *position* of each file id to look up its DSID; the id value
    # itself is not guaranteed to be a valid index into the info array.
    for idx, file_id in enumerate(info['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        DSID = info['DSID'][idx]  # Get DSID for this file
        # Scale by cross section x filter eff. for this DSID
        data['weight'][msk] *= xsec[DSID]
        data['DSID'][msk] = DSID  # Store DSID
    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Check output.
    if data.size == 0:
        warning("No data was loaded.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Separate out signal MC
    msk_sig = (data['DSID'] == sig_DSID)
    msk_data = ~msk_sig

    print("DATA STATISTICS: %s" % np.sum(data[msk_data]['weight']))

    signal = data[msk_sig]
    if not args.inject:
        # If we're not injecting signal, explicitly remove it from the 'data'
        # array. From here on 'msk_sig' no longer matches 'data' in length and
        # is only used again in the 'inject' branches below.
        data = data[~msk_sig]

    # Toys
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.toys:

        # Get masks
        msk_pass = tf.config['pass'](data)
        msk_fail = ~msk_pass

        # Create histograms (background only)
        if args.inject:
            pdf_pass = get_histogram(data,
                                     tf.config['params'],
                                     tf.config['axes'],
                                     mask=msk_pass & ~msk_sig)
            pdf_fail = get_histogram(data,
                                     tf.config['params'],
                                     tf.config['axes'],
                                     mask=msk_fail & ~msk_sig)
        else:
            pdf_pass = get_histogram(data,
                                     tf.config['params'],
                                     tf.config['axes'],
                                     mask=msk_pass)
            pdf_fail = get_histogram(data,
                                     tf.config['params'],
                                     tf.config['axes'],
                                     mask=msk_fail)

        # Smooth (only leading background)
        for _ in range(2):
            pdf_pass.Smooth()
            pdf_fail.Smooth()

        # Inject afterwards, so that the signal shape is not smoothed
        if args.inject:
            pdf_pass.Add(
                get_histogram(data,
                              tf.config['params'],
                              tf.config['axes'],
                              mask=msk_pass & msk_sig))
            pdf_fail.Add(
                get_histogram(data,
                              tf.config['params'],
                              tf.config['axes'],
                              mask=msk_fail & msk_sig))

        # Create p.d.f.s
        # -- Define variables
        rhoDDT = ROOT.RooRealVar('rhoDDT', 'rhoDDT', tf.config['axes'][0][0],
                                 tf.config['axes'][0][-1])
        logpt = ROOT.RooRealVar('logpt', 'logpt', tf.config['axes'][1][0],
                                tf.config['axes'][1][-1])

        rhoDDT.setBins(len(tf.config['axes'][0]) - 1)
        logpt.setBins(len(tf.config['axes'][1]) - 1)

        # -- Define histograms
        rdh_pass = ROOT.RooDataHist('rdh_pass', 'rdh_pass',
                                    ROOT.RooArgList(rhoDDT, logpt), pdf_pass)
        rdh_fail = ROOT.RooDataHist('rdh_fail', 'rdh_fail',
                                    ROOT.RooArgList(rhoDDT, logpt), pdf_fail)

        # -- Turn histograms into pdf's
        rhp_pass = ROOT.RooHistPdf('rhp_pass', 'rhp_pass',
                                   ROOT.RooArgSet(rhoDDT, logpt), rdh_pass)
        rhp_fail = ROOT.RooHistPdf('rhp_fail', 'rhp_fail',
                                   ROOT.RooArgSet(rhoDDT, logpt), rdh_fail)

        # Generate toys
        mult = 1.
        N_pass = int(np.sum(data['weight'][msk_pass]) * mult)
        N_fail = int(np.sum(data['weight'][msk_fail]) * mult)

        dtype = ['rhoDDT', 'logpt', 'tau21DDT', 'pt', 'm', 'weight']
        dtype = [(var, 'f8') for var in dtype]
        toys_pass = np.zeros(N_pass, dtype=dtype)
        toys_fail = np.zeros(N_fail, dtype=dtype)

        print("Generating toys (pass: %d, fail: %d)" % (N_pass, N_fail))
        rds_pass = rhp_pass.generate(ROOT.RooArgSet(rhoDDT, logpt), N_pass,
                                     True, False)
        rds_fail = rhp_fail.generate(ROOT.RooArgSet(rhoDDT, logpt), N_fail,
                                     True, False)

        # Copy the generated (rhoDDT, logpt) pairs into the toy arrays and
        # derive the remaining fields. 'tau21DDT' is set to a dummy value of
        # 0 (pass) or 1 (fail).
        for toys, rds, tau21DDT in ((toys_pass, rds_pass, 0.),
                                    (toys_fail, rds_fail, 1.)):
            for idx in range(toys.size):
                row = rds.get(idx)
                toys['rhoDDT'][idx] = row.getRealValue('rhoDDT')
                toys['logpt'][idx] = row.getRealValue('logpt')
            toys['pt'] = np.exp(toys['logpt'])
            toys['m'] = np.sqrt(np.exp(toys['rhoDDT']) * toys['pt'] * 1.)
            toys['weight'] = 1. / float(mult)
            toys['tau21DDT'] = tau21DDT

        # From here on the toys play the role of the pseudo-data
        data = np.concatenate((toys_pass, toys_fail))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    calc = tf.calculator(data=data,
                         config=tf.config)  # Using default configuration
    calc.mass = args.mass
    calc.fullfit()

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    msk_sig_pass = tf.config['pass'](signal)
    msk_sig_fail = ~msk_sig_pass

    print("  -- Computing data weights")
    w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])
    print("  -- Computing signal weights")
    w_sig, _, _ = calc.fullweights(signal[msk_sig_fail])
    print("  -- Final fit done")
    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/new_signalinjection_%s%s_' %
                  ("toys_" if args.toys else "",
                   "injected" if args.inject else "notinjected"))

    # Performing signal injection test
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # NOTE: the leading 'True' forces this section to run unconditionally, so
    # that the fit is always performed even when nothing is shown or saved.
    if True or args.show or args.save:

        bestfit_mu = None

        # Four passes: pre-fit plots for mu = 0 and mu = 1, the fit itself
        # (its canvas is neither saved nor shown), and the post-fit plot which
        # uses the fitted mu.
        for mu, fit, prefit, subtract in zip([0, 1, 1, None],
                                             [False, False, True, False],
                                             [True, True, True, False],
                                             [True, True, False, True]):

            if not prefit:
                # Post-fit: 'bestfit_mu' was set to (value, error) by the
                # preceding fit pass
                mu = bestfit_mu[0]

            # Plotting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            c = ap.canvas(num_pads=2, batch=not args.show)
            p0, p1 = c.pads()

            # -- Histograms: Main pad
            bins = tf.config['massbins']

            h_bkg = c.hist(data['m'][msk_data_fail],
                           bins=bins,
                           weights=data['weight'][msk_data_fail] * w_nom,
                           display=False)
            h_bkg_up = c.hist(data['m'][msk_data_fail],
                              bins=bins,
                              weights=data['weight'][msk_data_fail] * w_up,
                              display=False)
            h_bkg_down = c.hist(data['m'][msk_data_fail],
                                bins=bins,
                                weights=data['weight'][msk_data_fail] * w_down,
                                display=False)

            h_sig = c.hist(signal['m'][msk_sig_pass],
                           bins=bins,
                           weights=signal['weight'][msk_sig_pass],
                           scale=mu,
                           display=False)
            h_sfl = c.hist(signal['m'][msk_sig_fail],
                           bins=bins,
                           weights=signal['weight'][msk_sig_fail] * w_sig,
                           scale=mu,
                           display=False)
            h_data = c.plot(data['m'][msk_data_pass],
                            bins=bins,
                            weights=data['weight'][msk_data_pass],
                            display=False)

            # Normalise all histograms by bin width (events per GeV)
            for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
                width = float(h_bkg.GetBinWidth(ibin))
                for hist in (h_bkg, h_bkg_up, h_bkg_down, h_sig, h_sfl,
                             h_data):
                    hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                    hist.SetBinError(ibin, hist.GetBinError(ibin) / width)

            if subtract:
                # Subtracting the TF-scaled failing signal from the background
                # prediction. In the fit pass this is left to the fit model
                # instead (see 'c_sfl' below).
                h_bkg.Add(h_sfl, -1)
                h_bkg_up.Add(h_sfl, -1)
                h_bkg_down.Add(h_sfl, -1)

            # Starting with standard histogram, not THStack, just to get
            # y-axis to cooperate
            c.hist(h_bkg, option='HIST', linestyle=0, fillstyle=0, fillcolor=0)
            h_bkg = c.stack(h_bkg,
                            fillcolor=ROOT.kAzure + 7,
                            label='Background pred.')
            h_sig = c.stack(h_sig,
                            fillcolor=ROOT.kRed - 4,
                            label="Z' (#mu = %s)" %
                            ("%.0f" % mu if prefit else "%.2f #pm %.2f" %
                             (mu, bestfit_mu[1])))

            h_sum = h_bkg
            h_sum = c.hist(h_sum,
                           fillstyle=3245,
                           fillcolor=ROOT.kGray + 3,
                           option='E2',
                           label='Stat. uncert.')

            h_bkg_up = c.hist(h_bkg_up,
                              linecolor=ROOT.kGreen + 1,
                              linestyle=2,
                              option='HIST',
                              label='Syst. uncert.')
            h_bkg_down = c.hist(h_bkg_down,
                                linecolor=ROOT.kGreen + 1,
                                linestyle=2,
                                option='HIST')

            h_data = c.plot(h_data, label='Pseudo-data')

            c.hist(h_bkg, option='AXIS')  # Re-draw axes

            # -- Histograms: Ratio pad
            c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
            c.ratio_plot((h_sum, h_sum), option='E2')
            c.ratio_plot((h_bkg_up, h_sum), option='HIST')
            c.ratio_plot((h_bkg_down, h_sum), option='HIST')
            c.ratio_plot((h_data, h_sum))

            # -- Axis labels
            c.xlabel('Large-#it{R} jet mass [GeV]')
            c.ylabel('Events / GeV')
            p1.ylabel('Data / Est.')

            # -- Axis limits
            c.ylim(1.0E+00, 1.0E+06)
            p1.ylim(0.80, 1.20)

            # -- Line(s)
            p1.yline(1.0)

            # -- Region(s)
            c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

            # -- Text
            c.text(
                [
                    "#sqrt{s} = 13 TeV,  %s fb^{-1}" % tf.config['lumi'],
                    "Incl. #gamma Monte Carlo",
                    "Photon channel",
                ] + (["Using toys"] if args.toys else []),
                qualifier='Simulation Internal')

            # -- Log
            c.log()

            # -- Legend
            c.legend()
            if args.save and not fit:
                c.save('plots/new_signalinjection_%s%dGeV_pm%d_%s_%s.pdf' %
                       ("toys_" if args.toys else "", args.mass, 20.,
                        ('prefit_mu%d' % mu if prefit else 'postfit'),
                        ('injected' if args.inject else 'notinjected')))
            if args.show and not fit:
                c.show()

            # Fitting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            if fit:

                bestfit_mu = list()

                # Order matters: index 0 = down, 1 = nominal, 2 = up
                hs_save = [
                    h_bkg_down.Clone('h_save_down'),
                    h_bkg.Clone('h_save_nom'),
                    h_bkg_up.Clone('h_save_up'),
                ]
                variation_names = ["Down", "Nominal", "Up"]

                for variation in range(3):

                    print("Variation: " + variation_names[variation])

                    # Get correct histogram for this variation
                    h_bkg_use = hs_save[variation]

                    # -- Define jet mass variable
                    mJ = ROOT.RooRealVar('mJ', 'mJ', 50, 300)
                    roobinning = ROOT.RooBinning(
                        len(tf.config['massbins']) - 1, tf.config['massbins'])
                    mJ.setBinning(roobinning)

                    # -- Define histograms
                    rdh_bkg = ROOT.RooDataHist('rdh_bkg', 'rdh_bkg',
                                               ROOT.RooArgList(mJ), h_bkg_use)
                    rdh_sig = ROOT.RooDataHist('rdh_sig', 'rdh_sig',
                                               ROOT.RooArgList(mJ), h_sig)
                    rdh_sfl = ROOT.RooDataHist('rdh_sfl', 'rdh_sfl',
                                               ROOT.RooArgList(mJ), h_sfl)

                    # -- Turn histograms into pdf's
                    rhp_bkg = ROOT.RooHistPdf('rhp_bkg', 'rhp_bkg',
                                              ROOT.RooArgSet(mJ), rdh_bkg)
                    rhp_sig = ROOT.RooHistPdf('rhp_sig', 'rhp_sig',
                                              ROOT.RooArgSet(mJ), rdh_sig)
                    rhp_sfl = ROOT.RooHistPdf('rhp_sfl', 'rhp_sfl',
                                              ROOT.RooArgSet(mJ), rdh_sfl)

                    # -- Define integrals as constants
                    n_bkg = ROOT.RooRealVar('n_bkg', 'n_bkg',
                                            h_bkg_use.Integral())
                    n_sig = ROOT.RooRealVar('n_sig', 'n_sig', h_sig.Integral())
                    n_sfl = ROOT.RooRealVar('n_sfl', 'n_sfl', h_sfl.Integral())

                    # -- Define signal strength and constant(s)
                    # Kept under a separate Python name so that the loop
                    # variable 'mu' is not rebound to a RooRealVar.
                    mu_var = ROOT.RooRealVar('mu', 'mu', 1, 0, 5)
                    neg1 = ROOT.RooRealVar('neg1', 'neg1', -1)

                    # -- Define fittable normalisation factors
                    c_bkg = ROOT.RooFormulaVar('c_bkg', 'c_bkg', '@0',
                                               ROOT.RooArgList(n_bkg))
                    c_sig = ROOT.RooFormulaVar('c_sig', 'c_sig', '@0 * @1',
                                               ROOT.RooArgList(mu_var, n_sig))
                    c_sfl = ROOT.RooFormulaVar(
                        'c_sfl', 'c_sfl', '@0 * @1 * @2',
                        ROOT.RooArgList(neg1, mu_var, n_sfl))

                    # -- Construct combined pdf
                    pdf = ROOT.RooAddPdf(
                        'pdf', 'pdf', ROOT.RooArgList(rhp_bkg, rhp_sig,
                                                      rhp_sfl),
                        ROOT.RooArgList(c_bkg, c_sig, c_sfl))

                    # -- Construct data histogram
                    rdh_data = ROOT.RooDataHist('rdh_data', 'rdh_data',
                                                ROOT.RooArgList(mJ), h_data)

                    # -- Fit pdf to data histogram
                    pdf.chi2FitTo(rdh_data, ROOT.RooLinkedList())

                    print("Best fit mu: %.3f +/- %.3f" % (mu_var.getValV(),
                                                          mu_var.getError()))
                    bestfit_mu.append((mu_var.getValV(), mu_var.getError()))

                # Combine: central value from the nominal fit; uncertainty is
                # half the down/up spread added in quadrature to the nominal
                # fit error.
                bestfit_mu = bestfit_mu[1][0], np.sqrt(
                    np.power(
                        abs(bestfit_mu[0][0] - bestfit_mu[2][0]) / 2., 2.) +
                    np.power(bestfit_mu[1][1], 2.))

    return
def main():
    """Compare background estimates for the jet mass spectrum and plot p0.

    Steps, as performed below:
      1. Load the 'objdef_MC_3610*' sample (used as pseudo-data), the GBS
         background and the W/Z MC; if `args.inject` is set, also the signal
         MC found by `get_signal_DSID` for `args.mass`.
      2. Scale the MC samples by cross section and luminosity and merge W/Z
         (and optionally signal) into the pseudo-data.
      3. Fit the transfer factor once nominally and once in a +/- 20% window
         around `args.mass`.
      4. Draw the mass spectrum with the nominal, window and GBS background
         estimates, followed by the per-bin p0-values of the window and GBS
         estimates.

    Returns None. Exits early (after a warning) if no signal DSID is found or
    if any of the required input file lists is empty.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        warning("No signal file was found")
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    files = {
        'data': glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root'),
        'gbs': glob.glob(tf.config['base_path'] + 'objdef_GBSMC_400001.root'),
        'WZ': glob.glob(tf.config['base_path'] + 'objdef_MC_3054*.root')
    }
    if args.inject:
        files['sig'] = glob.glob(tf.config['base_path'] + sig_file)
        pass

    # `files` itself is never empty (it always holds the keys above), so the
    # check has to look at the individual lists of matched paths. Every sample
    # is used unconditionally below, hence all of them are required.
    if not all(files.values()):
        warning("No files found. Try to run:")
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files['data'],
                    tf.config['tree'],
                    prefix=tf.config['prefix'])
    gbs = loadData(files['gbs'],
                   tf.config['finaltree'],
                   prefix=tf.config['prefix'])
    WZ = loadData(files['WZ'], tf.config['tree'], prefix=tf.config['prefix'])
    if args.inject:
        signal = loadData(files['sig'],
                          tf.config['tree'],
                          prefix=tf.config['prefix'])
    else:
        signal = None
        pass
    info = {
        key: loadData(files[key], tf.config['outputtree'], stop=1)
        for key in files
    }

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    def scale_sample(array, file_info):
        """Append a 'DSID' field to `array` and scale its weights.

        Each entry is scaled by `xsec[DSID]` of the file it came from (matched
        through the 'id' field) and then by `tf.config['lumi']`. Returns the
        new array, since `append_fields` does not modify its input in place.
        """
        array = append_fields(array,
                              'DSID',
                              np.zeros((array.size, )),
                              dtypes=int)
        for idx, file_id in enumerate(file_info['id']):
            # Mask of all entries with same id, i.e. from same file
            msk = (array['id'] == file_id)
            dsid = file_info['DSID'][idx]  # DSID for this file
            array['weight'][msk] *= xsec[dsid]
            array['DSID'][msk] = dsid
            pass
        array['weight'] *= tf.config['lumi']
        return array

    # Append new DSID field and scale weights
    if args.inject:
        signal = scale_sample(signal, info['sig'])
        pass
    WZ = scale_sample(WZ, info['WZ'])
    data = scale_sample(data, info['data'])

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))
        pass

    # Inject signal into data
    if args.inject:
        data = np.array(np.concatenate((data, signal)), dtype=data.dtype)
        pass

    # W/Z MC is always merged in; the `args.data` switch around this step is
    # disabled in this script.
    # @TODO: Not sure this script works for data input... But it's not used anyway.
    data = np.array(np.concatenate((data, WZ)), dtype=data.dtype)

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    # -- Data (incl. signal)
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # -- W/Z
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # -- Signal
    if args.inject:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass
        pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data, config=tf.config, subtract=WZ)

    # Nominal fit
    calc.fit()
    w_nom = calc.weights(data[msk_fail])
    w_nom_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/globalbackground_%s_%s_' %
                  ('injected' if args.inject else 'notinjected',
                   'data' if args.data else 'MC'))

    # mass +/- 20% stripe fit
    calc.mass = args.mass
    calc.window = 0.2
    calc.fit()
    w_stripe = calc.weights(data[msk_fail])
    w_stripe_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.inject:
        w_stripe_sig = calc.weights(signal[msk_sig_fail])
        pass

    # Plotting
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    bins = np.linspace(100, 250, 30 + 1, endpoint=True)

    # Setup canvas
    c = ap.canvas(num_pads=2, batch=not args.show)
    p0, p1 = c.pads()

    # Add stacked backgrounds
    h_bkg_nom = c.hist(data['m'][msk_fail],
                       bins=bins,
                       weights=data['weight'][msk_fail] * w_nom,
                       display=False)
    h_bkg_stripe = c.hist(data['m'][msk_fail],
                          bins=bins,
                          weights=data['weight'][msk_fail] * w_stripe,
                          display=False)
    h_WZfl_nom = c.hist(WZ['m'][msk_WZ_fail],
                        bins=bins,
                        weights=WZ['weight'][msk_WZ_fail] * w_nom_WZ,
                        display=False)
    h_WZfl_stripe = c.hist(WZ['m'][msk_WZ_fail],
                           bins=bins,
                           weights=WZ['weight'][msk_WZ_fail] * w_stripe_WZ,
                           display=False)
    if args.inject:
        h_sig = c.hist(signal['m'][msk_sig_pass],
                       bins=bins,
                       weights=signal['weight'][msk_sig_pass],
                       display=False)
        h_sfl = c.hist(signal['m'][msk_sig_fail],
                       bins=bins,
                       weights=signal['weight'][msk_sig_fail] * w_stripe_sig,
                       display=False)
        pass
    h_gbs = c.hist(gbs['m'], bins=bins, weights=gbs['weight'], display=False)

    # -- Subtract (opt.)
    if args.inject:
        h_bkg_stripe.Add(h_sfl, -1)
        h_gbs.Add(h_sfl, -1)
        pass
    h_bkg_nom.Add(h_WZfl_nom, -1)
    h_bkg_stripe.Add(h_WZfl_stripe, -1)

    # -- Actually draw
    h_WZ = c.stack(WZ['m'][msk_WZ_pass],
                   bins=bins,
                   weights=WZ['weight'][msk_WZ_pass],
                   fillcolor=ROOT.kRed - 4,
                   label='W/Z + #gamma')

    h_bkg_nom = c.stack(h_bkg_nom,
                        fillcolor=ROOT.kAzure + 7,
                        label="Bkg. (full)")

    # The stack sum is taken before the signal is stacked, i.e. it holds
    # W/Z + nominal background only.
    h_sum = c.getStackSum()
    h_bkg_stripe.Add(h_WZ)
    h_gbs.Add(h_WZ)
    if args.inject:
        h_sig = c.stack(h_sig,
                        fillcolor=ROOT.kViolet - 4,
                        label="Z' (%d GeV)" % args.mass)
        pass
    h_bkg_stripe = c.hist(h_bkg_stripe,
                          linecolor=ROOT.kGreen + 1,
                          label="Bkg. (window)")
    h_gbs = c.hist(h_gbs, linecolor=ROOT.kViolet + 1, label="Bkg. (GBS)")

    # Draw stats. error of stacked sum
    h_sum = c.hist(h_sum,
                   fillstyle=3245,
                   fillcolor=ROOT.kGray + 2,
                   linecolor=ROOT.kGray + 3,
                   label='Stats. uncert.',
                   option='E2')

    # Add (pseudo-) data
    h_data = c.plot(data['m'][msk_pass],
                    bins=bins,
                    weights=data['weight'][msk_pass],
                    markersize=0.8,
                    label='Data' if args.data else 'Pseudo-data')

    # Axis limits
    p1.ylim(0.8, 1.2)
    c.padding(0.45)
    c.log(True)

    # Draw error- and ratio plots
    if args.inject:
        hr_sig = c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
        pass
    h_err = c.ratio_plot((h_sum, h_sum), option='E2')
    h_ratio = c.ratio_plot((h_data, h_sum), oob=True)
    h_rgbs = c.ratio_plot((h_gbs, h_sum),
                          linecolor=ROOT.kViolet + 1,
                          option='HIST ][')
    h_rstripe = c.ratio_plot((h_bkg_stripe, h_sum),
                             linecolor=ROOT.kGreen + 1,
                             option='HIST ][')

    # Add labels and text
    qualifier = '%sInternal' % ("Simulation " if not args.data else "")
    c.xlabel('Signal jet mass [GeV]')
    c.ylabel('Events')
    p1.ylabel('Data / Nom.')
    c.text([
        "#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}",
    ] + ([
        "Sherpa incl. #gamma MC",
    ] if not args.data else []) + [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
    ] + (["Signal injected"] if args.inject else []),
           qualifier=qualifier)

    # Add line(s)
    p1.yline(1.0)

    # Draw legend
    c.legend()
    c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

    # Save and show plot
    if args.save:
        c.save('plots/globalbackground_spectrum_%dGeV_%s_%s.pdf' %
               (args.mass, 'injected' if args.inject else 'notinjected',
                'data' if args.data else 'MC'))
    if args.show: c.show()

    # p0-plot
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Setup canvas
    c2 = ap.canvas(batch=not args.show)

    p_local = h_data.Clone('p_local')
    p_global = h_data.Clone('p_global')

    for ibin in range(1, h_data.GetXaxis().GetNbins() + 1):
        c_data = h_data.GetBinContent(ibin)
        e_data = h_data.GetBinError(ibin)
        c_loc = h_bkg_stripe.GetBinContent(ibin)
        e_loc = h_bkg_stripe.GetBinError(ibin)
        c_glb = h_gbs.GetBinContent(ibin)
        # The GBS estimate deliberately reuses the window-fit bin error
        # instead of h_gbs.GetBinError(ibin).
        e_glb = e_loc

        # Significance of the data excess over each background estimate
        z_loc = (c_data -
                 c_loc) / np.sqrt(np.square(e_data) + np.square(e_loc))
        z_glb = (c_data -
                 c_glb) / np.sqrt(np.square(e_data) +
                                  np.square(e_glb)) if c_glb > 0 else 0

        # Convert to p-values, capped at 1
        p_loc = min(ROOT.TMath.Erfc(z_loc / np.sqrt(2)), 1)
        p_glb = min(ROOT.TMath.Erfc(z_glb / np.sqrt(2)), 1)

        p_local.SetBinContent(ibin, p_loc)
        p_global.SetBinContent(ibin, p_glb)
        p_local.SetBinError(ibin, 0)
        p_global.SetBinError(ibin, 0)
        pass

    c2.plot(p_local,
            markercolor=ROOT.kGreen + 1,
            linecolor=ROOT.kGreen + 1,
            option='PL',
            label="Local (20% window)")
    c2.plot(p_global,
            markercolor=ROOT.kViolet + 1,
            linecolor=ROOT.kViolet + 1,
            option='PL',
            label="Global (GBS)")
    c2.xlabel("Signal jet mass [GeV]")
    c2.ylabel("p_{0}")
    c2.log()

    c2.ylim(1E-04, 1E+04)
    # Reference lines at 0, 1, 2 and 3 sigma
    for sigma in range(4):
        c2.yline(ROOT.TMath.Erfc(sigma / np.sqrt(2)))
        pass

    c2.text([
        "#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}",
    ] + ([
        "Sherpa incl. #gamma MC",
    ] if not args.data else []) + [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
        ("Signal" if args.inject else "No signal") + " injected" +
        (" at m = %d GeV" % args.mass if args.inject else ""),
    ],
            qualifier=qualifier)

    c2.region("SR", 0.8 * args.mass, 1.2 * args.mass)
    c2.legend()
    if args.save:
        c2.save('plots/globalbackground_p0_%dGeV_%s_%s.pdf' %
                (args.mass, 'injected' if args.inject else 'notinjected',
                 'data' if args.data else 'MC'))
    if args.show: c2.show()

    return
Example #4
0
def main():

    # Parse command-line arguments
    args = parser.parse_args()

    DSID = int("1%02d%03d" %
               (0 if args.window is None else args.window * 100, args.mass))

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass)

    # Load data
    #files_data = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root')
    files_data = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')
    files_WZ   = glob.glob(tf.config['base_path'] + 'objdef_MC_30543*.root') + \
                 glob.glob(tf.config['base_path'] + 'objdef_MC_30544*.root')

    if sig_DSID is None:
        if args.mass < 100.:
            print "Assuming signal is W/Z"
            files_sig = files_WZ
            files_WZ = []
        else:
            files_sig = []
            pass
    else:
        sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)
        print "Using signal file: %s" % sig_file
        files_sig = [tf.config['base_path'] + sig_file]
        pass

    if len(files_data) == 0 or (sig_DSID and len(files_sig) == 0):
        warning("No files found.")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    signal = loadData(files_sig, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_sig = loadData(files_sig, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # ----------------------------------------------------
    # Make more elegant!
    # ----------------------------------------------------
    # Append new DSID field # @TODO: Make more elegant?
    #for arr, info in zip([signal, WZ], [info_sig, info_WZ]):
    '''# @TEMP >>>
    if data is not None:
        data = append_fields(data, 'DSID', np.zeros((data.size,)), dtypes=int)
        for idx in info_data['id']:    
            msk = (data['id'] == idx) # Get mask of all 'data' entries with same id, i.e. from same file
            tmp_DSID = info_data['DSID'][idx]  # Get DSID for this file
            data['weight'][msk] *= xsec[tmp_DSID] # Scale by cross section x filter eff. for this DSID
            data['DSID']  [msk] = tmp_DSID        # Store DSID
            pass
        #data['weight'] *= tf.config['lumi'] # Scale all events (MC) by luminosity
        pass
    # @TEMP <<<'''

    if signal is not None:
        signal = append_fields(signal,
                               'DSID',
                               np.zeros((signal.size, )),
                               dtypes=int)
        for idx in info_sig['id']:
            msk = (
                signal['id'] == idx
            )  # Get mask of all 'data' entries with same id, i.e. from same file
            tmp_DSID = info_sig['DSID'][idx]  # Get DSID for this file
            signal['weight'][msk] *= xsec[
                tmp_DSID]  # Scale by cross section x filter eff. for this DSID
            signal['DSID'][msk] = tmp_DSID  # Store DSID
            pass
        signal['weight'] *= tf.config[
            'lumi']  # Scale all events (MC) by luminosity
        pass

    if WZ is not None:
        WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
        for idx in info_WZ['id']:
            msk = (
                WZ['id'] == idx
            )  # Get mask of all 'data' entries with same id, i.e. from same file
            tmp_DSID = info_WZ['DSID'][idx]  # Get DSID for this file
            WZ['weight'][msk] *= xsec[
                tmp_DSID]  # Scale by cross section x filter eff. for this DSID
            WZ['DSID'][msk] = tmp_DSID  # Store DSID
            pass
        # @TODO: k-factors?
        WZ['weight'] *= tf.config[
            'lumi']  # Scale all events (MC) by luminosity
        pass

    # Check output.
    if data.size == 0 or ((signal is not None) and signal.size == 0):
        warning("No data was loaded. Exiting.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))
        pass
    if WZ is not None:
        WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))
        pass

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    if signal is not None:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass
        pass
    if WZ is not None:
        msk_WZ_pass = tf.config['pass'](WZ)
        msk_WZ_fail = ~msk_WZ_pass
        pass

    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if args.subtractWZMC else
                         None)  # Using default configuration
    calc.mass = args.mass

    # Perform full fit
    if args.window is None:
        calc.fullfit()

        print "  -- Computing data weights"
        w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])
        if signal is not None:
            print "  -- Computing signal weights"
            w_sig_nom, w_sig_up, w_sig_down = calc.fullweights(
                signal[msk_sig_fail])

            w_sig_pass, _, _ = calc.fullweights(signal[msk_sig_pass])

            print "Scale factors for signal strength:"
            print "  pass / (pass + fail) = %.3e / (%.3e + %.3e) = %.4f" % (
                np.sum(w_sig_pass), np.sum(w_sig_pass), np.sum(w_sig_nom),
                np.sum(w_sig_pass) / (np.sum(w_sig_nom) + np.sum(w_sig_pass)))
            pass
        if WZ is not None:
            print "  -- Computing W/Z weights"
            w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ[msk_WZ_fail])
        else:
            w_WZ_nom, w_WZ_up, w_WZ_down = None, None, None
            pass
        print "  -- Final fit done"
        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_',
                      MC=False)

    # Perform fit with manually-set window size
    else:
        # @TODO: - Forcing the fit to use same length scale as 20% window fit. Improve?
        calc.window = 0.2
        calc.fit()
        theta = calc.theta()
        calc.window = args.window
        calc.fit(theta=theta)

        print "  -- Computing data weights"
        w_nom = calc.weights(data[msk_data_fail])
        w_up = calc.weights(data[msk_data_fail], shift=+1)
        w_down = calc.weights(data[msk_data_fail], shift=-1)
        if signal is not None:
            print "  -- Computing signal weights"
            w_sig_nom = calc.weights(signal[msk_sig_fail])
            w_sig_up = calc.weights(signal[msk_sig_fail], shift=+1)
            w_sig_down = calc.weights(signal[msk_sig_fail], shift=-1)
            pass
        if WZ is not None:
            print "  -- Computing W/Z weights"
            w_WZ_nom, = calc.weights(WZ[msk_WZ_fail])
            w_WZ_up = calc.weights(WZ[msk_WZ_fail], shift=+1)
            w_WZ_down = calc.weights(WZ[msk_WZ_fail], shift=-1)
        else:
            w_WZ_nom, w_WZ_up, w_WZ_down = None, None, None
            pass
        print "  -- Manual fit done"
        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_',
                      MC=False)
        pass

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    check_make_dir('output')

    # Write TF-scaled failing data to file
    if args.save:
        output = ROOT.TFile(
            'output/objdef_TF_{DSID:6d}.root'.format(DSID=DSID), 'RECREATE')
        pass

    for shift, w, w_WZ in zip([0, 1, -1], [w_nom, w_up, w_down],
                              [w_WZ_nom, w_WZ_up, w_WZ_down]):

        # -- Get branch name for current variation
        var_name = 'Nominal' if shift == 0 else (
            'TF_UP' if shift == 1 else 'TF_DOWN')

        # -- Prepare mass- and weight vectors
        vector_m = data['m'][msk_data_fail]
        vector_w = data['weight'][msk_data_fail] * w
        if args.subtractWZdata:
            if WZ is not None and WZ.size > 0:
                print "  Subtracting TF-scaled W/Z MC from background estimate"
                vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                vector_w = np.concatenate(
                    (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
            else:
                warning(
                    "  Could not subtract failed, TF-scale W/Z MC component")
                pass
            pass
        # Note: Don't subtract the signal component; that's output as a separate histogram to be used in the simultaneous fit

        # -- Prepare DISD and isMC vectors
        vector_DSID = np.ones_like(vector_w) * DSID
        vector_isMC = np.ones_like(vector_w).astype(bool)

        array1 = np.array(zip(vector_m, vector_w),
                          dtype=[(tf.config['prefix'] + 'm', np.float64),
                                 ('weight', np.float64)])

        array2 = np.array(zip(vector_DSID, vector_isMC),
                          dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

        if args.save:
            # Mass and weight branch
            print "  Writing arrays to file: %s" % var_name
            treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                                  'Jet_tau21DDT').replace(
                                                      'Nominal', var_name)
            make_directories('/'.join(treename1.split('/')[:-1]),
                             fromDir=output)
            tree1 = ROOT.TTree(treename1.split('/')[-1], "")
            array2tree(array1, tree=tree1)

            # outputTree
            treename2 = tf.config['outputtree'].replace('Nominal', var_name)
            make_directories('/'.join(treename2.split('/')[:-1]),
                             fromDir=output)
            tree2 = ROOT.TTree(treename2.split('/')[-1], "")
            array2tree(array2, tree=tree2)

            output.Write()
            pass
        pass

    if args.save:
        output.Close()
        pass

    # Write TF-scaled failing signal MC to file
    if signal is not None:
        if args.save:
            output = ROOT.TFile(
                'output/objdef_TF_{DSID:6d}_signalfail.root'.format(DSID=DSID),
                'RECREATE')
            pass

        for shift, w_sig in zip([0, 1, -1], [w_sig_nom, w_sig_up, w_sig_down]):
            # -- Get branch name for current variation
            var_name = 'Nominal' if shift == 0 else (
                'TF_UP' if shift == 1 else 'TF_DOWN')

            # -- Prepare mass- and weight vectors
            vector_m = signal['m'][msk_sig_fail]
            vector_w = signal['weight'][msk_sig_fail] * w_sig

            # -- Prepare DISD and isMC vectors
            vector_DSID = np.ones_like(vector_w) * (DSID + 1E+05)
            vector_isMC = np.ones_like(vector_w).astype(bool)

            array1 = np.array(zip(vector_m, vector_w),
                              dtype=[(tf.config['prefix'] + 'm', np.float64),
                                     ('weight', np.float64)])

            array2 = np.array(zip(vector_DSID, vector_isMC),
                              dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

            if args.save:
                # Mass and weight branch
                print "  Writing arrays to file: %s" % var_name
                treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                                      'Jet_tau21DDT').replace(
                                                          'Nominal', var_name)
                make_directories('/'.join(treename1.split('/')[:-1]),
                                 fromDir=output)
                tree1 = ROOT.TTree(treename1.split('/')[-1], "")
                array2tree(array1, tree=tree1)

                # outputTree
                treename2 = tf.config['outputtree'].replace(
                    'Nominal', var_name)
                make_directories('/'.join(treename2.split('/')[:-1]),
                                 fromDir=output)
                tree2 = ROOT.TTree(treename2.split('/')[-1], "")
                array2tree(array2, tree=tree2)

                output.Write()
                pass
            pass

        if args.save:
            output.Close()
            pass
        pass

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    json.dump([cfg, vars(args)], open('logs/tf_config_%d.log' % DSID, 'w'))
    return
Example #5
0
def main():
    """Closure test of the transfer-factor background estimate on MC.

    Loads the 'objdef_MC_3610*' sample, scales it by cross section and
    luminosity, fits the transfer factor for `args.mass` / `args.window`, and
    (if `args.show` or `args.save` is set) compares the TF-weighted failing
    events, including the up/down variations, with the passing events in a
    bin-width-normalised jet mass plot.

    Returns None. Exits early (after a warning) if no files are found or no
    events were loaded.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root')

    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field # @TODO: Make more elegant?
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    # `idx` is the position of the file in `info`; `file_id` is the value
    # stored in the events' 'id' field. The two are kept separate so the DSID
    # lookup does not rely on ids being 0..N-1.
    for idx, file_id in enumerate(info['id']):
        msk = (
            data['id'] == file_id
        )  # Get mask of all 'data' entries with same id, i.e. from same file
        DSID = info['DSID'][idx]  # Get DSID for this file
        data['weight'][msk] *= xsec[
            DSID]  # Scale by cross section x filter eff. for this DSID
        data['DSID'][msk] = DSID  # Store DSID
        pass
    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Check output.
    if data.size == 0:
        warning("No data was loaded.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data,
                         config=tf.config)  # Using default configuration
    calc.mass = args.mass
    calc.window = args.window
    # ... calc.partialbins, calc.emptybins, ...
    calc.fit()  # ...(theta=0.5)
    w_nom = calc.weights(data[msk_fail])
    w_up = calc.weights(data[msk_fail], shift=+1)
    w_down = calc.weights(data[msk_fail], shift=-1)
    if args.show or args.save:
        calc.plot(show=args.show, save=args.save, prefix='plots/new_closure_')

    # Comparing jet mass distrbutions (closure)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.show or args.save:

        c = ap.canvas(num_pads=2, batch=not args.show)
        p0, p1 = c.pads()

        bins = tf.config['massbins']

        # Fill (but do not yet draw) the histograms, so that they can be
        # normalised by bin width first.
        h_bkg = c.hist(data['m'][msk_fail],
                       bins=bins,
                       weights=data['weight'][msk_fail] * w_nom,
                       display=False)
        h_up = c.hist(data['m'][msk_fail],
                      bins=bins,
                      weights=data['weight'][msk_fail] * w_up,
                      display=False)
        h_down = c.hist(data['m'][msk_fail],
                        bins=bins,
                        weights=data['weight'][msk_fail] * w_down,
                        display=False)
        h_data = c.plot(data['m'][msk_pass],
                        bins=bins,
                        weights=data['weight'][msk_pass],
                        display=False)

        # Divide contents and errors by bin width ("Events / GeV")
        for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
            width = float(h_bkg.GetBinWidth(ibin))
            for hist in (h_bkg, h_up, h_down, h_data):
                hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                hist.SetBinError(ibin, hist.GetBinError(ibin) / width)
                pass
            pass

        h_bkg = c.hist(h_bkg,
                       fillcolor=ROOT.kAzure + 7,
                       label='Background est.')
        h_err = c.hist(h_bkg,
                       fillstyle=3245,
                       fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3,
                       label='Stat. uncert.',
                       option='E2')
        h_up = c.hist(h_up,
                      linecolor=ROOT.kGreen + 1,
                      linestyle=2,
                      option='HIST',
                      label='Syst. uncert.')
        h_down = c.hist(h_down,
                        linecolor=ROOT.kGreen + 1,
                        linestyle=2,
                        option='HIST')
        h_data = c.plot(h_data, label='Pseudo-data')

        c.ratio_plot((h_err, h_bkg), option='E2')
        c.ratio_plot((h_up, h_bkg), option='HIST')
        c.ratio_plot((h_down, h_bkg), option='HIST')
        c.ratio_plot((h_data, h_bkg))

        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / GeV')
        p1.ylabel('Data / Est.')

        c.ylim(1E+00, 1E+06)
        p1.ylim(0.80, 1.20)
        p1.yline(1.0)
        c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

        c.text([
            "#sqrt{s} = 13 TeV,  %s fb^{-1}" % tf.config['lumi'],
            "Incl. #gamma Monte Carlo",
            "Photon channel",
        ],
               qualifier='Simulation Internal')

        c.log()
        c.legend()

        if args.save:
            c.save('plots/new_closure_%dGeV_pm%d.pdf' %
                   (args.mass, args.window * 100.))
        if args.show: c.show()
        pass

    return
Example #6
0
def main():
    """Validate the transfer factor fit using toys, producing one plot per mass point.

    Loads all files matching ``tf.config['base_path'] + 'objdef_data_*.root'``,
    appends a ``logpt`` field, and splits the entries into pass/fail using
    ``tf.config['pass']``. Then, for each of 15 mass points from 110 to 250:

    * runs the nominal transfer factor fit and computes weights for the
      failing entries at shifts 0, +1 and -1;
    * runs ``args.N`` toys fits seeded with the nominal ``theta`` and gets the
      corresponding weights;
    * draws nominal, toys, shifted and passing-data mass histograms with
      ratios to the nominal histogram.

    The canvas is shown if ``args.show`` is set and written to
    ``plots/validation_<mass>GeV_N<N>.pdf`` if ``args.save`` is set.

    Returns:
        None. Returns early (after a warning) if no files are found or no
        data was loaded.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')

    if not files:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    # NOTE(review): `info` is never read in this function. The call is kept
    # because it still performs a read of the output tree -- confirm whether
    # it can be dropped.
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Check output.
    if data.size == 0:
        warning("No data was loaded. Exiting.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # The masked selections and the window do not depend on the mass point.
    # Select once here instead of re-copying the array for every histogram
    # and every toys experiment inside the loop.
    data_fail = data[msk_fail]
    mass_fail = data_fail['m']
    mass_pass = data[msk_pass]['m']
    window = 0.2 if (args.window is None) else args.window

    # Validating transfer factor fit using toys
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    for mass in np.linspace(110, 250, 14 + 1, endpoint=True):

        # Single-argument parenthesised form: same output under the Python 2
        # print statement, and valid syntax under Python 3.
        print("-------- MASS: %d GeV" % mass)

        # Set up transfer factor calculator instance (default configuration)
        calc = tf.calculator(data=data, config=tf.config, verbose=False)
        calc.mass = mass
        calc.window = window

        # Get nominal best-fit theta
        calc.fit()
        theta = calc.theta()
        weights_nom = calc.weights(data_fail, shift=0)
        weights_up = calc.weights(data_fail, shift=+1)
        weights_down = calc.weights(data_fail, shift=-1)

        # "Throw toys" from TF profile, fit N times
        calc.toysfit(N=args.N, theta=theta)

        # Get weights for each toys experiment fit
        toys_weights = calc.toysweights(data_fail)

        # Plot variations
        bins = tf.config['massbins']
        c = ap.canvas(num_pads=2, batch=not args.show)

        # -- Nominal background(s)
        hist_nom = c.hist(mass_fail, bins=bins, weights=weights_nom,
                          fillcolor=ROOT.kAzure + 7, label='Nominal bkg.')
        h_sum = c.hist(hist_nom,
                       fillstyle=3245, fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3, option='E2',
                       label='Stat. uncert.')

        # -- Toys backgrounds
        # Colour cycles every five toys and the line style advances with each
        # cycle; only the first five toys get a legend entry.
        toys_hists = list()
        for idx, weights in enumerate(toys_weights):
            label = ('Toys %d' % (idx + 1)) if idx < 5 else None
            h = c.hist(mass_fail, bins=bins, weights=weights[0],
                       fillstyle=0,
                       linecolor=ROOT.kRed + idx % 5,
                       linestyle=1 + idx // 5,
                       label=label)
            toys_hists.append(h)

        # -- Nominal variations
        hist_up = c.hist(mass_fail, bins=bins, weights=weights_up,
                         fillstyle=0, linecolor=ROOT.kGreen, linestyle=2,
                         label='Syst. uncert.')
        hist_down = c.hist(mass_fail, bins=bins, weights=weights_down,
                           fillstyle=0, linecolor=ROOT.kGreen, linestyle=2)

        # -- Data
        hist_data = c.plot(mass_pass, bins=bins, label='Data')

        # -- Ratio plots (all with respect to the nominal background)
        c.ratio_plot((h_sum, hist_nom), option='E2')
        for h in toys_hists:
            c.ratio_plot((h, hist_nom), option='HIST')
        c.ratio_plot((hist_up, hist_nom), option='HIST')
        c.ratio_plot((hist_down, hist_nom), option='HIST')
        c.ratio_plot((hist_data, hist_nom), oob=True)

        # -- Decorations
        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / 5 GeV')
        c.pads()[1].ylabel('Ratio wrt. nominal')
        c.pads()[1].ylim(0.8, 1.2)
        c.pads()[1].yline(1.)
        c.text(["#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}",
                "Photon channel"],
               qualifier="Internal")

        # NOTE(review): the SR band is fixed at +/-20% of the mass and does
        # not follow `window` -- confirm this is intended.
        c.region("SR", 0.8 * mass, 1.2 * mass)

        c.legend()
        c.log()
        if args.show:
            c.show()
        if args.save:
            c.save('plots/validation_%dGeV_N%d.pdf' % (mass, args.N))

    return