Python calculator Examples

Programming Language: Python

Namespace/Package Name: transferfactor

Method/Function: calculator

Examples at hotexamples.com: 6

Python calculator - 6 examples found. These are the top-rated real-world Python examples of transferfactor.calculator, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def main():
    """Compute the GBS background estimate and write it to a ROOT file.

    The steps performed are:

    1. Parse the command-line arguments and reject inconsistent requests
       (W/Z subtraction only makes sense when running on data).
    2. Load the input sample ('data', or MC when ``args.data`` is false) and
       the W/Z MC sample, attach a ``DSID`` field, and scale MC weights by
       cross section and luminosity.
    3. For each GBS mass point, fit the transfer factor (TF) and accumulate
       the TF weights of failing jets lying outside the +/- 20% mass window
       around that mass point; the accumulated weights are then averaged.
    4. Write the TF-weighted failing jets (nominal, up and down variations)
       to ``output/objdef_GBS[MC]_<DSID>.root``.
    5. Dump the configuration and command-line arguments to a JSON log file.

    Returns:
        None. The function returns early (after a warning) if the requested
        options are inconsistent or if no input files are found.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Check(s)
    if (not args.data) and args.subtractWZMC:
        warning(
            "Requesting to subtract W/Z MC from MC background which contains no contamination. Exiting."
        )
        return

    if (not args.data) and args.subtractWZdata:
        warning(
            "Requesting to subtract W/Z data from MC background which contains no contamination. Exiting."
        )
        return

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    base_path = tf.config['base_path']
    if args.data:
        files_data = glob.glob(base_path + 'objdef_data_*.root')
    else:
        files_data = glob.glob(base_path + 'objdef_MC_3610*.root')
    files_WZ = (glob.glob(base_path + 'objdef_MC_30543*.root') +
                glob.glob(base_path + 'objdef_MC_30544*.root'))

    if not files_data:
        warning("No files found. Try to run:")
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field. For real data the field is left at zero and the
    # weights are untouched; only MC is scaled.
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    if not args.data:
        for idx, file_id in enumerate(info_data['id']):
            # Mask of all 'data' entries with same id, i.e. from same file
            msk = (data['id'] == file_id)
            tmp_DSID = info_data['DSID'][idx]  # DSID for this file
            # Scale by cross section x filter eff. for this DSID
            data['weight'][msk] *= xsec[tmp_DSID]
            data['DSID'][msk] = tmp_DSID  # Store DSID
        # Scale all events (MC) by luminosity
        data['weight'] *= tf.config['lumi']

    WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
    # Pair each file id with its row position in `info_WZ`, exactly as done
    # for `data` above. Using the id value itself as a row index only works
    # when ids happen to coincide with positions.
    for idx, file_id in enumerate(info_WZ['id']):
        # Mask of all 'WZ' entries with same id, i.e. from same file
        msk = (WZ['id'] == file_id)
        tmp_DSID = info_WZ['DSID'][idx]  # DSID for this file
        # Scale by cross section x filter eff. for this DSID
        WZ['weight'][msk] *= xsec[tmp_DSID]
        WZ['DSID'][msk] = tmp_DSID  # Store DSID
    WZ['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if
                         (args.subtractWZMC and args.data) else None)

    # GBS mass bins
    masses = np.linspace(100, 270, 34 + 1, endpoint=True)

    # Weight and counter arrays, one entry per failing jet
    n_fail = np.sum(msk_fail)
    weights_bkg_nom = np.zeros((n_fail, ), dtype=float)
    weights_bkg_up = np.zeros((n_fail, ), dtype=float)
    weights_bkg_down = np.zeros((n_fail, ), dtype=float)
    counter_bkg = np.zeros((n_fail, ), dtype=float)

    n_WZ_fail = np.sum(msk_WZ_fail)
    weights_WZ_nom = np.zeros((n_WZ_fail, ), dtype=float)
    weights_WZ_up = np.zeros((n_WZ_fail, ), dtype=float)
    weights_WZ_down = np.zeros((n_WZ_fail, ), dtype=float)
    counter_WZ = np.zeros((n_WZ_fail, ), dtype=float)

    # Masses of the failing jets do not change between mass points
    m_fail = data['m'][msk_fail]
    m_WZ_fail = WZ['m'][msk_WZ_fail]

    for mass in masses:
        print(" -- %s" % (mass, ))

        # Fit TF profile
        calc.mass = mass
        calc.fullfit()

        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_gbs_%s_%dGeV_' %
                      ('data' if args.data else 'MC', mass),
                      MC=not args.data)

        # Get TF weights
        w_nom, w_up, w_down = calc.fullweights(data[msk_fail])
        w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ[msk_WZ_fail])

        # Compute mask for which jets to use in GBS computation: only jets
        # outside the +/- 20% window around the current mass point
        msk_gbs = ~(np.abs(m_fail - mass) < 0.2 * mass)
        msk_gbs_WZ = ~(np.abs(m_WZ_fail - mass) < 0.2 * mass)

        # Store weights and increment counter for masked jets
        weights_bkg_nom[msk_gbs] += w_nom[msk_gbs]
        weights_bkg_up[msk_gbs] += w_up[msk_gbs]
        weights_bkg_down[msk_gbs] += w_down[msk_gbs]
        counter_bkg[msk_gbs] += 1.

        weights_WZ_nom[msk_gbs_WZ] += w_WZ_nom[msk_gbs_WZ]
        weights_WZ_up[msk_gbs_WZ] += w_WZ_up[msk_gbs_WZ]
        weights_WZ_down[msk_gbs_WZ] += w_WZ_down[msk_gbs_WZ]
        counter_WZ[msk_gbs_WZ] += 1.

    # Average the accumulated weights over the mass points that contributed
    # (jets that never contributed keep weight zero)
    msk = (counter_bkg > 0)
    weights_bkg_nom[msk] /= counter_bkg[msk]
    weights_bkg_up[msk] /= counter_bkg[msk]
    weights_bkg_down[msk] /= counter_bkg[msk]

    msk = (counter_WZ > 0)
    weights_WZ_nom[msk] /= counter_WZ[msk]
    weights_WZ_up[msk] /= counter_WZ[msk]
    weights_WZ_down[msk] /= counter_WZ[msk]

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    check_make_dir('output')

    DSID = 400000 if args.data else 400001

    # Write TF-scaled failing data to file
    output = ROOT.TFile(
        'output/objdef_GBS{MC}_{DSID}.root'.format(
            DSID=DSID, MC='' if args.data else 'MC'), 'RECREATE')

    # (branch name, background TF weights, W/Z TF weights) per variation
    variations = [
        ('Nominal', weights_bkg_nom, weights_WZ_nom),
        ('TF_UP', weights_bkg_up, weights_WZ_up),
        ('TF_DOWN', weights_bkg_down, weights_WZ_down),
    ]

    for var_name, w, w_WZ in variations:

        # -- Prepare mass- and weight vectors
        vector_m = data['m'][msk_fail]
        vector_w = data['weight'][msk_fail] * w
        if args.subtractWZdata and args.data:
            if WZ is not None and WZ.size > 0:
                print("  Subtracting TF-scaled W/Z MC from background estimate")
                # The W/Z component enters with negative weight
                vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                vector_w = np.concatenate(
                    (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
            else:
                warning(
                    "  Could not subtract failed, TF-scale W/Z MC component")
        # Note: Don't subtract the signal component; that's output as a separate histogram to be used in the simultaneous fit

        # -- Prepare DSID and isMC vectors
        vector_DSID = np.ones_like(vector_w) * DSID
        vector_isMC = np.ones_like(vector_w).astype(bool)

        # `list(...)` so that the records are materialised regardless of
        # whether `zip` returns a list or an iterator
        array1 = np.array(list(zip(vector_m, vector_w)),
                          dtype=[(tf.config['prefix'] + 'm', np.float64),
                                 ('weight', np.float64)])

        array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                          dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

        # Mass and weight branch
        print("  Writing arrays to file: %s" % var_name)
        treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                              'Jet_tau21DDT').replace(
                                                  'Nominal', var_name)
        make_directories('/'.join(treename1.split('/')[:-1]), fromDir=output)
        tree1 = ROOT.TTree(treename1.split('/')[-1], "")
        array2tree(array1, tree=tree1)

        # outputTree
        treename2 = tf.config['outputtree'].replace('Nominal', var_name)
        make_directories('/'.join(treename2.split('/')[:-1]), fromDir=output)
        tree2 = ROOT.TTree(treename2.split('/')[-1], "")
        array2tree(array2, tree=tree2)

        output.Write()

    output.Close()

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    log_path = 'logs/gbs_config_%s_%d.log' % ('data' if args.data else 'MC',
                                              DSID)
    # Context manager guarantees the log is flushed and closed
    with open(log_path, 'w') as log_file:
        json.dump([cfg, vars(args)], log_file)

Example #2

Show file

def _fill_toys(toys, dataset, tau21DDT, weight):
    """Copy generated (rhoDDT, logpt) points into the record array `toys`.

    Arguments:
        toys: Structured numpy array with fields 'rhoDDT', 'logpt', 'pt',
            'm', 'weight' and 'tau21DDT'; filled in place.
        dataset: Generated dataset; entry `i` is read with `dataset.get(i)`
            and its values with `getRealValue`.
        tau21DDT: Constant value stored in the 'tau21DDT' field.
        weight: Constant value stored in the 'weight' field.
    """
    for idx in range(len(toys)):
        row = dataset.get(idx)
        toys['rhoDDT'][idx] = row.getRealValue('rhoDDT')
        toys['logpt'][idx] = row.getRealValue('logpt')
        toys['pt'][idx] = np.exp(toys['logpt'][idx])
        # Mass reconstructed as sqrt(exp(rhoDDT) * pt)
        toys['m'][idx] = np.sqrt(
            np.exp(toys['rhoDDT'][idx]) * toys['pt'][idx])
        toys['weight'][idx] = weight
        toys['tau21DDT'][idx] = tau21DDT


def main():
    """Run the signal injection test for the transfer factor (TF) method.

    The steps performed are:

    1. Parse the command-line arguments and locate the signal sample for
       ``args.mass``.
    2. Load background and signal MC, attach a ``DSID`` field and scale the
       weights by cross section and luminosity.
    3. Optionally (``args.toys``) replace the MC by toys generated from
       smoothed pass/fail histograms, with the signal added after smoothing
       when ``args.inject`` is set.
    4. Fit the transfer factor at ``args.mass`` and compute TF weights for
       the failing data and failing signal.
    5. Produce pre-fit plots (mu = 0 and mu = 1), fit the signal strength
       for the down/nominal/up TF variations, and produce a post-fit plot.

    Returns:
        None. The function returns early if no signal DSID is found or if no
        data was loaded.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root') + [
        tf.config['base_path'] + sig_file
    ]

    # NOTE(review): `files` always contains the signal path, so this check
    # can never trigger as written; kept to preserve existing behaviour.
    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    # Pair each file id with its row position in `info`. Using the id value
    # itself as a row index only works when ids coincide with positions.
    for idx, file_id in enumerate(info['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        DSID = info['DSID'][idx]  # DSID for this file
        # Scale by cross section x filter eff. for this DSID
        data['weight'][msk] *= xsec[DSID]
        data['DSID'][msk] = DSID  # Store DSID
    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Check output.
    if data.size == 0:
        warning("No data was loaded.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Separate out signal MC
    msk_sig = (data['DSID'] == sig_DSID)
    msk_data = ~msk_sig

    print("DATA STATISTICS: %s" % (np.sum(data[msk_data]['weight']), ))

    signal = data[msk_sig]
    if not args.inject:
        # If we're not injecting signal, explicitly remove it from the 'data' array
        data = data[~msk_sig]

    # Toys
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.toys:

        # Get masks
        msk_pass = tf.config['pass'](data)
        msk_fail = ~msk_pass

        # Create histograms. When injecting, `data` still contains the
        # signal, which is excluded here so that only the background is
        # smoothed.
        if args.inject:
            msk_bkg_pass = msk_pass & ~msk_sig
            msk_bkg_fail = msk_fail & ~msk_sig
        else:
            msk_bkg_pass = msk_pass
            msk_bkg_fail = msk_fail

        pdf_pass = get_histogram(data,
                                 tf.config['params'],
                                 tf.config['axes'],
                                 mask=msk_bkg_pass)
        pdf_fail = get_histogram(data,
                                 tf.config['params'],
                                 tf.config['axes'],
                                 mask=msk_bkg_fail)

        # Smooth (only leading background)
        for _ in range(2):
            pdf_pass.Smooth()
            pdf_fail.Smooth()

        # Inject afterwards
        if args.inject:
            pdf_pass.Add(
                get_histogram(data,
                              tf.config['params'],
                              tf.config['axes'],
                              mask=msk_pass & msk_sig))
            pdf_fail.Add(
                get_histogram(data,
                              tf.config['params'],
                              tf.config['axes'],
                              mask=msk_fail & msk_sig))

        # Create p.d.f.s
        # -- Define variables
        rhoDDT = ROOT.RooRealVar('rhoDDT', 'rhoDDT', tf.config['axes'][0][0],
                                 tf.config['axes'][0][-1])
        logpt = ROOT.RooRealVar('logpt', 'logpt', tf.config['axes'][1][0],
                                tf.config['axes'][1][-1])

        rhoDDT.setBins(len(tf.config['axes'][0]) - 1)
        logpt.setBins(len(tf.config['axes'][1]) - 1)

        # -- Define histograms
        rdh_pass = ROOT.RooDataHist('rdh_pass', 'rdh_pass',
                                    ROOT.RooArgList(rhoDDT, logpt), pdf_pass)
        rdh_fail = ROOT.RooDataHist('rdh_fail', 'rdh_fail',
                                    ROOT.RooArgList(rhoDDT, logpt), pdf_fail)

        # -- Turn histograms into pdf's
        rhp_pass = ROOT.RooHistPdf('rhp_pass', 'rhp_pass',
                                   ROOT.RooArgSet(rhoDDT, logpt), rdh_pass)
        rhp_fail = ROOT.RooHistPdf('rhp_fail', 'rhp_fail',
                                   ROOT.RooArgSet(rhoDDT, logpt), rdh_fail)

        # Generate toys
        mult = 1.
        N_pass = int(np.sum(data['weight'][msk_pass]) * mult)
        N_fail = int(np.sum(data['weight'][msk_fail]) * mult)

        toy_fields = ['rhoDDT', 'logpt', 'tau21DDT', 'pt', 'm', 'weight']
        toy_dtype = [(var, 'f8') for var in toy_fields]
        toys_pass = np.zeros(N_pass, dtype=toy_dtype)
        toys_fail = np.zeros(N_fail, dtype=toy_dtype)

        print("Generating toys (pass: %d, fail: %d)" % (N_pass, N_fail))
        rds_pass = rhp_pass.generate(ROOT.RooArgSet(rhoDDT, logpt), N_pass,
                                     True, False)
        rds_fail = rhp_fail.generate(ROOT.RooArgSet(rhoDDT, logpt), N_fail,
                                     True, False)

        # Passing toys get tau21DDT = 0, failing toys tau21DDT = 1
        toy_weight = 1. / float(mult)
        _fill_toys(toys_pass, rds_pass, 0., toy_weight)
        _fill_toys(toys_fail, rds_fail, 1., toy_weight)

        data = np.concatenate((toys_pass, toys_fail))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    calc = tf.calculator(data=data,
                         config=tf.config)  # Using default configuration
    calc.mass = args.mass
    calc.fullfit()

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    msk_sig_pass = tf.config['pass'](signal)
    msk_sig_fail = ~msk_sig_pass

    print("  -- Computing data weights")
    w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])
    print("  -- Computing signal weights")
    w_sig, _, _ = calc.fullweights(signal[msk_sig_fail])
    print("  -- Final fit done")
    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/new_signalinjection_%s%s_' %
                  ("toys_" if args.toys else "",
                   "injected" if args.inject else "notinjected"))

    # Performing signal injection test
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # NOTE: The condition is always true, i.e. the fit below is performed
    # regardless of the show/save flags.
    if True or args.show or args.save:

        # (best-fit value, total uncertainty); set by the fitting pass
        bestfit_mu = None

        # Passes: pre-fit mu = 0, pre-fit mu = 1, fit, post-fit.
        # `subtract` is not used in the loop body.
        for mu, fit, prefit, subtract in zip([0, 1, 1, None],
                                             [False, False, True, False],
                                             [True, True, True, False],
                                             [True, True, False, True]):

            if not prefit:
                mu = bestfit_mu[0]

            # Plotting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            c = ap.canvas(num_pads=2, batch=not args.show)
            p0, p1 = c.pads()

            # -- Histograms: Main pad
            bins = tf.config['massbins']

            h_bkg = c.hist(data['m'][msk_data_fail],
                           bins=bins,
                           weights=data['weight'][msk_data_fail] * w_nom,
                           display=False)
            h_bkg_up = c.hist(data['m'][msk_data_fail],
                              bins=bins,
                              weights=data['weight'][msk_data_fail] * w_up,
                              display=False)
            h_bkg_down = c.hist(data['m'][msk_data_fail],
                                bins=bins,
                                weights=data['weight'][msk_data_fail] * w_down,
                                display=False)

            h_sig = c.hist(signal['m'][msk_sig_pass],
                           bins=bins,
                           weights=signal['weight'][msk_sig_pass],
                           scale=mu,
                           display=False)
            h_sfl = c.hist(signal['m'][msk_sig_fail],
                           bins=bins,
                           weights=signal['weight'][msk_sig_fail] * w_sig,
                           scale=mu,
                           display=False)
            h_data = c.plot(data['m'][msk_data_pass],
                            bins=bins,
                            weights=data['weight'][msk_data_pass],
                            display=False)

            # Divide contents and errors by the bin width ("Events / GeV")
            for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
                width = float(h_bkg.GetBinWidth(ibin))
                for hist in (h_bkg, h_bkg_up, h_bkg_down, h_sig, h_sfl,
                             h_data):
                    hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                    hist.SetBinError(ibin, hist.GetBinError(ibin) / width)

            if not fit:
                h_bkg.Add(h_sfl, -1)  # Subtracting signal
                h_bkg_up.Add(h_sfl, -1)  # --
                h_bkg_down.Add(h_sfl, -1)  # --

            # Starting with standard histogram, not THStack, just to get
            # y-axis to cooperate
            c.hist(h_bkg, option='HIST', linestyle=0, fillstyle=0, fillcolor=0)
            h_bkg = c.stack(h_bkg,
                            fillcolor=ROOT.kAzure + 7,
                            label='Background pred.')
            h_sig = c.stack(h_sig,
                            fillcolor=ROOT.kRed - 4,
                            label="Z' (#mu = %s)" %
                            ("%.0f" % mu if prefit else "%.2f #pm %.2f" %
                             (mu, bestfit_mu[1])))

            h_sum = c.hist(h_bkg,
                           fillstyle=3245,
                           fillcolor=ROOT.kGray + 3,
                           option='E2',
                           label='Stat. uncert.')

            h_bkg_up = c.hist(h_bkg_up,
                              linecolor=ROOT.kGreen + 1,
                              linestyle=2,
                              option='HIST',
                              label='Syst. uncert.')
            h_bkg_down = c.hist(h_bkg_down,
                                linecolor=ROOT.kGreen + 1,
                                linestyle=2,
                                option='HIST')

            h_data = c.plot(h_data, label='Pseudo-data')

            c.hist(h_bkg, option='AXIS')  # Re-draw axes

            # -- Histograms: Ratio pad
            c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
            c.ratio_plot((h_sum, h_sum), option='E2')
            c.ratio_plot((h_bkg_up, h_sum), option='HIST')
            c.ratio_plot((h_bkg_down, h_sum), option='HIST')
            c.ratio_plot((h_data, h_sum))

            # -- Axis labels
            c.xlabel('Large-#it{R} jet mass [GeV]')
            c.ylabel('Events / GeV')
            p1.ylabel('Data / Est.')

            # -- Axis limits
            c.ylim(1.0E+00, 1.0E+06)
            p1.ylim(0.80, 1.20)

            # -- Line(s)
            p1.yline(1.0)

            # -- Region(s)
            c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

            # -- Text
            c.text(
                [
                    "#sqrt{s} = 13 TeV,  %s fb^{-1}" % tf.config['lumi'],
                    "Incl. #gamma Monte Carlo",
                    "Photon channel",
                ] + (["Using toys"] if args.toys else []),
                qualifier='Simulation Internal')

            # -- Log
            c.log()

            # -- Legend
            c.legend()
            if args.save and not fit:
                c.save('plots/new_signalinjection_%s%dGeV_pm%d_%s_%s.pdf' %
                       ("toys_" if args.toys else "", args.mass, 20.,
                        ('prefit_mu%d' % mu if prefit else 'postfit'),
                        ('injected' if args.inject else 'notinjected')))
            if args.show and not fit:
                c.show()

            # Fitting
            # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

            if fit:

                # (value, error) of the fitted signal strength, in the
                # order: down, nominal, up
                fit_results = list()

                variations = [
                    ('Down', h_bkg_down.Clone('h_save_down')),
                    ('Nominal', h_bkg.Clone('h_save_nom')),
                    ('Up', h_bkg_up.Clone('h_save_up')),
                ]

                for label, h_bkg_use in variations:

                    print("Variation: " + label)

                    # -- Define jet mass variable
                    mJ = ROOT.RooRealVar('mJ', 'mJ', 50, 300)
                    roobinning = ROOT.RooBinning(
                        len(tf.config['massbins']) - 1, tf.config['massbins'])
                    mJ.setBinning(roobinning)

                    # -- Define histograms
                    rdh_bkg = ROOT.RooDataHist('rdh_bkg', 'rdh_bkg',
                                               ROOT.RooArgList(mJ), h_bkg_use)
                    rdh_sig = ROOT.RooDataHist('rdh_sig', 'rdh_sig',
                                               ROOT.RooArgList(mJ), h_sig)
                    rdh_sfl = ROOT.RooDataHist('rdh_sfl', 'rdh_sfl',
                                               ROOT.RooArgList(mJ), h_sfl)

                    # -- Turn histograms into pdf's
                    rhp_bkg = ROOT.RooHistPdf('rhp_bkg', 'rhp_bkg',
                                              ROOT.RooArgSet(mJ), rdh_bkg)
                    rhp_sig = ROOT.RooHistPdf('rhp_sig', 'rhp_sig',
                                              ROOT.RooArgSet(mJ), rdh_sig)
                    rhp_sfl = ROOT.RooHistPdf('rhp_sfl', 'rhp_sfl',
                                              ROOT.RooArgSet(mJ), rdh_sfl)

                    # -- Define integrals as constants
                    n_bkg = ROOT.RooRealVar('n_bkg', 'n_bkg',
                                            h_bkg_use.Integral())
                    n_sig = ROOT.RooRealVar('n_sig', 'n_sig', h_sig.Integral())
                    n_sfl = ROOT.RooRealVar('n_sfl', 'n_sfl', h_sfl.Integral())

                    # -- Define signal strength and constant(s). Bound to a
                    #    separate Python name so the loop variable `mu` is
                    #    not clobbered.
                    mu_fit = ROOT.RooRealVar('mu', 'mu', 1, 0, 5)
                    neg1 = ROOT.RooRealVar('neg1', 'neg1', -1)

                    # -- Define fittable normalisation factors
                    c_bkg = ROOT.RooFormulaVar('c_bkg', 'c_bkg', '@0',
                                               ROOT.RooArgList(n_bkg))
                    c_sig = ROOT.RooFormulaVar('c_sig', 'c_sig', '@0 * @1',
                                               ROOT.RooArgList(mu_fit, n_sig))
                    c_sfl = ROOT.RooFormulaVar(
                        'c_sfl', 'c_sfl', '@0 * @1 * @2',
                        ROOT.RooArgList(neg1, mu_fit, n_sfl))

                    # -- Construct combined pdf
                    pdf = ROOT.RooAddPdf(
                        'pdf', 'pdf', ROOT.RooArgList(rhp_bkg, rhp_sig,
                                                      rhp_sfl),
                        ROOT.RooArgList(c_bkg, c_sig, c_sfl))

                    # -- Construct data histogram
                    rdh_data = ROOT.RooDataHist('rdh_data', 'rdh_data',
                                                ROOT.RooArgList(mJ), h_data)

                    # -- Fit pdf to data histogram
                    pdf.chi2FitTo(rdh_data, ROOT.RooLinkedList())

                    print("Best fit mu: %.3f +/- %.3f" % (mu_fit.getValV(),
                                                          mu_fit.getError()))
                    fit_results.append((mu_fit.getValV(), mu_fit.getError()))

                # Central value from the nominal fit; uncertainty is half the
                # down/up spread added in quadrature to the nominal fit error
                (mu_down, _), (mu_nom, err_nom), (mu_up, _) = fit_results
                bestfit_mu = mu_nom, np.sqrt(
                    np.power(abs(mu_down - mu_up) / 2., 2.) +
                    np.power(err_nom, 2.))

Example #3

Show file

File: globalbackground.py Project: asogaard/transferfactor

def main():
    """Compare the window-fit and GBS background estimates at one mass point.

    Workflow:
      1. Load the ``objdef_MC_3610*`` MC (treated as pseudo-data), the GBS
         background file and the W/Z MC; with ``args.inject`` also the signal
         MC selected by ``get_signal_DSID(args.mass, tolerance=10)``.
      2. Scale every MC sample by its per-DSID cross section and by
         ``tf.config['lumi']``, then merge the (optional) signal and the W/Z
         sample into the pseudo-data.
      3. Fit the transfer factor twice -- once with the calculator defaults
         and once with a +/- 20% window around ``args.mass`` -- and compute
         weights for the failing events from each fit.
      4. Draw the jet-mass spectrum with a ratio pad, followed by a per-bin
         p0 plot for the window-based and the GBS background estimates.

    Reads ``args.mass``, ``args.inject``, ``args.show``, ``args.save`` and
    ``args.data``; the latter only affects labels and output file names.
    Returns ``None``; returns early with a warning if inputs are missing.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass, tolerance=10)
    if sig_DSID is None:
        warning("No signal file was found")
        return
    sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)

    # Load data
    base_path = tf.config['base_path']
    files = {
        'data': glob.glob(base_path + 'objdef_MC_3610*.root'),
        'gbs': glob.glob(base_path + 'objdef_GBSMC_400001.root'),
        'WZ': glob.glob(base_path + 'objdef_MC_3054*.root'),
    }
    if args.inject:
        files['sig'] = glob.glob(base_path + sig_file)

    # Every sample listed above is used unconditionally further down, so stop
    # if any glob came back empty. (The dict itself always has at least three
    # keys, so testing its length could never detect missing input.)
    if any(len(paths) == 0 for paths in files.values()):
        warning("No files found. Try to run:")
        warning(" $ source getSomeData.sh")
        return

    data = loadData(files['data'],
                    tf.config['tree'],
                    prefix=tf.config['prefix'])
    gbs = loadData(files['gbs'],
                   tf.config['finaltree'],
                   prefix=tf.config['prefix'])
    WZ = loadData(files['WZ'], tf.config['tree'], prefix=tf.config['prefix'])
    if args.inject:
        signal = loadData(files['sig'],
                          tf.config['tree'],
                          prefix=tf.config['prefix'])
    else:
        signal = None
    info = {
        key: loadData(files[key], tf.config['outputtree'], stop=1)
        for key in files
    }

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field
    if args.inject:
        signal = append_fields(signal,
                               'DSID',
                               np.zeros((signal.size, )),
                               dtypes=int)
        for idx, file_id in enumerate(info['sig']['id']):
            # Mask of all 'signal' entries with same id, i.e. from same file
            msk = (signal['id'] == file_id)
            dsid = info['sig']['DSID'][idx]  # DSID for this file
            # Scale by cross section x filter eff. for this DSID
            signal['weight'][msk] *= xsec[dsid]
            signal['DSID'][msk] = dsid  # Store DSID
        signal['weight'] *= tf.config['lumi']

    WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
    for idx, file_id in enumerate(info['WZ']['id']):
        # Mask of all 'WZ' entries with same id, i.e. from same file
        msk = (WZ['id'] == file_id)
        dsid = info['WZ']['DSID'][idx]  # DSID for this file
        # Scale by cross section x filter eff. for this DSID
        WZ['weight'][msk] *= xsec[dsid]
        WZ['DSID'][msk] = dsid  # Store DSID
    WZ['weight'] *= tf.config['lumi']

    # The 'data' sample is always read from MC files here, so it is scaled
    # in the same way as the other MC samples.
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    for idx, file_id in enumerate(info['data']['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        dsid = info['data']['DSID'][idx]  # DSID for this file
        # Scale by cross section x filter eff. for this DSID
        data['weight'][msk] *= xsec[dsid]
        data['DSID'][msk] = dsid  # Store DSID
    data['weight'] *= tf.config['lumi']

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))

    # Inject signal into data
    if args.inject:
        data = np.array(np.concatenate((data, signal)), dtype=data.dtype)
    # W/Z is always merged into the (pseudo-)data.
    data = np.array(np.concatenate((data, WZ)), dtype=data.dtype)
    # @TODO: Not sure this script works for data input... But it's not used
    #        anyway.

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    # -- Data (incl. signal)
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # -- W/Z
    msk_WZ_pass = tf.config['pass'](WZ)
    msk_WZ_fail = ~msk_WZ_pass

    # -- Signal
    if args.inject:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data, config=tf.config, subtract=WZ)

    # Nominal fit
    calc.fit()
    w_nom = calc.weights(data[msk_fail])
    w_nom_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.show or args.save:
        calc.plot(show=args.show,
                  save=args.save,
                  prefix='plots/globalbackground_%s_%s_' %
                  ('injected' if args.inject else 'notinjected',
                   'data' if args.data else 'MC'))

    # mass +/- 20% stripe fit
    calc.mass = args.mass
    calc.window = 0.2
    calc.fit()
    w_stripe = calc.weights(data[msk_fail])
    w_stripe_WZ = calc.weights(WZ[msk_WZ_fail])
    if args.inject:
        w_stripe_sig = calc.weights(signal[msk_sig_fail])

    # Plotting
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    bins = np.linspace(100, 250, 30 + 1, endpoint=True)

    # Setup canvas
    c = ap.canvas(num_pads=2, batch=not args.show)
    p0, p1 = c.pads()

    # Add stacked backgrounds
    h_bkg_nom = c.hist(data['m'][msk_fail],
                       bins=bins,
                       weights=data['weight'][msk_fail] * w_nom,
                       display=False)
    h_bkg_stripe = c.hist(data['m'][msk_fail],
                          bins=bins,
                          weights=data['weight'][msk_fail] * w_stripe,
                          display=False)
    h_WZfl_nom = c.hist(WZ['m'][msk_WZ_fail],
                        bins=bins,
                        weights=WZ['weight'][msk_WZ_fail] * w_nom_WZ,
                        display=False)
    h_WZfl_stripe = c.hist(WZ['m'][msk_WZ_fail],
                           bins=bins,
                           weights=WZ['weight'][msk_WZ_fail] * w_stripe_WZ,
                           display=False)
    if args.inject:
        h_sig = c.hist(signal['m'][msk_sig_pass],
                       bins=bins,
                       weights=signal['weight'][msk_sig_pass],
                       display=False)
        h_sfl = c.hist(signal['m'][msk_sig_fail],
                       bins=bins,
                       weights=signal['weight'][msk_sig_fail] * w_stripe_sig,
                       display=False)
    h_gbs = c.hist(gbs['m'], bins=bins, weights=gbs['weight'], display=False)

    # -- Subtract (opt.)
    if args.inject:
        h_bkg_stripe.Add(h_sfl, -1)
        h_gbs.Add(h_sfl, -1)
    h_bkg_nom.Add(h_WZfl_nom, -1)
    h_bkg_stripe.Add(h_WZfl_stripe, -1)

    # -- Actually draw
    h_WZ = c.stack(WZ['m'][msk_WZ_pass],
                   bins=bins,
                   weights=WZ['weight'][msk_WZ_pass],
                   fillcolor=ROOT.kRed - 4,
                   label='W/Z + #gamma')

    h_bkg_nom = c.stack(h_bkg_nom,
                        fillcolor=ROOT.kAzure + 7,
                        label="Bkg. (full)")
    # The stack sum is taken before the signal is stacked, so it holds only
    # the W/Z and nominal-background components.
    h_sum = c.getStackSum()
    h_bkg_stripe.Add(h_WZ)
    h_gbs.Add(h_WZ)
    if args.inject:
        h_sig = c.stack(h_sig,
                        fillcolor=ROOT.kViolet - 4,
                        label="Z' (%d GeV)" % args.mass)
    h_bkg_stripe = c.hist(h_bkg_stripe,
                          linecolor=ROOT.kGreen + 1,
                          label="Bkg. (window)")
    h_gbs = c.hist(h_gbs, linecolor=ROOT.kViolet + 1, label="Bkg. (GBS)")

    # Draw stats. error of stacked sum
    h_sum = c.hist(h_sum,
                   fillstyle=3245,
                   fillcolor=ROOT.kGray + 2,
                   linecolor=ROOT.kGray + 3,
                   label='Stats. uncert.',
                   option='E2')

    # Add (pseudo-) data
    h_data = c.plot(data['m'][msk_pass],
                    bins=bins,
                    weights=data['weight'][msk_pass],
                    markersize=0.8,
                    label='Data' if args.data else 'Pseudo-data')

    # Axis limits
    p1.ylim(0.8, 1.2)
    c.padding(0.45)
    c.log(True)

    # Draw error- and ratio plots
    if args.inject:
        hr_sig = c.ratio_plot((h_sig, h_sum), option='HIST', offset=1)
    h_err = c.ratio_plot((h_sum, h_sum), option='E2')
    h_ratio = c.ratio_plot((h_data, h_sum), oob=True)
    h_rgbs = c.ratio_plot((h_gbs, h_sum),
                          linecolor=ROOT.kViolet + 1,
                          option='HIST ][')
    h_rstripe = c.ratio_plot((h_bkg_stripe, h_sum),
                             linecolor=ROOT.kGreen + 1,
                             option='HIST ][')

    # Add labels and text
    c.xlabel('Signal jet mass [GeV]')
    c.ylabel('Events')
    p1.ylabel('Data / Nom.')
    text_lines = ["#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}"]
    if not args.data:
        text_lines.append("Sherpa incl. #gamma MC")
    text_lines += [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
    ]
    if args.inject:
        text_lines.append("Signal injected")
    c.text(text_lines,
           qualifier='%sInternal' % ("Simulation " if not args.data else ""))

    # Add line(s)
    p1.yline(1.0)

    # Draw legend
    c.legend()
    c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

    # Save and show plot
    if args.save:
        c.save('plots/globalbackground_spectrum_%dGeV_%s_%s.pdf' %
               (args.mass, 'injected' if args.inject else 'notinjected',
                'data' if args.data else 'MC'))
    if args.show:
        c.show()

    # p0-plot
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Setup canvas
    c2 = ap.canvas(batch=not args.show)

    p_local = h_data.Clone('p_local')
    p_global = h_data.Clone('p_global')

    for ibin in range(1, h_data.GetXaxis().GetNbins() + 1):
        c_data = h_data.GetBinContent(ibin)
        e_data = h_data.GetBinError(ibin)
        c_loc = h_bkg_stripe.GetBinContent(ibin)
        e_loc = h_bkg_stripe.GetBinError(ibin)
        c_glb = h_gbs.GetBinContent(ibin)
        # The window-fit bin error is deliberately reused for the GBS
        # estimate instead of h_gbs.GetBinError(ibin).
        e_glb = e_loc

        # Per-bin significance of the data excess over each estimate
        z_loc = (c_data -
                 c_loc) / np.sqrt(np.square(e_data) + np.square(e_loc))
        z_glb = (c_data -
                 c_glb) / np.sqrt(np.square(e_data) +
                                  np.square(e_glb)) if c_glb > 0 else 0

        # Convert to a p-value, capped at 1
        p_loc = min(ROOT.TMath.Erfc(z_loc / np.sqrt(2)), 1)
        p_glb = min(ROOT.TMath.Erfc(z_glb / np.sqrt(2)), 1)

        p_local.SetBinContent(ibin, p_loc)
        p_global.SetBinContent(ibin, p_glb)
        p_local.SetBinError(ibin, 0)
        p_global.SetBinError(ibin, 0)

    c2.plot(p_local,
            markercolor=ROOT.kGreen + 1,
            linecolor=ROOT.kGreen + 1,
            option='PL',
            label="Local (20% window)")
    c2.plot(p_global,
            markercolor=ROOT.kViolet + 1,
            linecolor=ROOT.kViolet + 1,
            option='PL',
            label="Global (GBS)")
    c2.xlabel("Signal jet mass [GeV]")
    c2.ylabel("p_{0}")
    c2.log()

    c2.ylim(1E-04, 1E+04)
    # Reference lines at 0, 1, 2 and 3 sigma
    for sigma in range(4):
        c2.yline(ROOT.TMath.Erfc(sigma / np.sqrt(2)))

    text_lines = ["#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}"]
    if not args.data:
        text_lines.append("Sherpa incl. #gamma MC")
    text_lines += [
        "Trimmed anti-k_{t}^{R=1.0} jets",
        "ISR #gamma selection",
        ("Signal" if args.inject else "No signal") + " injected" +
        (" at m = %d GeV" % args.mass if args.inject else ""),
    ]
    c2.text(text_lines, qualifier='Simulation Internal')

    c2.region("SR", 0.8 * args.mass, 1.2 * args.mass)
    c2.legend()
    if args.save:
        c2.save('plots/globalbackground_p0_%dGeV_%s_%s.pdf' %
                (args.mass, 'injected' if args.inject else 'notinjected',
                 'data' if args.data else 'MC'))
    if args.show:
        c2.show()

    return

Example #4

Show file

def main():
    """Fit the transfer factor on data and write the background estimate.

    Workflow:
      1. Load data (``objdef_data_*``), W/Z MC and -- when
         ``get_signal_DSID(args.mass)`` finds one -- the signal MC. For
         masses below 100 without a dedicated signal sample, the W/Z MC is
         used as the signal instead.
      2. Scale the MC samples by per-DSID cross section and luminosity.
      3. Fit the transfer factor, either with ``calc.fullfit()`` (when
         ``args.window`` is ``None``) or with a manually-set window, and
         compute nominal/up/down weights for the failing events.
      4. With ``args.save``, write the TF-scaled failing data (optionally
         with the TF-scaled failing W/Z MC subtracted) and the TF-scaled
         failing signal to ROOT files under ``output/``.
      5. Dump the configuration and command-line arguments to a JSON log
         under ``logs/``.

    Returns ``None``; returns early with a warning if inputs are missing.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Output identifier encoding the window size (in percent) and the mass
    DSID = int("1%02d%03d" %
               (0 if args.window is None else args.window * 100, args.mass))

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Get signal file
    sig_DSID = get_signal_DSID(args.mass)

    # Load data
    files_data = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')
    files_WZ   = glob.glob(tf.config['base_path'] + 'objdef_MC_30543*.root') + \
                 glob.glob(tf.config['base_path'] + 'objdef_MC_30544*.root')

    if sig_DSID is None:
        if args.mass < 100.:
            print("Assuming signal is W/Z")
            files_sig = files_WZ
            files_WZ = []
        else:
            files_sig = []
    else:
        sig_file = 'objdef_MC_{DSID:6d}.root'.format(DSID=sig_DSID)
        print("Using signal file: %s" % sig_file)
        files_sig = [tf.config['base_path'] + sig_file]

    if len(files_data) == 0 or (sig_DSID and len(files_sig) == 0):
        warning("No files found.")
        return

    data = loadData(files_data, tf.config['tree'], prefix=tf.config['prefix'])
    signal = loadData(files_sig, tf.config['tree'], prefix=tf.config['prefix'])
    WZ = loadData(files_WZ, tf.config['tree'], prefix=tf.config['prefix'])
    info_data = loadData(files_data, tf.config['outputtree'], stop=1)
    info_sig = loadData(files_sig, tf.config['outputtree'], stop=1)
    info_WZ = loadData(files_WZ, tf.config['outputtree'], stop=1)

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field # @TODO: Make more elegant?
    # NOTE: 'data' is not scaled by cross section or luminosity here; only
    #       the MC samples below are.

    if signal is not None:
        signal = append_fields(signal,
                               'DSID',
                               np.zeros((signal.size, )),
                               dtypes=int)
        # Pair each file id with its *position* in the info array; using the
        # id value itself as an index only works if ids happen to be 0..N-1.
        for idx, file_id in enumerate(info_sig['id']):
            # Mask of all 'signal' entries with same id, i.e. from same file
            msk = (signal['id'] == file_id)
            tmp_DSID = info_sig['DSID'][idx]  # Get DSID for this file
            # Scale by cross section x filter eff. for this DSID
            signal['weight'][msk] *= xsec[tmp_DSID]
            signal['DSID'][msk] = tmp_DSID  # Store DSID
        # Scale all events (MC) by luminosity
        signal['weight'] *= tf.config['lumi']

    if WZ is not None:
        WZ = append_fields(WZ, 'DSID', np.zeros((WZ.size, )), dtypes=int)
        for idx, file_id in enumerate(info_WZ['id']):
            # Mask of all 'WZ' entries with same id, i.e. from same file
            msk = (WZ['id'] == file_id)
            tmp_DSID = info_WZ['DSID'][idx]  # Get DSID for this file
            # Scale by cross section x filter eff. for this DSID
            WZ['weight'][msk] *= xsec[tmp_DSID]
            WZ['DSID'][msk] = tmp_DSID  # Store DSID
        # @TODO: k-factors?
        # Scale all events (MC) by luminosity
        WZ['weight'] *= tf.config['lumi']

    # Check output.
    if data.size == 0 or ((signal is not None) and signal.size == 0):
        warning("No data was loaded. Exiting.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))
    if signal is not None:
        signal = append_fields(signal, 'logpt', np.log(signal['pt']))
    if WZ is not None:
        WZ = append_fields(WZ, 'logpt', np.log(WZ['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_data_pass = tf.config['pass'](data)
    msk_data_fail = ~msk_data_pass
    if signal is not None:
        msk_sig_pass = tf.config['pass'](signal)
        msk_sig_fail = ~msk_sig_pass
    if WZ is not None:
        msk_WZ_pass = tf.config['pass'](WZ)
        msk_WZ_fail = ~msk_WZ_pass

    calc = tf.calculator(data=data,
                         config=tf.config,
                         subtract=WZ if args.subtractWZMC else
                         None)  # Using default configuration
    calc.mass = args.mass

    # Perform full fit
    if args.window is None:
        calc.fullfit()

        print("  -- Computing data weights")
        w_nom, w_up, w_down = calc.fullweights(data[msk_data_fail])
        if signal is not None:
            print("  -- Computing signal weights")
            w_sig_nom, w_sig_up, w_sig_down = calc.fullweights(
                signal[msk_sig_fail])

            w_sig_pass, _, _ = calc.fullweights(signal[msk_sig_pass])

            print("Scale factors for signal strength:")
            print("  pass / (pass + fail) = %.3e / (%.3e + %.3e) = %.4f" % (
                np.sum(w_sig_pass), np.sum(w_sig_pass), np.sum(w_sig_nom),
                np.sum(w_sig_pass) / (np.sum(w_sig_nom) + np.sum(w_sig_pass))))
        if WZ is not None:
            print("  -- Computing W/Z weights")
            w_WZ_nom, w_WZ_up, w_WZ_down = calc.fullweights(WZ[msk_WZ_fail])
        else:
            w_WZ_nom, w_WZ_up, w_WZ_down = None, None, None
        print("  -- Final fit done")
        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_',
                      MC=False)

    # Perform fit with manually-set window size
    else:
        # @TODO: - Forcing the fit to use same length scale as 20% window fit. Improve?
        calc.window = 0.2
        calc.fit()
        theta = calc.theta()
        calc.window = args.window
        calc.fit(theta=theta)

        print("  -- Computing data weights")
        w_nom = calc.weights(data[msk_data_fail])
        w_up = calc.weights(data[msk_data_fail], shift=+1)
        w_down = calc.weights(data[msk_data_fail], shift=-1)
        if signal is not None:
            print("  -- Computing signal weights")
            w_sig_nom = calc.weights(signal[msk_sig_fail])
            w_sig_up = calc.weights(signal[msk_sig_fail], shift=+1)
            w_sig_down = calc.weights(signal[msk_sig_fail], shift=-1)
        if WZ is not None:
            print("  -- Computing W/Z weights")
            # Plain assignment: a trailing comma here would try to unpack
            # the weight array as a one-element sequence.
            w_WZ_nom = calc.weights(WZ[msk_WZ_fail])
            w_WZ_up = calc.weights(WZ[msk_WZ_fail], shift=+1)
            w_WZ_down = calc.weights(WZ[msk_WZ_fail], shift=-1)
        else:
            w_WZ_nom, w_WZ_up, w_WZ_down = None, None, None
        print("  -- Manual fit done")
        if args.show or args.save:
            calc.plot(show=args.show,
                      save=args.save,
                      prefix='plots/tf_',
                      MC=False)

    # Computing data-driven background estimate
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    check_make_dir('output')

    # Branch names for the shift = 0, +1, -1 variations, in that order
    variations = ['Nominal', 'TF_UP', 'TF_DOWN']

    # Write TF-scaled failing data to file
    if args.save:
        output = ROOT.TFile(
            'output/objdef_TF_{DSID:6d}.root'.format(DSID=DSID), 'RECREATE')

    for var_name, w, w_WZ in zip(variations, [w_nom, w_up, w_down],
                                 [w_WZ_nom, w_WZ_up, w_WZ_down]):

        # -- Prepare mass- and weight vectors
        vector_m = data['m'][msk_data_fail]
        vector_w = data['weight'][msk_data_fail] * w
        if args.subtractWZdata:
            if WZ is not None and WZ.size > 0:
                print("  Subtracting TF-scaled W/Z MC from background estimate")
                # Subtraction is done by appending the failing W/Z events
                # with negated weights.
                vector_m = np.concatenate((vector_m, WZ['m'][msk_WZ_fail]))
                vector_w = np.concatenate(
                    (vector_w, -WZ['weight'][msk_WZ_fail] * w_WZ))
            else:
                warning(
                    "  Could not subtract failed, TF-scale W/Z MC component")
        # Note: Don't subtract the signal component; that's output as a separate histogram to be used in the simultaneous fit

        # -- Prepare DSID and isMC vectors
        vector_DSID = np.ones_like(vector_w) * DSID
        vector_isMC = np.ones_like(vector_w).astype(bool)

        # list(...) so that np.array receives a sequence of tuples even
        # where zip returns an iterator.
        array1 = np.array(list(zip(vector_m, vector_w)),
                          dtype=[(tf.config['prefix'] + 'm', np.float64),
                                 ('weight', np.float64)])

        array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                          dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

        if args.save:
            # Mass and weight branch
            print("  Writing arrays to file: %s" % var_name)
            treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                                  'Jet_tau21DDT').replace(
                                                      'Nominal', var_name)
            make_directories('/'.join(treename1.split('/')[:-1]),
                             fromDir=output)
            tree1 = ROOT.TTree(treename1.split('/')[-1], "")
            array2tree(array1, tree=tree1)

            # outputTree
            treename2 = tf.config['outputtree'].replace('Nominal', var_name)
            make_directories('/'.join(treename2.split('/')[:-1]),
                             fromDir=output)
            tree2 = ROOT.TTree(treename2.split('/')[-1], "")
            array2tree(array2, tree=tree2)

            output.Write()

    if args.save:
        output.Close()

    # Write TF-scaled failing signal MC to file
    if signal is not None:
        if args.save:
            output = ROOT.TFile(
                'output/objdef_TF_{DSID:6d}_signalfail.root'.format(DSID=DSID),
                'RECREATE')

        for var_name, w_sig in zip(variations,
                                   [w_sig_nom, w_sig_up, w_sig_down]):

            # -- Prepare mass- and weight vectors
            vector_m = signal['m'][msk_sig_fail]
            vector_w = signal['weight'][msk_sig_fail] * w_sig

            # -- Prepare DSID and isMC vectors
            # The failing-signal component is stored under DSID + 100000.
            vector_DSID = np.ones_like(vector_w) * (DSID + 1E+05)
            vector_isMC = np.ones_like(vector_w).astype(bool)

            array1 = np.array(list(zip(vector_m, vector_w)),
                              dtype=[(tf.config['prefix'] + 'm', np.float64),
                                     ('weight', np.float64)])

            array2 = np.array(list(zip(vector_DSID, vector_isMC)),
                              dtype=[('DSID', np.uint32), ('isMC', np.bool_)])

            if args.save:
                # Mass and weight branch
                print("  Writing arrays to file: %s" % var_name)
                treename1 = tf.config['tree'].replace('NumLargeRadiusJets',
                                                      'Jet_tau21DDT').replace(
                                                          'Nominal', var_name)
                make_directories('/'.join(treename1.split('/')[:-1]),
                                 fromDir=output)
                tree1 = ROOT.TTree(treename1.split('/')[-1], "")
                array2tree(array1, tree=tree1)

                # outputTree
                treename2 = tf.config['outputtree'].replace(
                    'Nominal', var_name)
                make_directories('/'.join(treename2.split('/')[:-1]),
                                 fromDir=output)
                tree2 = ROOT.TTree(treename2.split('/')[-1], "")
                array2tree(array2, tree=tree2)

                output.Write()

        if args.save:
            output.Close()

    # Save configuration
    check_make_dir('logs')

    # -- Turn numpy arrays into lists, in order to make them JSON serializable
    cfg = make_serializable(tf.config)

    # Context manager so the log is flushed and closed deterministically
    with open('logs/tf_config_%d.log' % DSID, 'w') as log_file:
        json.dump([cfg, vars(args)], log_file)
    return

Example #5

Show file

File: closure.py Project: asogaard/transferfactor

def main():
    """Run a transfer-factor closure test on the ``objdef_MC_3610*`` MC.

    Workflow:
      1. Load the MC, scale it by per-DSID cross section and by
         ``tf.config['lumi']``.
      2. Fit the transfer factor for ``args.mass`` / ``args.window`` and
         compute nominal, up- and down-shifted weights for failing events.
      3. With ``args.show`` or ``args.save``, draw the TF-weighted failing
         spectrum (with its shifted variations) against the passing events
         as pseudo-data, normalised to bin width, including a ratio pad.

    Returns ``None``; returns early with a warning if nothing was loaded.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_MC_3610*.root')

    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Check output before touching the array, so that an empty (or missing)
    # result produces the warning instead of failing further down.
    if data is None or data.size == 0:
        warning("No data was loaded.")
        return

    # Scaling by cross section
    xsec = loadXsec(tf.config['xsec_file'])

    # Append new DSID field # @TODO: Make more elegant?
    data = append_fields(data, 'DSID', np.zeros((data.size, )), dtypes=int)
    # Pair each file id with its *position* in the info array; using the id
    # value itself as an index only works if ids happen to be 0..N-1.
    for idx, file_id in enumerate(info['id']):
        # Mask of all 'data' entries with same id, i.e. from same file
        msk = (data['id'] == file_id)
        dsid = info['DSID'][idx]  # Get DSID for this file
        # Scale by cross section x filter eff. for this DSID
        data['weight'][msk] *= xsec[dsid]
        data['DSID'][msk] = dsid  # Store DSID
    data['weight'] *= tf.config['lumi']  # Scale all events (MC) by luminosity

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Transfer factor
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # Transfer factor calculator instance
    calc = tf.calculator(data=data,
                         config=tf.config)  # Using default configuration
    calc.mass = args.mass
    calc.window = args.window
    calc.fit()
    w_nom = calc.weights(data[msk_fail])
    w_up = calc.weights(data[msk_fail], shift=+1)
    w_down = calc.weights(data[msk_fail], shift=-1)
    if args.show or args.save:
        calc.plot(show=args.show, save=args.save, prefix='plots/new_closure_')

    # Comparing jet mass distributions (closure)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if args.show or args.save:

        c = ap.canvas(num_pads=2, batch=not args.show)
        _, p1 = c.pads()

        bins = tf.config['massbins']

        h_bkg = c.hist(data['m'][msk_fail],
                       bins=bins,
                       weights=data['weight'][msk_fail] * w_nom,
                       display=False)
        h_up = c.hist(data['m'][msk_fail],
                      bins=bins,
                      weights=data['weight'][msk_fail] * w_up,
                      display=False)
        h_down = c.hist(data['m'][msk_fail],
                        bins=bins,
                        weights=data['weight'][msk_fail] * w_down,
                        display=False)
        h_data = c.plot(data['m'][msk_pass],
                        bins=bins,
                        weights=data['weight'][msk_pass],
                        display=False)

        # Normalise contents and errors to bin width ("Events / GeV"). All
        # four histograms share `bins`, so the width is read from h_bkg.
        for ibin in range(1, h_bkg.GetXaxis().GetNbins() + 1):
            width = float(h_bkg.GetBinWidth(ibin))
            for hist in (h_bkg, h_up, h_down, h_data):
                hist.SetBinContent(ibin, hist.GetBinContent(ibin) / width)
                hist.SetBinError(ibin, hist.GetBinError(ibin) / width)

        h_bkg = c.hist(h_bkg,
                       fillcolor=ROOT.kAzure + 7,
                       label='Background est.')
        h_err = c.hist(h_bkg,
                       fillstyle=3245,
                       fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3,
                       label='Stat. uncert.',
                       option='E2')
        h_up = c.hist(h_up,
                      linecolor=ROOT.kGreen + 1,
                      linestyle=2,
                      option='HIST',
                      label='Syst. uncert.')
        h_down = c.hist(h_down,
                        linecolor=ROOT.kGreen + 1,
                        linestyle=2,
                        option='HIST')
        h_data = c.plot(h_data, label='Pseudo-data')

        c.ratio_plot((h_err, h_bkg), option='E2')
        c.ratio_plot((h_up, h_bkg), option='HIST')
        c.ratio_plot((h_down, h_bkg), option='HIST')
        c.ratio_plot((h_data, h_bkg))

        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / GeV')
        p1.ylabel('Data / Est.')

        c.ylim(1E+00, 1E+06)
        p1.ylim(0.80, 1.20)
        p1.yline(1.0)
        c.region("SR", 0.8 * args.mass, 1.2 * args.mass)

        c.text([
            "#sqrt{s} = 13 TeV,  %s fb^{-1}" % tf.config['lumi'],
            "Incl. #gamma Monte Carlo",
            "Photon channel",
        ],
               qualifier='Simulation Internal')

        c.log()
        c.legend()

        if args.save:
            c.save('plots/new_closure_%dGeV_pm%d.pdf' %
                   (args.mass, args.window * 100.))
        if args.show:
            c.show()

    return

Example #6

Show file

def main():
    """Validate the transfer factor fit using toy experiments.

    Loads the data files matching ``objdef_data_*.root`` under
    ``tf.config['base_path']``, splits the jets into pass/fail categories
    with ``tf.config['pass']`` and then, for each mass point in the scan:

      1. fits a transfer factor calculator to get the nominal weights and
         their +1/-1 shifted variations for the failing jets;
      2. runs ``args.N`` toy fits seeded from the nominal best-fit theta;
      3. draws the nominal, toy and shifted background estimates together
         with the passing data, plus their ratios to the nominal estimate.

    Command-line arguments read from ``parser``: ``window``, ``N``,
    ``show`` and ``save``.

    Returns:
        None. Returns early, after issuing a warning, if no input files are
        found or if no data was loaded.
    """

    # Parse command-line arguments
    args = parser.parse_args()

    # Setup.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Load data
    files = glob.glob(tf.config['base_path'] + 'objdef_data_*.root')

    if len(files) == 0:
        warning("No files found.")
        return

    data = loadData(files, tf.config['tree'], prefix=tf.config['prefix'])
    # NOTE(review): `info` is not used anywhere below. The load is kept so
    # that this function still touches the 'outputtree' exactly as before;
    # confirm whether it can be dropped.
    info = loadData(files, tf.config['outputtree'], stop=1)

    # Check output.
    if data.size == 0:
        warning("No data was loaded. Exiting.")
        return

    # Compute new variables
    data = append_fields(data, 'logpt', np.log(data['pt']))

    # Pass/fail masks
    msk_pass = tf.config['pass'](data)
    msk_fail = ~msk_pass

    # The pass/fail selections do not depend on the mass point, so perform
    # the (copying) boolean-mask indexing once instead of once per use.
    data_fail = data[msk_fail]
    mass_fail = data_fail['m']
    mass_pass = data[msk_pass]['m']

    # Validating transfer factor fit using toys
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #for mass in [85] + list(np.linspace(100, 250, 15 + 1, endpoint=True)):
    for mass in np.linspace(110, 250, 14 + 1, endpoint=True):

        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("-------- MASS: %d GeV" % mass)

        # Set up transfer factor calculator instance
        calc = tf.calculator(data=data, config=tf.config, verbose=False)  # Using default configuration
        calc.mass = mass
        calc.window = 0.2 if (args.window is None) else args.window

        # Get nominal best-fit theta
        calc.fit()
        theta = calc.theta()
        weights_nom = calc.weights(data_fail, shift=0)
        weights_up = calc.weights(data_fail, shift=+1)
        weights_down = calc.weights(data_fail, shift=-1)

        # "Throw toys" from TF profile, fit N times
        calc.toysfit(N=args.N, theta=theta)

        # Get weights for each toys experiment fit
        toys_weights = calc.toysweights(data_fail)

        # Plot variations
        bins = tf.config['massbins']
        c = ap.canvas(num_pads=2, batch=not args.show)

        # -- Nominal background(s)
        hist_nom = c.hist(mass_fail,
                          bins=bins,
                          weights=weights_nom,
                          fillcolor=ROOT.kAzure + 7,
                          label='Nominal bkg.')
        h_sum = c.hist(hist_nom,
                       fillstyle=3245,
                       fillcolor=ROOT.kGray + 2,
                       linecolor=ROOT.kGray + 3,
                       option='E2',
                       label='Stat. uncert.')

        # -- Toys backgrounds
        toys_hists = list()
        for idx, weights in enumerate(toys_weights):
            # Only the first five toys get a legend entry; colour cycles
            # every five toys and the line style advances with each cycle.
            label = 'Toys %d' % (idx + 1) if idx < 5 else None
            h = c.hist(mass_fail,
                       bins=bins,
                       weights=weights[0],
                       fillstyle=0,
                       linecolor=ROOT.kRed + idx % 5,
                       linestyle=1 + idx // 5,
                       label=label)
            toys_hists.append(h)

        # -- Nominal variations
        hist_up = c.hist(mass_fail,
                         bins=bins,
                         weights=weights_up,
                         fillstyle=0,
                         linecolor=ROOT.kGreen,
                         linestyle=2,
                         label='Syst. uncert.')
        hist_down = c.hist(mass_fail,
                           bins=bins,
                           weights=weights_down,
                           fillstyle=0,
                           linecolor=ROOT.kGreen,
                           linestyle=2)

        # -- Data
        hist_data = c.plot(mass_pass, bins=bins, label='Data')

        # -- Ratio plots (all relative to the nominal background estimate)
        c.ratio_plot((h_sum, hist_nom), option='E2')
        for h in toys_hists:
            c.ratio_plot((h, hist_nom), option='HIST')
        c.ratio_plot((hist_up, hist_nom), option='HIST')
        c.ratio_plot((hist_down, hist_nom), option='HIST')
        c.ratio_plot((hist_data, hist_nom), oob=True)

        # -- Decorations
        c.xlabel('Large-#it{R} jet mass [GeV]')
        c.ylabel('Events / 5 GeV')
        c.pads()[1].ylabel('Ratio wrt. nominal')
        c.pads()[1].ylim(0.8, 1.2)
        c.pads()[1].yline(1.)
        c.text(["#sqrt{s} = 13 TeV,  L = 36.1 fb^{-1}",
                "Photon channel"],
               qualifier="Internal")

        # The "SR" band is drawn at a fixed +/- 20% around the mass point,
        # independently of `calc.window`.
        c.region("SR", 0.8 * mass, 1.2 * mass)

        c.legend()
        c.log()
        if args.show:
            c.show()
        if args.save:
            c.save('plots/validation_%dGeV_N%d.pdf' % (mass, args.N))

    return