Example #1
    def _normalize(self, cutflows):
        if self.__normalized:
            return
        self.__normalized = True
        for fullname, hist in self.__hists.items():
            if fullname.endswith('Up') or fullname.endswith('Down'):
                # systematic variations are stored as <process>_CMS...Up/Down;
                # strip the suffix to recover the base process name
                name, _ = fullname.rsplit('_CMS', 1)
                proc = Process.get(name)
            else:
                proc = Process.get(fullname)

            logging.debug("normalizing histogram {0}, process {1}".format(self.__name, proc))
            # scale so the raw histogram yield matches the luminosity-normalized
            # count from the cutflow
            denom = float(cutflows[proc.cutflow][-3][proc])
            factor = 0. if denom == 0. else cutflows[proc.cutflow][-1][proc] / denom
            hist.Scale(factor)
Example #2
 def _add_legend(self, config, factor):
     l = Legend(0.05, 3, 0.08)
     for cfg in config['backgrounds']:
         bkg, color = cfg.items()[0]
         l.draw_box(1001, self._eval(color), Process.get(bkg).fullname)
     l.draw_box(3654, r.kBlack, "Bkg. err.", True)
     # TODO add collisions
     l.new_row()
     for cfg in config['signals']:
         sig, color = cfg.items()[0]
         label = Process.get(sig).fullname
         if factor != 1:
             label += " (#times {0:.1f})".format(factor)
         l.draw_line(2, self._eval(color), label)
     return l
Example #3
def cutflow(cuts, procs, relative=False, f=sys.stdout):
    expanded_proc = [Process.expand(proc) for proc in procs]

    cutdata = [[sum(float(cut[p]) for p in ps) for ps in expanded_proc] for cut in cuts]

    if relative:
        # turn every row after the first into efficiencies relative to the
        # previous cut, working from the last row backwards
        for i in xrange(1, len(cutdata)):
            cutdata[-i] = [float(b) / a for a, b in zip(cutdata[-(i + 1)], cutdata[-i])]

    namelength = max(len(unicode(cut)) for cut in cuts)
    fieldlengths = []
    for proc, subprocs in zip(procs, expanded_proc):
        val = sum(cuts[0][p] for p in subprocs)
        length = max(len(proc), len("{:.2f}".format(float(val))))
        fieldlengths.append(length)

    header = u"{{:{0}}}".format(namelength) \
            + u"".join(u"   {{:{0}}}".format(fl) for fl in fieldlengths) \
            + u"\n"
    format = u"{{:{0}}}".format(namelength) \
            + "".join("   {{:{0}.2f}}".format(fl) for fl in fieldlengths) \
            + "\n"

    f.write(header.format("Cut", *procs))
    f.write("-" * namelength + "".join("   " + "-" * fl for fl in fieldlengths) + "\n")
    for cut, data in zip(cuts, cutdata):
        f.write(format.format(cut, *data))
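
For illustration, the `relative` transformation above can be run on plain lists; every row after the first becomes the efficiency with respect to the previous cut (numbers are made up):

cutdata = [[100.0, 80.0], [50.0, 40.0], [10.0, 30.0]]
for i in range(1, len(cutdata)):
    cutdata[-i] = [float(b) / a for a, b in zip(cutdata[-(i + 1)], cutdata[-i])]
# cutdata is now [[100.0, 80.0], [0.5, 0.5], [0.2, 0.75]]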
Example #4
 def get_event_count(cls, f, proc, category, fmt, unweighed):
     p = cls.__plots['Events']
     p.clear()
     p.read(f, category, Process.expand(proc), fmt=fmt)
     if unweighed:
         return p._get_histogram(proc).GetEntries()
     return p._get_histogram(proc).GetBinContent(1)
Example #5
def cutflow(cuts, processes, relative=False, weighed=False, f=sys.stdout):
    expanded_proc = []
    procs = []
    for proc in processes:
        subs = [p for p in Process.expand(proc) if str(p) in cuts[0].processes()]
        if len(subs) > 0:
            expanded_proc.append(subs)
            procs.append(proc)

    cutdata = [[sum(float(cut[p]) for p in ps) for ps in expanded_proc] for cut in cuts]

    if weighed:
        # count the trailing StaticCut entries (the normalization rows)
        for n, c in enumerate(reversed(cuts)):
            if not isinstance(c, StaticCut):
                break

        # ratio of the fully normalized counts to the last regular cut
        ratios = [a / (b if b != 0 else 1) for a, b in zip(cutdata[-1], cutdata[-(n + 1)])]
        # drop the normalization rows and rescale the later rows accordingly
        cutdata = cutdata[:-n]
        for i in xrange(3, len(cutdata)):
            cutdata[i] = [a * b for a, b in zip(cutdata[i], ratios)]

    if relative:
        for i in xrange(1, len(cutdata)):
            cutdata[-i] = [(float(b) / a if a != 0 else 0)
                           for a, b in zip(cutdata[-(i + 1)], cutdata[-i])]

    print_cuts(cuts, procs, cutdata, expanded_proc, "Cut", f, 5 if relative else 2)
Example #6
 def _get_histogram(self, process, systematic=None):
     if isinstance(process, Process):
         proc = process
         process = str(process)
     else:
         proc = Process.get(process)
     if isinstance(proc, BasicProcess):
         scale = 1.
         if systematic and systematic.startswith('Relative'):
             # a flat relative systematic is applied as an overall scale of the
             # nominal histogram instead of a separate shifted template
             scale = max(0, 1 + proc.relativesys() * (1. if systematic.endswith('Up') else -1.))
             systematic = None
         suffix = '_' + systematic if systematic else ''
         hist = self.__hists[process + suffix].Clone()
         if hist.ClassName().startswith('TH1'):
             # fold the overflow bin into the last visible bin
             lastbin = hist.GetNbinsX()
             overbin = lastbin + 1
             err = math.sqrt(hist.GetBinError(lastbin) ** 2 + hist.GetBinError(overbin) ** 2)
             val = hist.GetBinContent(lastbin) + hist.GetBinContent(overbin)
             hist.SetBinContent(lastbin, val)
             hist.SetBinError(lastbin, err)
         if scale != 1.:
             hist.Scale(scale)
         return hist
     hist = None
     for p in proc.subprocesses:
         h = self._get_histogram(p, systematic)
         if hist:
             hist.Add(h, proc.factor)
         else:
             hist = h.Clone()
     if not hist:
         raise KeyError(process)
     return hist
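
The overflow handling above can be shown in isolation. A minimal PyROOT sketch (histogram name, binning, and fill value are made up) that folds the overflow bin into the last visible bin:

import math
import ROOT as r

h = r.TH1F("demo", "demo", 10, 0., 1.)
h.Fill(2.0)  # out of range, ends up in the overflow bin
lastbin = h.GetNbinsX()
overbin = lastbin + 1
err = math.sqrt(h.GetBinError(lastbin) ** 2 + h.GetBinError(overbin) ** 2)
h.SetBinContent(lastbin, h.GetBinContent(lastbin) + h.GetBinContent(overbin))
h.SetBinError(lastbin, err)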
Example #7
def read_inputs(config, setup):
    from ttH.TauRoast.processing import Process

    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")

    signal = None
    signal_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['signals']], []):
        for p in Process.expand(proc):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if signal is not None:
                signal = np.concatenate((signal, d))
                signal_weights = np.concatenate((signal_weights, w))
            else:
                signal = d
                signal_weights = w

    background = None
    background_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['backgrounds']], []):
        for p in Process.expand(proc):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if background is not None:
                background = np.concatenate((background, d))
                background_weights = np.concatenate((background_weights, w))
            else:
                background = d
                background_weights = w

    factor = np.sum(signal_weights) / np.sum(background_weights)
    logging.info("renormalizing background events by factor {}".format(factor))
    background_weights *= factor

    return signal, signal_weights, background, background_weights
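
The last step rescales the background weights so the weighted background sum matches the weighted signal sum; the same operation in isolation, with made-up weights:

import numpy as np

signal_weights = np.array([0.4, 0.6])
background_weights = np.array([2.0, 3.0, 5.0])
factor = np.sum(signal_weights) / np.sum(background_weights)  # 0.1
background_weights *= factor  # both weight vectors now sum to 1.0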
Example #8
def add_mva(args, config):
    fn = os.path.join(config["outdir"], "ntuple.root")
    for proc in set(sum((Process.expand(p) for p in config['plot'] + config['limits']), [])):
        systematics = ['NA']
        if args.systematics:
            weights = config.get(proc.cutflow + ' weights')
            systematics = config.get(proc.cutflow + ' systematics', [])
            systematics = set([s for s, w in expand_systematics(systematics, weights)])
        for unc in systematics:
            logging.info("using systematics: " + unc)
            proc.add_mva(config, fn, unc)
Example #9
 def _add_legend(self, factor):
     legend = Legend(0.05, 4, 0.03)
     if len(self.__backgrounds_present) > 0:
         legend.draw_marker(20, r.kBlack, "Data")
         for cfg in self._plotconfig['backgrounds']:
             props = {'SetFillStyle': 1001}
             props.update(cfg)
             bkg = props.pop('process')
             if bkg not in self.__backgrounds_present:
                 continue
             legend.draw_box({k: self._eval(v) for (k, v) in props.items()}, Process.get(bkg).fullname, centerline=False)
     if len(self.__signals_present) > 0:
         legend.new_row()
         for cfg in self._plotconfig['signals']:
             sig, color = cfg.items()[0]
             if sig not in self.__signals_present:
                 continue
             label = Process.get(sig).fullname
             if factor != 1:
                 label += " (#times {0:.1f})".format(factor)
             legend.draw_line(2, self._eval(color), label)
             legend.new_row()
     return legend
Example #10
    def write(self, file, cutflows, category, systematics=None, procs=None, fmt="{p}_{c}_{v}"):
        """Write histograms of the plot to `file`.

        Use the specified `category` and normalize histograms via the
        cutflows passed in `cutflows`. Can be limited to the processes
        given in `procs`. Optionally use `systematics`, and write with
        the format specified in `fmt`, where `p` is the process name, `c`
        the category, and `v` the limit name of the current plot.
        Systematic suffixes are always appended to the resulting
        histogram name (with an underscore).
        """
        self._normalize(cutflows)

        if systematics is None:
            systematics = []
        systematics = set(systematics + ['NA'])

        uncertainties = []
        for systematic in systematics:
            if systematic == 'NA':
                uncertainties.append((None, ''))
            else:
                uncertainties.append((systematic + 'Up', '_{}Up'.format(systematic)))
                uncertainties.append((systematic + 'Down', '_{}Down'.format(systematic)))

        if procs is None:
            procs = [Process.get(k) for k in self.__hists if
                     (not k.endswith('Up')) and (not k.endswith('Down'))]
        else:
            procs = map(Process.get, procs)

        for proc in procs:
            for uncertainty, suffix in uncertainties:
                histname = fmt.format(p=proc.limitname, v=self.__limitname, c=category)
                histname += suffix
                logging.debug("writing histogram {0}".format(histname))
                try:
                    hist = self._get_histogram(proc, uncertainty)
                    file.WriteObject(hist, histname, "WriteDelete")
                except KeyError:
                    pass
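
The histogram name is simply the format string filled in, with any systematic suffix appended afterwards; a quick illustration with made-up process, category, and systematic names:

fmt = "{p}_{c}_{v}"
name = fmt.format(p="ttH125", c="1l2tau", v="mva")
name += "_CMS_scale_jUp"  # hypothetical systematic suffix
# name == "ttH125_1l2tau_mva_CMS_scale_jUp"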
Example #11
def normalize(cuts, lumi, limit=None):
    weights = None
    processed = None

    ntuplized = None
    analyzed = None

    for cut in cuts:
        if str(cut).lower() == "dataset processed":
            processed = cut
        elif str(cut).lower() == "dataset event weights":
            weights = cut
        elif str(cut).lower() == "ntuple":
            ntuplized = cut
        elif str(cut).lower() == "ntuple analyzed":
            analyzed = cut
        elif processed and weights and ntuplized and analyzed:
            break

    dsetnorm = StaticCut("Dataset norm")
    luminorm = StaticCut("Luminosity norm")
    for proc in cuts[-1].processes():
        if str(proc).startswith("collisions") or str(proc).startswith("fakes"):
            dsetnorm[proc] = cuts[-1][proc]
            luminorm[proc] = cuts[-1][proc]
        else:
            p = Process.get(proc)
            scale = processed[proc] / float(weights[proc])
            if ntuplized[proc] == 0 or analyzed[proc] == 0:
                logging.warning("0 event count for {}".format(proc))
                fraction = 1.
            elif (not limit) or analyzed[proc] < limit:
                fraction = 1.
            else:
                fraction = analyzed[proc] / float(ntuplized[proc])
                logging.warning("scaling {} by {} to compensate for partially analyzed dataset".format(proc, 1. / fraction))
            dsetnorm[proc] = cuts[-1][proc] * scale / fraction
            luminorm[proc] = cuts[-1][proc] * scale / fraction * lumi * p.cross_section / float(p.events)
    cuts.append(dsetnorm)
    cuts.append(luminorm)
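
The luminosity normalization applied above is the usual lumi * cross section / generated-events factor; a quick numeric sketch with made-up values:

lumi = 35900.0           # made-up integrated luminosity
cross_section = 0.5071   # made-up cross section (units must match lumi)
events = 3000000.        # made-up number of generated events
raw_yield = 1234.0       # made-up yield after all cuts
expected = raw_yield * lumi * cross_section / events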
Example #12
def analyze(args, config):
    fn = os.path.join(config["outdir"], "ntuple.root")

    if args.reuse:
        cutflows = split_cuts(load_cutflows(config))
    else:
        if os.path.exists(fn):
            os.unlink(fn)
        cutflows = setup_cuts(config)

    for proc in set(sum((Process.expand(p) for p in config['plot'] + config['limits']), [])):
        uncertainties = ['NA']
        if args.systematics:
            weights = config.get(proc.cutflow + ' weights')
            systematics = config.get(proc.cutflow + ' systematics', [])
            uncertainties = [s for s, w in expand_systematics(systematics, weights)]
        for unc in uncertainties:
            suffix = '' if unc == 'NA' else '_' + unc
            counts, cuts, weights = cutflows[proc.cutflow + suffix]

            if len(counts) > 0 and str(proc) in counts[0].processes():
                continue

            logging.info("using systematics: " + unc)

            local_cuts = list(cuts)
            for cfg in proc.additional_cuts:
                local_cuts.insert(0, Cut(*cfg))

            proc.analyze(config, fn, counts, local_cuts, weights, unc, args.debug_cuts)

    concatenated_cutflows = Cutflows()
    for name, (counts, cuts, weights) in cutflows.items():
        cuts = counts + cuts + weights
        normalize(cuts, config["lumi"], config.get("event limit"))
        concatenated_cutflows[name] = cuts

    concatenated_cutflows.save(config)
Example #13
def fill(args, config):
    cutflows = load_cutflows(config)
    for name, cuts in cutflows.items():
        normalize(cuts, config["lumi"], config.get("event limit"))

    categories, definitions = get_categories(config)

    atomic_processes = set(sum(map(Process.expand, config['plot'] + config['limits']), []))
    limit_processes = config["limits"]
    all_processes = set(Process.get(n) for n in limit_processes + config['plot']) | atomic_processes

    if len(all_processes) != len(set([p.limitname for p in all_processes])):
        logging.error("the limit names of the processes are not unique and will lead to collisions!")

    if 'indir' in config and config['indir'] != config['outdir']:
        shutil.copy(
            os.path.join(config['indir'], 'cutflow.pkl'),
            os.path.join(config['outdir'], 'cutflow.pkl')
        )

    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")
    forest = Forest(fn)

    for category, definition in zip(categories, definitions):
        logging.info("filling category: " + category)
        Plot.reset()

        for proc in atomic_processes:
            logging.info("filling process: " + str(proc))

            weights = config.get(proc.cutflow + ' weights')
            systematics = config.get(proc.cutflow + ' systematics', [])
            uncertainties = [('NA', weights)]
            if args.systematics:
                uncertainties = expand_systematics(systematics, weights)
            for n, (systematic, weights) in enumerate(uncertainties):
                logging.info("using systematics: " + systematic)
                logging.info("using weights: " + ", ".join(weights))
                for p in Plot.plots():
                    if (not args.essential and n == 0) or p.essential():
                        p.fill(proc, systematic, weights, definition)

        uncertainties = None
        if args.systematics:
            uncertainties = list(set(sum((config.get(p.cutflow + ' systematics', []) for p in atomic_processes), [])))

        logging.info("writing out plots for category: " + category)
        fn = os.path.join(config["outdir"], "plots.root")
        with open_rootfile(fn) as f:
            for p in Plot.plots():
                p.write(f, cutflows, category, uncertainties,
                        procs=all_processes, fmt=config["histformat"])

        discriminants = config.get("discriminants", [])
        fn = os.path.join(config["outdir"], "limits.root")
        with open_rootfile(fn) as f:
            for p in Plot.plots():
                if p.limitname in discriminants:
                    p.write(f, cutflows, category, uncertainties,
                            procs=limit_processes, fmt=config["histformat"])

        timing = sorted(Plot.plots(), key=lambda p: p._time)
        for p in timing[:10] + timing[-10:]:
            logging.debug("plot filling time for {0}: {1}".format(p, p._time))
        logging.info("done filling category: " + category)
    del forest
Example #14
 def _get_histogram(self, process):
     procs = Process.expand(process)
     h = self.__hists[procs[0]].Clone()
     for proc in procs[1:]:
         h.Add(self.__hists[proc])
     return h