Beispiel #1
0
    def clf_channels(self, clf,
                     category, region,
                     cuts=None,
                     bins=10,
                     limits=None,
                     mass=None,
                     mode=None,
                     systematics=True,
                     unblind=False,
                     hybrid_data=False,
                     no_signal_fixes=False,
                     uniform=False,
                     mva=False,
                     min_score=None,
                     max_score=None,
                     include_signal=True):
        """
        Build the HistFactory channel of classifier scores for one mass point.

        Scores are requested from ``self.get_scores`` for ``mass`` only. One
        sample is built per background, one per signal (unless
        ``include_signal`` is False) and, when data scores are available, one
        for data.

        Parameters
        ----------
        clf, category, region, cuts, mode, systematics
            Forwarded to ``self.get_scores`` and/or to each sample's
            ``get_histfactory_sample``.
        bins
            An int gives that many uniform bins over ``limits`` when given,
            otherwise over ``[min_score, max_score]``. Anything else is
            treated as a sequence of bin edges.
        limits
            Optional ``(low, high)`` pair, only used when ``bins`` is an int.
        mass
            Mass point; selects the signal scores and is part of the channel
            name.
        unblind
            ``True`` leaves data untouched. ``False`` zeroes the whole data
            histogram. An int ``N`` zeroes the last ``N + 1`` entries of the
            histogram (presumably the top N bins plus overflow -- TODO
            confirm against the Hist indexing convention). A float zeroes
            every bin from the one containing the score returned by
            ``efficiency_cut`` for the summed signal at that value.
        hybrid_data
            If true (and ``unblind`` is not ``True``), the blinded data bins
            are overwritten with the sum of the background and signal sample
            histograms.
        no_signal_fixes
            Forwarded to the signal samples only.
        uniform, mva
            Forwarded to ``get_histfactory_sample`` (``mva`` is not passed
            for data).
        min_score, max_score
            Override the score range reported by ``get_scores``. They are
            also forwarded to the background samples (not to signal or data).
        include_signal
            When False no signal samples are added to the channel. The signal
            scores are still used to locate the blinding cut for a float
            ``unblind``.

        Returns
        -------
        tuple
            ``(scores_obj, channel)``: the object returned by
            ``self.get_scores`` and the channel from
            ``histfactory.make_channel``.
        """
        log.info("constructing channels")

        # determine min and max scores
        scores_obj = self.get_scores(
            clf, category, region, cuts=cuts,
            masses=[mass], mode=mode,
            systematics=systematics,
            unblind=unblind)

        data_scores = scores_obj.data_scores
        bkg_scores = scores_obj.bkg_scores
        all_sig_scores = scores_obj.all_sig_scores
        if min_score is None:
            min_score = scores_obj.min_score
        if max_score is None:
            max_score = scores_obj.max_score

        if isinstance(bins, int):
            if limits is not None:
                low, high = limits
                binning = Hist(bins, low, high, type='D')
            else:
                binning = Hist(bins, min_score, max_score, type='D')
        else:  # iterable of bin edges
            if bins[0] > min_score:
                log.warning("min score is less than first edge "
                            "(will be underflow)")
            if bins[-1] <= max_score:
                log.warning("max score is greater than or equal to last edge "
                            "(will be overflow)")
            binning = Hist(bins, type='D')

        bkg_samples = []
        for s, scores in bkg_scores:
            hist_template = binning.Clone(
                title=s.label,
                **s.hist_decor)
            sample = s.get_histfactory_sample(
                hist_template, clf,
                category, region,
                min_score=min_score, max_score=max_score,
                cuts=cuts, scores=scores,
                systematics=systematics,
                uniform=uniform,
                mva=mva)
            bkg_samples.append(sample)

        data_sample = None
        # Slice of the data histogram that is blinded. It is computed once
        # and reused by the hybrid-data step so both always agree; it stays
        # None when nothing is blinded.
        blind_slice = None
        if data_scores is not None:
            hist_template = binning.Clone(
                title=self.data.label,
                **self.data.hist_decor)
            data_sample = self.data.get_histfactory_sample(
                hist_template, clf,
                category, region,
                cuts=cuts, scores=data_scores,
                uniform=uniform)
            if unblind is False:
                # blind full histogram
                blind_slice = slice(None)
            elif (unblind is not True) and isinstance(unblind, int):
                # blind highest N bins
                blind_slice = slice(-(unblind + 1), None)
            elif isinstance(unblind, float):
                # blind above a signal efficiency
                max_unblind_score = efficiency_cut(
                    sum([histogram_scores(hist_template, sig_scores)
                         for _, sig_scores in all_sig_scores[mass]]),
                    unblind)
                blind_slice = slice(
                    hist_template.FindBin(max_unblind_score), None)
            if blind_slice is not None:
                data_sample.hist[blind_slice] = (0, 0)

        # create signal HistFactory samples
        sig_samples = []
        if include_signal:
            for s, scores in all_sig_scores[mass]:
                hist_template = binning.Clone(
                    title=s.label,
                    **s.hist_decor)
                sample = s.get_histfactory_sample(
                    hist_template, clf,
                    category, region,
                    cuts=cuts, scores=scores,
                    no_signal_fixes=no_signal_fixes,
                    systematics=systematics,
                    uniform=uniform,
                    mva=mva)
                sig_samples.append(sample)

        # replace data in blind bins with signal + background
        if hybrid_data and (unblind is not True):
            if data_sample is None:
                # Without data scores there is no histogram to fill; say so
                # instead of failing on a None attribute access.
                log.warning("hybrid_data requested but no data sample is "
                            "available; skipping")
            elif blind_slice is not None:
                sum_sig_bkg = sum(
                    [s.hist for s in (bkg_samples + sig_samples)])
                data_sample.hist[blind_slice] = sum_sig_bkg[blind_slice]

        # create channel for this mass point
        # (year % 1000 keeps the last digits of the year, e.g. 2012 -> 12)
        channel = histfactory.make_channel(
            'hh_{0}_{1}_{2}'.format(self.year % 1000, category.name, mass),
            bkg_samples + sig_samples,
            data=data_sample)

        return scores_obj, channel
Beispiel #2
0
def plot_clf(
    background_scores,
    category,
    signal_scores=None,
    signal_scale=1.0,
    data_scores=None,
    name=None,
    draw_histograms=True,
    draw_data=False,
    save_histograms=False,
    hist_template=None,
    bins=10,
    min_score=0,
    max_score=1,
    signal_colors=cm.spring,
    systematics=None,
    unblind=False,
    **kwargs
):
    """Histogram classifier scores for each sample and optionally draw them.

    Parameters
    ----------
    background_scores
        Iterable of ``(sample, scores_dict)`` pairs. ``scores_dict`` maps a
        systematic term name to a ``(scores, weight)`` pair and must contain
        ``"NOMINAL"``. ``sample`` provides ``label`` and ``hist_decor``.
    category
        Passed through to ``draw``.
    signal_scores
        Same layout as ``background_scores``, or None for no signal.
    signal_scale
        Passed through to ``draw``.
    data_scores
        ``(data_sample, scores)`` pair, or None.
    name
        Optional suffix appended to the output name.
    draw_histograms
        When false nothing is drawn; the histograms are still returned.
    draw_data
        Data is only histogrammed when this is true and ``unblind`` is not
        False.
    save_histograms
        Not used by this function.
    hist_template
        Histogram cloned for every sample. When None it is built from
        ``bins`` (and ``min_score``/``max_score`` for uniform binning).
    bins
        Number of uniform bins, or an iterable of bin edges.
    min_score, max_score
        Range for uniform binning; ignored when ``bins`` is iterable or a
        template is given.
    signal_colors, systematics, **kwargs
        Passed through to ``draw``.
    unblind
        False hides data. A float keeps only data scores below the score
        returned by ``efficiency_cut`` for the summed signal at that value
        (only applied when signal is given). ``True`` or a number >= 1 also
        logs the data and model yields.

    Returns
    -------
    tuple
        ``(bkg_hists, sig_hists, data_hist)``; the last two are None when
        there is no signal / no data to show.
    """
    if hist_template is None:
        if hasattr(bins, "__iter__"):
            # variable width bins: the edges alone define the range
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    def _histogram_sample(sample, scores_dict):
        # Fill the nominal histogram for one sample and attach one histogram
        # per systematic term in its ``systematics`` dict.
        nominal = hist_template.Clone(title=sample.label)
        scores, weight = scores_dict["NOMINAL"]
        fill_hist(nominal, scores, weight)
        nominal.decorate(**sample.hist_decor)
        nominal.systematics = {}
        for sys_term in scores_dict.keys():
            if sys_term == "NOMINAL":
                continue
            sys_hist = hist_template.Clone()
            scores, weight = scores_dict[sys_term]
            fill_hist(sys_hist, scores, weight)
            nominal.systematics[sys_term] = sys_hist
        return nominal

    bkg_hists = [
        _histogram_sample(bkg, scores_dict)
        for bkg, scores_dict in background_scores
    ]

    if signal_scores is not None:
        sig_hists = [
            _histogram_sample(sig, scores_dict)
            for sig, scores_dict in signal_scores
        ]
    else:
        sig_hists = None

    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float) and sig_hists is not None:
            # Unblind up to `unblind` signal efficiency. The requested value
            # is used here (it used to be a hard-coded 0.3).
            # NOTE(review): without signal the data is shown in full.
            cut = efficiency_cut(sum(sig_hists), unblind)
            # assumes data_scores supports boolean-mask indexing
            # (e.g. a numpy array) -- TODO confirm
            data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            data_total = sum(data_hist)
            model_total = sum(sum(bkg_hists))
            log.info("Data events: %d" % data_total)
            log.info("Model events: %f" % model_total)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            if model_total:
                log.info("Data / Model: %f" % (data_total / model_total))
            else:
                # An empty model must not abort plotting from a log line.
                log.warning("Data / Model: undefined (model is empty)")
    else:
        data_hist = None

    if draw_histograms:
        output_name = "event_bdt_score"
        if name is not None:
            output_name += "_" + name
        # one linear-scale and one log-scale plot
        for logy in (False, True):
            draw(
                data=data_hist,
                model=bkg_hists,
                signal=sig_hists,
                signal_scale=signal_scale,
                category=category,
                name="BDT Score",
                output_name=output_name,
                show_ratio=data_hist is not None,
                model_colors=None,
                signal_colors=signal_colors,
                systematics=systematics,
                logy=logy,
                **kwargs
            )
    return bkg_hists, sig_hists, data_hist
Beispiel #3
0
    def clf_channels(self, clf,
                     category, region,
                     cuts=None,
                     bins=10,
                     limits=None,
                     mass=None,
                     mode=None,
                     systematics=True,
                     unblind=False,
                     hybrid_data=False,
                     no_signal_fixes=False,
                     uniform=False,
                     mva=False):
        """
        Return the classifier scores and the HistFactory channel for one
        mass hypothesis.

        The score range comes from ``self.get_scores`` (called for ``mass``
        only). ``bins`` is either an int (uniform bins over ``limits`` if
        given, else over the score range) or a sequence of bin edges.

        ``unblind`` controls how the data histogram is blinded:

        * ``True``: data is left untouched.
        * ``False``: the full histogram is set to ``(0, 0)``.
        * int ``N``: the last ``N + 1`` entries are set to ``(0, 0)``
          (presumably the highest N bins plus overflow -- TODO confirm).
        * float: all bins from the one containing the score returned by
          ``efficiency_cut`` for the summed signal are set to ``(0, 0)``.

        With ``hybrid_data`` (and ``unblind`` not ``True``) the blinded bins
        are then replaced by the sum of the background and signal sample
        histograms. ``no_signal_fixes`` is forwarded to the signal samples
        only; ``systematics``, ``uniform`` and ``mva`` are forwarded to
        ``get_histfactory_sample``.

        Returns the tuple ``(scores_obj, channel)``.
        """
        log.info("constructing channels")

        # determine min and max scores
        scores_obj = self.get_scores(
            clf, category, region, cuts=cuts,
            masses=[mass], mode=mode,
            systematics=systematics,
            unblind=unblind)

        data_scores = scores_obj.data_scores
        bkg_scores = scores_obj.bkg_scores
        all_sig_scores = scores_obj.all_sig_scores
        min_score = scores_obj.min_score
        max_score = scores_obj.max_score

        if isinstance(bins, int):
            if limits is not None:
                low, high = limits
                binning = Hist(bins, low, high, type='D')
            else:
                binning = Hist(bins, min_score, max_score, type='D')
        else:  # iterable
            if bins[0] > min_score:
                log.warning("min score is less than first edge "
                            "(will be underflow)")
            if bins[-1] <= max_score:
                log.warning("max score is greater than or equal to last edge "
                            "(will be overflow)")
            binning = Hist(bins, type='D')

        bkg_samples = []
        for s, scores in bkg_scores:
            hist_template = binning.Clone(
                title=s.label,
                **s.hist_decor)
            sample = s.get_histfactory_sample(
                hist_template, clf,
                category, region,
                cuts=cuts, scores=scores,
                systematics=systematics,
                uniform=uniform,
                mva=mva)
            bkg_samples.append(sample)

        data_sample = None
        if data_scores is not None:
            hist_template = binning.Clone(
                title=self.data.label,
                **self.data.hist_decor)
            data_sample = self.data.get_histfactory_sample(
                hist_template, clf,
                category, region,
                cuts=cuts, scores=data_scores,
                uniform=uniform)
            if unblind is False:
                # blind full histogram
                data_sample.hist[:] = (0, 0)
            elif (unblind is not True) and isinstance(unblind, int):
                # blind highest N bins
                data_sample.hist[-(unblind + 1):] = (0, 0)
            elif isinstance(unblind, float):
                # blind above a signal efficiency
                max_unblind_score = efficiency_cut(
                    sum([histogram_scores(hist_template, scores)
                        for s, scores in all_sig_scores[mass]]), unblind)
                blind_bin = hist_template.FindBin(max_unblind_score)
                data_sample.hist[blind_bin:] = (0, 0)

        # create signal HistFactory samples
        sig_samples = []
        for s, scores in all_sig_scores[mass]:
            hist_template = binning.Clone(
                title=s.label,
                **s.hist_decor)
            sample = s.get_histfactory_sample(
                hist_template, clf,
                category, region,
                cuts=cuts, scores=scores,
                no_signal_fixes=no_signal_fixes,
                systematics=systematics,
                uniform=uniform,
                mva=mva)
            sig_samples.append(sample)

        # replace data in blind bins with signal + background
        if hybrid_data and (unblind is not True):
            if data_sample is None:
                # No data scores means there is no histogram to overwrite
                # (and no blind_bin for the float case); report it instead
                # of failing on a None attribute access.
                log.warning("hybrid_data requested but no data sample is "
                            "available; skipping")
            else:
                sum_sig_bkg = sum(
                    [s.hist for s in (bkg_samples + sig_samples)])
                if unblind is False:
                    # replace full hist
                    data_sample.hist[:] = sum_sig_bkg[:]
                elif isinstance(unblind, int):
                    # replace highest N bins
                    first_blind = -(unblind + 1)
                    data_sample.hist[first_blind:] = sum_sig_bkg[first_blind:]
                elif isinstance(unblind, float):
                    # blind_bin was set when the data sample was blinded
                    data_sample.hist[blind_bin:] = sum_sig_bkg[blind_bin:]

        # create channel for this mass point
        # (year % 1000 keeps the last digits of the year, e.g. 2012 -> 12)
        channel = histfactory.make_channel(
            'hh_{0}_{1}_{2}'.format(self.year % 1000, category.name, mass),
            bkg_samples + sig_samples,
            data=data_sample)

        return scores_obj, channel
Beispiel #4
0
def plot_clf(background_scores,
             category,
             signal_scores=None,
             signal_scale=1.,
             data_scores=None,
             name=None,
             draw_histograms=True,
             draw_data=False,
             save_histograms=False,
             hist_template=None,
             bins=10,
             min_score=0,
             max_score=1,
             signal_colors=cm.spring,
             systematics=None,
             unblind=False,
             **kwargs):
    """
    Build classifier score histograms per sample and optionally draw them.

    ``background_scores`` and ``signal_scores`` are iterables of
    ``(sample, scores_dict)`` pairs, where ``scores_dict`` maps a systematic
    term to a ``(scores, weight)`` pair and must contain ``'NOMINAL'``.
    ``data_scores`` is a ``(data_sample, scores)`` pair. Each sample provides
    ``label`` and ``hist_decor``.

    Every histogram is a clone of ``hist_template``; when that is None it is
    built from ``bins`` (bin edges if iterable, otherwise a bin count over
    ``[min_score, max_score]``).

    Data is only histogrammed when ``draw_data`` is true and ``unblind`` is
    not False. A float ``unblind`` keeps only data scores below the score
    returned by ``efficiency_cut`` for the summed signal at that value
    (applied only when signal is given). ``True`` or a number >= 1
    additionally logs the data and model yields.

    ``category``, ``signal_scale``, ``signal_colors``, ``systematics`` and
    ``**kwargs`` are passed through to ``draw``; ``name`` is appended to the
    output name. ``save_histograms`` is not used by this function.

    Returns ``(bkg_hists, sig_hists, data_hist)``; ``sig_hists`` and
    ``data_hist`` are None when there is no signal / no data to show.
    """
    if hist_template is None:
        if hasattr(bins, '__iter__'):
            # variable width bins: the edges alone define the range
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    bkg_hists = []
    for bkg, scores_dict in background_scores:
        hist = hist_template.Clone(title=bkg.label)
        scores, weight = scores_dict['NOMINAL']
        fill_hist(hist, scores, weight)
        hist.decorate(**bkg.hist_decor)
        # one extra histogram per systematic variation
        hist.systematics = {}
        for sys_term, (scores, weight) in scores_dict.items():
            if sys_term == 'NOMINAL':
                continue
            sys_hist = hist_template.Clone()
            fill_hist(sys_hist, scores, weight)
            hist.systematics[sys_term] = sys_hist
        bkg_hists.append(hist)

    sig_hists = None
    if signal_scores is not None:
        sig_hists = []
        for sig, scores_dict in signal_scores:
            sig_hist = hist_template.Clone(title=sig.label)
            scores, weight = scores_dict['NOMINAL']
            fill_hist(sig_hist, scores, weight)
            sig_hist.decorate(**sig.hist_decor)
            sig_hist.systematics = {}
            for sys_term, (scores, weight) in scores_dict.items():
                if sys_term == 'NOMINAL':
                    continue
                sys_hist = hist_template.Clone()
                fill_hist(sys_hist, scores, weight)
                sig_hist.systematics[sys_term] = sys_hist
            sig_hists.append(sig_hist)

    data_hist = None
    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float):
            if sig_hists is not None:
                # Unblind up to `unblind` signal efficiency. The requested
                # value is used rather than a fixed 0.3.
                sum_sig = sum(sig_hists)
                cut = efficiency_cut(sum_sig, unblind)
                # assumes data_scores supports boolean-mask indexing
                # (e.g. a numpy array) -- TODO confirm
                data_scores = data_scores[data_scores < cut]
            # NOTE(review): without signal the data is shown in full.
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            n_data = sum(data_hist)
            n_model = sum(sum(bkg_hists))
            log.info("Data events: %d" % n_data)
            log.info("Model events: %f" % n_model)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            if n_model:
                log.info("Data / Model: %f" % (n_data / n_model))
            else:
                # An empty model must not abort plotting from a log line.
                log.warning("Data / Model: undefined (model is empty)")

    if draw_histograms:
        output_name = 'event_bdt_score'
        if name is not None:
            output_name += '_' + name
        # one linear-scale and one log-scale plot
        for logy in (False, True):
            draw(data=data_hist,
                 model=bkg_hists,
                 signal=sig_hists,
                 signal_scale=signal_scale,
                 category=category,
                 name="BDT Score",
                 output_name=output_name,
                 show_ratio=data_hist is not None,
                 model_colors=None,
                 signal_colors=signal_colors,
                 systematics=systematics,
                 logy=logy,
                 **kwargs)
    return bkg_hists, sig_hists, data_hist