Beispiel #1
0
    def cut_flow(self):
        '''Draw the stacked MC 'cut_flow' histograms against data.

        Fetches the 'cut_flow' histogram of every entry of self.mc_samples
        whose name contains neither 'ttJets' nor 'QCD', plus the one of
        self.ttbar_to_use, adds a single histogram that sums the 'cut_flow'
        of all views whose name contains 'QCD', stacks them, overlays the
        'data' view and adds a legend and a ratio plot. The upper pad is
        switched to log-y, the lower pad to linear-y.

        Returns nothing; every drawn object is appended to self.keep.
        '''
        BasePlotter.set_canvas_style(self.canvas)
        BasePlotter.set_canvas_style(self.pad)
        lab_f1, _ = self.dual_pad_format()
        self.label_factor = lab_f1
        # NOTE(review): calling .append() on the result requires filter() to
        # return a list, i.e. Python 2 semantics (consistent with the print
        # statement below) and presumably a list-typed self.mc_samples.
        views_to_flow = filter(lambda x: 'ttJets' not in x and 'QCD' not in x,
                               self.mc_samples)
        views_to_flow.append(self.ttbar_to_use)
        qcd_samples = [i for i in self.views if 'QCD' in i]
        samples = []

        for vtf in views_to_flow:
            histo = self.get_view(vtf).Get('cut_flow')
            print vtf, len(histo)
            self.keep.append(histo)
            samples.append(histo)

        # QCD may not have all the bins filled, needs special care:
        # `histo` is still the last histogram fetched in the loop above (the
        # self.ttbar_to_use one, since it was appended last); an emptied clone
        # of it serves as the accumulator for the QCD sum.
        qcd_histo = histo.Clone()
        qcd_histo.Reset()
        for sample in qcd_samples:
            qcd_flow = self.get_view(sample).Get('cut_flow')
            # take over decorators and title of the QCD sample being added
            qcd_histo = qcd_histo.decorate(**qcd_flow.decorators)
            qcd_histo.title = qcd_flow.title
            # zip() stops at the shorter histogram, so missing QCD bins are
            # simply left untouched in the accumulator
            for sbin, qbin in zip(qcd_histo, qcd_flow):
                sbin.value += qbin.value
                # presumably a sum in quadrature -- TODO confirm quad.quad
                sbin.error = quad.quad(sbin.error, qbin.error)
        samples.append(qcd_histo)
        self.keep.append(qcd_histo)
        # order the stack by the content of the second-to-last bin
        samples.sort(key=lambda x: x[-2].value)
        stack = plotting.HistStack()
        self.keep.append(stack)
        for i in samples:
            stack.Add(i)

        self.style_histo(stack)
        # `histo` (last histogram of the loop above) is reused as the frame
        # that carries the axes
        self.style_histo(histo, **histo.decorators)

        histo.Draw()  # drawn first to set the proper axis labels
        histo.yaxis.title = 'Events'
        data = self.get_view('data').Get('cut_flow')
        # lower edge is never above 1.2 (before the 0.8 scaling below)
        smin = min(stack.min(), data.min(), 1.2)
        smax = max(stack.max(), data.max())
        histo.yaxis.range_user = smin * 0.8, smax * 1.2
        stack.Draw('same')
        data.Draw('same')
        self.keep.append(data)
        self.add_legend([stack, data], False, entries=len(views_to_flow) + 1)
        self.pad.SetLogy()
        self.add_ratio_plot(data, stack, ratio_range=0.4)
        self.lower_pad.SetLogy(False)
Beispiel #2
0
 def create_stack(self, *histos, **styles_kwargs):
     '''Build a styled HistStack from the given histograms.

     histos        -- histograms to stack.
     styles_kwargs -- style options handed to
                      BasePlotter._kwargs_to_styles_ together with the
                      number of histograms. The special key 'sort'
                      (default True) is consumed here: when true the
                      histograms are stacked in order of increasing
                      Integral().

     Every histogram is passed through self.style_histo with its own
     style (an empty style when none is provided) before being added.
     Returns the HistStack.
     '''
     do_sort = styles_kwargs.pop('sort', True)
     if do_sort:
         ordered = sorted(histos, key=lambda h: h.Integral())
     else:
         ordered = list(histos)

     result = plotting.HistStack()
     per_histo_styles = BasePlotter._kwargs_to_styles_(styles_kwargs,
                                                       len(ordered))
     # izip_longest pads the shorter sequence with None
     for entry, entry_style in izip_longest(ordered, per_histo_styles):
         self.style_histo(entry, **(entry_style or {}))
         result.Add(entry)
     return result
Beispiel #3
0
        *[ views.SubdirectoryView( tfile, category ) 
           for category in categories]
        )
    
    if options.differential == 1:
        input_view = DifferentialView( input_view )

    histograms = [ apply_style(input_view.Get(i), i) for i in keys ]
    histograms = sorted(histograms, key=lambda x: x.Integral())
    observed   = apply_style(input_view.Get(data), data)

    logging.debug("debugging histos:")
    for histo in histograms:
        logging.debug("    %s: style: %s, integral: %.2f" % ( histo.GetTitle(), histo.drawstyle, histo.Integral() ) )

    stack = plotting.HistStack()
    for obj in histograms:
        stack.Add(obj)

    maximum = max(list(observed)+[stack.GetMaximum()])
    
    canvas = plotting.Canvas(name='adsf', title='asdf')
    canvas.cd()
    stack.SetMaximum(maximum*1.8)
    stack.Draw()
    stack.GetXaxis().SetTitle(options.xtitle)
    stack.GetYaxis().SetTitle(options.ytitle)
    observed.Draw('same')

    #tries to figure which side the legend goes
    obslist = list(observed)
Beispiel #4
0
            ch_plot = ch_h.Clone()

    # set bincontents of plot histogram to means of all samples
    for iBin in range(unc_plot.GetNbinsX() + 1):
        bin_contents = [h.GetBinContent(iBin) for h in unchanged_hists]
        unc_plot.SetBinContent(iBin, np.mean(bin_contents))
        unc_plot.SetBinError(iBin, np.std(bin_contents))
    for iBin in range(ch_plot.GetNbinsX() + 1):
        bin_contents = [h.GetBinContent(iBin) for h in changed_hists]
        ch_plot.SetBinContent(iBin, np.mean(bin_contents))
        ch_plot.SetBinError(iBin, np.std(bin_contents))

    # generate upper plot
    canvas = pltstyle.init_canvas(ratiopad=True)
    stack = rp.HistStack([unc_plot, ch_plot],
                         stacked=True,
                         drawstyle="HIST E1 X0")
    max_val = stack.GetMaximum()
    stack.SetMaximum(max_val * 1.3)

    rp.utils.draw([stack],
                  pad=canvas.cd(1),
                  xtitle="discriminator output for {} node".format(
                      event_classes[i_node]),
                  ytitle="Events")
    legend = pltstyle.init_legend([unc_plot, ch_plot])
    pltstyle.add_category_label(canvas.cd(1), categories[key])

    # generate lower plot
    line1 = rp.Graph(50)
    for i, x in enumerate(np.linspace(0., 1., 50)):
Beispiel #5
0
    def make_charge_flip_control_plot(self,
                                      variable,
                                      xaxis='',
                                      rebin=1,
                                      legend_on_the_left=False,
                                      data_type='data',
                                      x_range=None,
                                      apply_scale='',
                                      show_ratio=False,
                                      differential=False):
        '''Draw observed data against the stacked fakes + charge-flip estimate.

        variable           -- name of the histogram fetched from each view.
        xaxis, rebin,
        data_type          -- forwarded to self.get_flip_data, which provides
                              the three views used here.
        legend_on_the_left -- forwarded to self.add_legend as `leftside`.
        x_range            -- optional (low, high) pair; when given it is
                              applied to the x axis of the observed histogram
                              and forwarded to the ratio plot.
        apply_scale        -- when non-empty, the flip histogram is passed
                              through MedianView.apply_view together with the
                              histogram named `variable + apply_scale`.
        show_ratio         -- when true, a ratio plot of observed over
                              estimate is added (ratio_range=0.2).
        differential       -- when true, the three views are wrapped in
                              DifferentialView before use.

        Prints integral and under/overflow content of data and estimate.
        Returns nothing; drawn objects are appended to self.keep.
        '''
        ss_p1p2_view, ss_fakes_est, os_flip_est_nofake = self.get_flip_data(
            rebin, xaxis, data_type)

        if differential:
            ss_p1p2_view = DifferentialView(ss_p1p2_view)
            ss_fakes_est = DifferentialView(ss_fakes_est)
            os_flip_est_nofake = DifferentialView(os_flip_est_nofake)

        fakes_hist = ss_fakes_est.Get(variable)
        flip_hist = os_flip_est_nofake.Get(variable)
        if apply_scale:
            flip_hist = MedianView.apply_view(
                flip_hist, os_flip_est_nofake.Get(variable + apply_scale))

        obs_hist = ss_p1p2_view.Get(variable)
        # the estimate is the stack of the fakes and the charge-flip histograms
        estimate_hist = plotting.HistStack()
        estimate_hist.Add(fakes_hist)
        estimate_hist.Add(flip_hist)

        # hatched black band built from the stack by HistStackToTGRaphErrors
        estimate_error = HistStackToTGRaphErrors(estimate_hist)
        estimate_error.SetFillStyle(3013)
        estimate_error.SetFillColor(ROOT.EColor.kBlack)
        estimate_error.SetTitle('Error on estimate')

        #from pdb import set_trace; set_trace()
        # summed estimate, only used for the printout below; bin 0 and bin
        # GetNbinsX()+1 are ROOT's underflow and overflow bins
        sum_stack = sum(estimate_hist.hists)
        print "variable %s: data integral: %.1f (%.1f/%.1f), estimate: %.1f (%.1f/%.1f) (under/overflow)" % (variable, \
            obs_hist.Integral(), obs_hist.GetBinContent(0), obs_hist.GetBinContent(obs_hist.GetNbinsX()+1), \
            sum_stack.Integral(), sum_stack.GetBinContent(0), sum_stack.GetBinContent(sum_stack.GetNbinsX()+1) )
        # leave 30% headroom above the larger of estimate and observation
        hmax = max([estimate_hist.GetMaximum(), max(list(obs_hist))])
        obs_hist.GetYaxis().SetRangeUser(0, hmax * 1.3)
        if x_range:
            obs_hist.GetXaxis().SetRangeUser(x_range[0], x_range[1])

        # observed histogram is drawn first to provide the axes and drawn
        # again last so that it ends up on top of the stack and error band
        obs_hist.Draw()
        estimate_hist.Draw('same')
        self.canvas.Update()
        estimate_error.Draw('2 same')
        obs_hist.Draw('same')
        self.keep.extend([estimate_hist, estimate_error, obs_hist])

        legend = self.add_legend([obs_hist],
                                 leftside=legend_on_the_left,
                                 entries=4)
        legend.AddEntry(estimate_hist, 'f')
        #legend.AddEntry(estimate_error,'f')
        legend.Draw()
        self.add_cms_blurb(self.sqrts)
        if show_ratio:
            self.add_ratio_plot(obs_hist,
                                estimate_hist,
                                x_range,
                                ratio_range=0.2)
Beispiel #6
0
    def plot_class_differences(self, log=False):
        ''' plot, per node, the relative difference to every other node

        For each class in self.event_classes the events predicted as that
        class are selected, and for every other node the quantity
        (node output - other node output) / node output is histogrammed
        with the lumi weights. One PDF per node is written to
        self.save_path + "/node_differences_<class>.pdf".

        log -- if true, the canvas is switched to a logarithmic y axis.

        Nodes without any other class to compare with are skipped.
        '''

        pltstyle.init_plot_style()

        nbins = 20
        bin_range = [0., 1.]

        # Fetched once instead of once per selected event.
        # NOTE(review): assumes get_lumi_weights() returns the same weights
        # on every call -- confirm against the data class.
        lumi_weights = self.data.get_lumi_weights()
        predicted_classes = self.predicted_classes

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            node_values = self.mainnet_predicted_vector[:, i]

            # events predicted as this node; the same selection applies to
            # the values of this node, of every other node and the weights
            selected = [
                k for k in range(len(node_values))
                if predicted_classes[k] == node_index
            ]
            filtered_node_values = np.array([node_values[k] for k in selected])
            filtered_weights = [lumi_weights[k] for k in selected]

            histograms = []
            stack = None
            first_hist = None
            max_val = 0
            # loop over other nodes and get those predictions
            for j, other_cls in enumerate(self.event_classes):
                if i == j:
                    continue

                other_values = self.mainnet_predicted_vector[:, j]
                filtered_other_values = np.array(
                    [other_values[k] for k in selected])

                # get difference of predicted node value and other value
                # (a node output of exactly 0 yields inf/nan here)
                diff_values = (filtered_node_values -
                               filtered_other_values) / filtered_node_values

                hist = rp.Hist(nbins,
                               *bin_range,
                               title=str(other_cls) + " node",
                               drawstyle="HIST E1 X0")
                pltstyle.set_sig_hist_style(hist, other_cls)
                hist.fill_array(diff_values, filtered_weights)
                if hist.GetMaximum() > max_val:
                    max_val = hist.GetMaximum()

                # the first histogram goes into the stack, all others are
                # drawn on top of it as plain histograms
                if stack is None:
                    stack = rp.HistStack([hist], stacked=True)
                    first_hist = hist
                else:
                    histograms.append(hist)

            if stack is None:
                # no other node to compare with: nothing to draw
                continue

            # create canvas
            canvas = pltstyle.init_canvas()
            # drawing hists
            stack.SetMaximum(max_val * 1.3)
            rp.utils.draw([stack] + histograms,
                          pad=canvas,
                          xtitle="relative difference (" + str(node_cls) +
                          " - X_node)/" + str(node_cls),
                          ytitle="Events")
            if log:
                canvas.cd().SetLogy()

            # legend; the reference is kept on purpose, presumably so the
            # object stays alive until the canvas is saved -- TODO confirm
            legend = pltstyle.init_legend([first_hist] + histograms)
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # save
            out_path = self.save_path + "/node_differences_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
Beispiel #7
0
    def plot_classification(self, log=False):
        ''' plot all events classified as one category

        For every node in self.event_classes the events predicted as that
        node are split by their true class. All classes except the "ttHbb"
        one are stacked as backgrounds; the "ttHbb" histogram is scaled to
        the summed background weight and drawn on top. The S/B ratio of the
        unscaled weights is printed and one PDF per node is written to
        self.save_path + "/predictions_<class>.pdf".

        log -- if true, the canvas is switched to a logarithmic y axis.
        '''

        pltstyle.init_plot_style()
        nbins = 20
        bin_range = [0., 1.]

        ttH_index = self.data.class_translation["ttHbb"]

        # Fetched once instead of once per event, per class and per node.
        # NOTE(review): assumes both getters return the same data on every
        # call -- confirm against the data class.
        test_labels = self.data.get_test_labels(as_categorical=False)
        lumi_weights = self.data.get_lumi_weights()
        predicted_classes = self.predicted_classes

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to predicted class
            for j, truth_cls in enumerate(self.event_classes):
                class_index = self.data.class_translation[truth_cls]

                # events of this true class that were predicted as this node
                selected = [
                    k for k in range(len(out_values))
                    if test_labels[k] == class_index
                    and predicted_classes[k] == node_index
                ]
                filtered_values = [out_values[k] for k in selected]
                filtered_weights = [lumi_weights[k] for k in selected]

                if j == ttH_index:
                    # signal in this node
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled to the background integral; without any
            # signal weight in this node there is nothing to scale
            weight_sum = sum(sig_weights)
            if weight_sum:
                scale_factor = 1. * weight_integral / weight_sum
            else:
                scale_factor = 1.
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # creating canvas
            canvas = pltstyle.init_canvas()

            # drawing hists
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle="Events predicted as " + node_cls,
                          ytitle="Events",
                          pad=canvas)
            if log:
                canvas.cd().SetLogy()

            # legend; the reference is kept on purpose, presumably so the
            # object stays alive until the canvas is saved -- TODO confirm
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # S/B is undefined when no background ends up in this node
            if weight_integral:
                s_over_b = weight_sum / weight_integral
            else:
                s_over_b = float("nan")
            print("S/B = {}".format(s_over_b))

            # save
            out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)

            pltstyle.save_canvas(canvas, out_path)
Beispiel #8
0
    def plot_discriminators(self, log=False, cut_on_variable=None):
        ''' plot discriminators for output classes

        For every node in self.event_classes the node output of all test
        events is histogrammed per true class. All classes except the
        "ttHbb" one are stacked as backgrounds; the "ttHbb" histogram is
        scaled to the summed background weight and drawn on top. The ROC
        AUC of the node output against the ttH flag is added to the plot
        and one PDF per node is written to
        self.save_path + "/discriminator_<class>.pdf".

        log             -- if true, the canvas is switched to a logarithmic
                           y axis.
        cut_on_variable -- optional dict with keys "class" and "val"; when
                           given, only events whose output of the "class"
                           node is <= "val" are plotted.
        '''
        pltstyle.init_plot_style()

        nbins = 50
        bin_range = [0., 1.]

        # get some ttH specific info for plotting
        ttH_index = self.data.class_translation["ttHbb"]
        ttH_true_labels = self.data.get_ttH_flag()

        # Fetched once instead of once per event, per class and per node.
        # NOTE(review): assumes both getters return the same data on every
        # call -- confirm against the data class.
        test_labels = self.data.get_test_labels(as_categorical=False)
        lumi_weights = self.data.get_lumi_weights()

        # apply cut to output node value if wanted
        apply_cut = bool(cut_on_variable)
        if apply_cut:
            cut_class = cut_on_variable["class"]
            cut_value = cut_on_variable["val"]

            cut_index = self.data.class_translation[cut_class]
            cut_prediction = self.mainnet_predicted_vector[:, cut_index]

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # calculate node specific ROC value
            node_ROC = roc_auc_score(ttH_true_labels, out_values)

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to predicted class
            for j, truth_cls in enumerate(self.event_classes):
                class_index = self.data.class_translation[truth_cls]

                # events of this true class (that also pass the optional
                # cut); the cut variables are only read when apply_cut is set
                selected = [
                    k for k in range(len(out_values))
                    if test_labels[k] == class_index
                    and (not apply_cut or cut_prediction[k] <= cut_value)
                ]
                filtered_values = [out_values[k] for k in selected]
                filtered_weights = [lumi_weights[k] for k in selected]

                if j == ttH_index:
                    # ttH signal
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled to the background integral; without any
            # signal weight (e.g. everything removed by the cut) there is
            # nothing to scale
            weight_sum = sum(sig_weights)
            if weight_sum:
                scale_factor = 1. * weight_integral / weight_sum
            else:
                scale_factor = 1.
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # creating canvas
            canvas = pltstyle.init_canvas()

            # drawing histograms
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle=node_cls + " Discriminator",
                          ytitle="Events",
                          pad=canvas)
            if log:
                canvas.cd().SetLogy()

            # creating legend; the reference is kept on purpose, presumably
            # so the object stays alive until the canvas is saved -- TODO
            # confirm
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # add ROC value to plot
            pltstyle.add_ROC_value(canvas, node_ROC)

            # save canvas
            out_path = self.save_path + "/discriminator_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
Beispiel #9
0
    def plot_prenet_nodes(self, log=False):
        ''' plot prenet nodes

        For every target in self.prenet_targets the prenet output is
        histogrammed separately for events whose prenet label is 1 ("True")
        and 0 ("False"). The "True" histogram is scaled to the summed weight
        of the "False" events and drawn on top of the "False" stack. One PDF
        per target is written to
        self.save_path + "/prenet_output_<target>.pdf".

        log -- if true, the canvas is switched to a logarithmic y axis.
        '''
        pltstyle.init_plot_style()
        n_bins = 20
        bin_range = [0., 1.]

        # Fetched once instead of once per event (weights) or once per
        # target (labels).
        # NOTE(review): assumes both getters return the same data on every
        # call -- confirm against the data class.
        lumi_weights = self.data.get_lumi_weights()
        all_prenet_labels = self.data.get_prenet_test_labels()

        for i, node_cls in enumerate(self.prenet_targets):
            # get outputs of class node
            out_values = self.prenet_predicted_vector[:, i]

            prenet_labels = all_prenet_labels[:, i]

            # events with any other label value end up in neither selection
            sig_events = [
                k for k in range(len(out_values)) if prenet_labels[k] == 1
            ]
            bkg_events = [
                k for k in range(len(out_values)) if prenet_labels[k] == 0
            ]

            sig_values = [out_values[k] for k in sig_events]
            bkg_values = [out_values[k] for k in bkg_events]

            sig_weights = [lumi_weights[k] for k in sig_events]
            bkg_weights = [lumi_weights[k] for k in bkg_events]

            # scale signal to the background integral; without any signal
            # weight there is nothing to scale
            sig_weight_sum = sum(sig_weights)
            if sig_weight_sum:
                bkg_sig_ratio = 1. * sum(bkg_weights) / sig_weight_sum
            else:
                bkg_sig_ratio = 1.
            sig_weights = [w * bkg_sig_ratio for w in sig_weights]

            sig_label = "True"
            bkg_label = "False"

            sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

            # plot output
            bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
            pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
            bkg_hist.fill_array(bkg_values, bkg_weights)

            sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            stack = rp.HistStack([bkg_hist],
                                 stacked=True,
                                 drawstyle="HIST E1 X0")
            stack.SetMinimum(1e-4)

            canvas = pltstyle.init_canvas()

            rp.utils.draw([stack, sig_hist],
                          xtitle="prenet node {}".format(node_cls),
                          ytitle="Events",
                          pad=canvas)
            if log:
                canvas.cd().SetLogy()

            # the reference is kept on purpose, presumably so the legend
            # stays alive until the canvas is saved -- TODO confirm
            legend = pltstyle.init_legend([bkg_hist, sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            out_path = self.save_path + "/prenet_output_{}.pdf".format(
                node_cls)

            pltstyle.save_canvas(canvas, out_path)
Beispiel #10
0
            new_h_plot = new_h.Clone()
        new_hists.append(new_h)

    # loop over bins and fill average to plot hist with errors
    for iBin in range(new_h_plot.GetNbinsX() + 1):
        bin_contents = [h.GetBinContent(iBin) for h in new_hists]
        new_h_plot.SetBinContent(iBin, np.mean(bin_contents))
        new_h_plot.SetBinError(iBin, np.std(bin_contents))

    # get unsmeared histogram
    old_h = before_hists[i_node]
    old_h_plot = old_h.Clone()

    # plot histogram
    canvas = pltstyle.init_canvas(ratiopad=True)
    stack = rp.HistStack([old_h_plot], stacked=True, drawstyle="HIST X0")
    max_val = max(stack.GetMaximum(), new_h.GetMaximum())
    stack.SetMaximum(max_val * 1.3)

    rp.utils.draw([stack] + [new_h_plot],
                  pad=canvas.cd(1),
                  xtitle="discriminator output for {} node".format(
                      event_classes[i_node]),
                  ytitle="Events")
    legend = pltstyle.init_legend([old_h_plot, new_h_plot])
    pltstyle.add_category_label(canvas.cd(1), categories[key])

    x_vals = []
    ks_probs_per_bin = []
    ks_error_per_bin = []
    for i_bin in range(new_h.GetNbinsX()):