예제 #1
0
import pandas
import glob
import os
import sys
import socket
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# local imports
import variable_info
import plot_configs.variable_binning as binning
import plot_configs.plotting_styles as ps
ps.init_plot_style()

# Optional first command-line argument: pass 1 to have all correlations drawn
# as scatter plots. The raw argument string is stored unchanged; without an
# argument the flag falls back to the integer 0.
# NOTE(review): any non-empty string (including "0") is truthy - confirm how
# the flag is evaluated by the code that consumes it.
plot_correlations = sys.argv[1] if len(sys.argv) > 1 else 0

# lumi
# NOTE(review): presumably the integrated luminosity of the dataset; neither
# its unit nor its use is visible in this part of the file - confirm.
lumi = 41.5

# Maps every event-selection string (jet and b-tag multiplicity) to the
# variable_info attribute holding the variables of that category.
# NOTE(review): the ">= 6 jets" selection points at variables_4j_3b and the
# "== 4 jets" selection at variables_6j_3b, so the two look swapped - confirm
# against variable_info before relying on this mapping.
categories = {
    "(N_Jets >= 6 and N_BTagsM >= 3)": variable_info.variables_4j_3b,
    "(N_Jets == 5 and N_BTagsM >= 3)": variable_info.variables_5j_3b,
    "(N_Jets == 4 and N_BTagsM >= 3)": variable_info.variables_6j_3b,
}

category_names = {
    "(N_Jets >= 6 and N_BTagsM >= 3)": "ge6j_ge3t",
    "(N_Jets == 5 and N_BTagsM >= 3)": "5j_ge3t",
예제 #2
0
    def plot_class_differences(self, log=False):
        ''' plot relative differences between one output node and all others

        For every entry of self.event_classes the events whose predicted class
        equals the class_translation index of that node are selected. For each
        other node a weighted histogram of
        (node output - other node output) / node output
        is filled for those events. One canvas per node is written to
        <save_path>/node_differences_<node>.pdf.

        log: if true, the pad of the canvas is switched to a logarithmic
             y axis via SetLogy().
        '''

        pltstyle.init_plot_style()

        nbins = 20
        bin_range = [0., 1.]

        # fetch the per-event inputs once instead of once per event
        lumi_weights = self.data.get_lumi_weights()
        predicted_classes = self.predicted_classes

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            node_values = self.mainnet_predicted_vector[:, i]

            # indices of all events that were classified as this node; the same
            # selection is reused for the weights and for every other node
            selected = [k for k in range(len(node_values))
                        if predicted_classes[k] == node_index]

            filtered_node_values = np.array([node_values[k] for k in selected])
            filtered_weights = [lumi_weights[k] for k in selected]

            histograms = []
            stack = None
            first_hist = None
            max_val = 0
            # loop over other nodes and get those predictions
            for j, other_cls in enumerate(self.event_classes):
                if i == j: continue

                other_values = self.mainnet_predicted_vector[:, j]
                filtered_other_values = np.array(
                    [other_values[k] for k in selected])

                # get difference of predicted node value and other value
                diff_values = (filtered_node_values -
                               filtered_other_values) / filtered_node_values

                hist = rp.Hist(nbins,
                               *bin_range,
                               title=str(other_cls) + " node",
                               drawstyle="HIST E1 X0")
                pltstyle.set_sig_hist_style(hist, other_cls)
                hist.fill_array(diff_values, filtered_weights)
                if hist.GetMaximum() > max_val: max_val = hist.GetMaximum()

                # the first histogram is wrapped in the stack, all following
                # ones are drawn on top of it as separate histograms
                if stack is None:
                    stack = rp.HistStack([hist], stacked=True)
                    first_hist = hist
                else:
                    histograms.append(hist)

            # with a single event class there is no other node to compare to,
            # so there is nothing to draw for this node
            if stack is None:
                continue

            # create canvas
            canvas = pltstyle.init_canvas()
            # drawing hists
            stack.SetMaximum(max_val * 1.3)
            rp.utils.draw([stack] + histograms,
                          pad=canvas,
                          xtitle="relative difference (" + str(node_cls) +
                          " - X_node)/" + str(node_cls),
                          ytitle="Events")
            if log: canvas.cd().SetLogy()

            # legend
            # NOTE(review): the reference is kept on purpose; presumably it
            # keeps the legend alive until the canvas is saved - confirm.
            legend = pltstyle.init_legend([first_hist] + histograms)
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # save
            out_path = self.save_path + "/node_differences_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
예제 #3
0
    def plot_discriminators(self, log=False, cut_on_variable=None):
        ''' plot discriminators for output classes

        For every entry of self.event_classes the output of that node is
        histogrammed separately for each true event class. All classes except
        ttHbb are stacked as backgrounds; the ttHbb histogram is scaled to the
        summed background weight and drawn on top. The ROC AUC of the node
        output against the ttH flag is added to the plot, which is written to
        <save_path>/discriminator_<node>.pdf.

        log:             if true, the pad of the canvas is switched to a
                         logarithmic y axis via SetLogy().
        cut_on_variable: optional dict with the keys "class" and "val"; only
                         events whose output value on the node of "class" is
                         <= "val" are filled into the histograms.
        '''
        pltstyle.init_plot_style()

        nbins = 50
        bin_range = [0., 1.]

        # get some ttH specific info for plotting
        ttH_index = self.data.class_translation["ttHbb"]
        ttH_true_labels = self.data.get_ttH_flag()

        # apply cut to output node value if wanted
        apply_cut = bool(cut_on_variable)
        cut_prediction = None
        cut_value = None
        if apply_cut:
            cut_class = cut_on_variable["class"]
            cut_value = cut_on_variable["val"]

            cut_index = self.data.class_translation[cut_class]
            cut_prediction = self.mainnet_predicted_vector[:, cut_index]

        # fetch the per-event inputs once instead of once per event
        test_labels = self.data.get_test_labels(as_categorical = False)
        lumi_weights = self.data.get_lumi_weights()

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # calculate node specific ROC value
            node_ROC = roc_auc_score(ttH_true_labels, out_values)

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to true class
            for truth_cls in self.event_classes:
                class_index = self.data.class_translation[truth_cls]

                # indices of the events of this class (that pass the cut);
                # shared by the value and the weight selection
                selected = [k for k in range(len(out_values))
                            if test_labels[k] == class_index
                            and (not apply_cut
                                 or cut_prediction[k] <= cut_value)]
                filtered_values = [out_values[k] for k in selected]
                filtered_weights = [lumi_weights[k] for k in selected]

                # compare translated class indices; the position of the class
                # in self.event_classes is not guaranteed to be the same number
                if class_index == ttH_index:
                    # ttH signal
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled to the summed background weight; without any
            # selected signal event there is nothing to scale, so fall back to
            # a factor of 1 instead of dividing by zero
            weight_sum = sum(sig_weights)
            if weight_sum:
                scale_factor = 1. * weight_integral / weight_sum
            else:
                scale_factor = 1.
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # creating canvas
            canvas = pltstyle.init_canvas()

            # drawing histograms
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle=node_cls + " Discriminator",
                          ytitle="Events",
                          pad=canvas)
            if log: canvas.cd().SetLogy()

            # creating legend
            # NOTE(review): the reference is kept on purpose; presumably it
            # keeps the legend alive until the canvas is saved - confirm.
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # add ROC value to plot
            pltstyle.add_ROC_value(canvas, node_ROC)

            # save canvas
            out_path = self.save_path + "/discriminator_{}.pdf".format(
                node_cls)
            pltstyle.save_canvas(canvas, out_path)
예제 #4
0
    def plot_classification(self, log=False):
        ''' plot all events classified as one category

        For every entry of self.event_classes only the events whose predicted
        class equals the class_translation index of that node are used. Their
        node output is histogrammed per true event class: all classes except
        ttHbb are stacked as backgrounds, the ttHbb histogram is scaled to the
        summed background weight and drawn on top. The unscaled ratio of
        signal to background weights is printed and the canvas is written to
        <save_path>/predictions_<node>.pdf.

        log: if true, the pad of the canvas is switched to a logarithmic
             y axis via SetLogy().
        '''

        pltstyle.init_plot_style()
        nbins = 20
        bin_range = [0., 1.]

        ttH_index = self.data.class_translation["ttHbb"]

        # fetch the per-event inputs once instead of once per event
        test_labels = self.data.get_test_labels(as_categorical = False)
        lumi_weights = self.data.get_lumi_weights()
        predicted_classes = self.predicted_classes

        # loop over discriminator nodes
        for i, node_cls in enumerate(self.event_classes):
            node_index = self.data.class_translation[node_cls]

            # get outputs of node
            out_values = self.mainnet_predicted_vector[:, i]

            # indices of all events that were classified as this node
            node_events = [k for k in range(len(out_values))
                           if predicted_classes[k] == node_index]

            # fill lists according to class
            bkg_hists = []
            weight_integral = 0

            # loop over all classes to fill hist according to true class
            for truth_cls in self.event_classes:
                class_index = self.data.class_translation[truth_cls]

                # filter values per event class
                selected = [k for k in node_events
                            if test_labels[k] == class_index]
                filtered_values = [out_values[k] for k in selected]
                filtered_weights = [lumi_weights[k] for k in selected]

                # compare translated class indices; the position of the class
                # in self.event_classes is not guaranteed to be the same number
                if class_index == ttH_index:
                    # signal in this node
                    sig_values = filtered_values
                    sig_label = str(truth_cls)
                    sig_weights = filtered_weights
                else:
                    # background in this node
                    weight_integral += sum(filtered_weights)
                    hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                    pltstyle.set_bkg_hist_style(hist, truth_cls)
                    hist.fill_array(filtered_values, filtered_weights)
                    bkg_hists.append(hist)

            # stack backgrounds
            bkg_stack = rp.HistStack(bkg_hists,
                                     stacked=True,
                                     drawstyle="HIST E1 X0")
            bkg_stack.SetMinimum(1e-4)
            max_val = bkg_stack.GetMaximum() * 1.3
            bkg_stack.SetMaximum(max_val)

            # plot signal, scaled to the summed background weight; a node
            # without any signal event has nothing to scale, so fall back to a
            # factor of 1 instead of dividing by zero
            weight_sum = sum(sig_weights)
            if weight_sum:
                scale_factor = 1. * weight_integral / weight_sum
            else:
                scale_factor = 1.
            sig_weights = [w * scale_factor for w in sig_weights]

            sig_title = sig_label + "*{:.3f}".format(scale_factor)
            sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            # creating canvas
            canvas = pltstyle.init_canvas()

            # drawing hists
            rp.utils.draw([bkg_stack, sig_hist],
                          xtitle="Events predicted as " + node_cls,
                          ytitle="Events",
                          pad=canvas)
            if log: canvas.cd().SetLogy()

            # legend
            # NOTE(review): the reference is kept on purpose; presumably it
            # keeps the legend alive until the canvas is saved - confirm.
            legend = pltstyle.init_legend(bkg_hists + [sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            # S/B is undefined for a node without background events
            if weight_integral:
                print("S/B = {}".format(weight_sum / weight_integral))
            else:
                print("S/B = undefined (no background events in this node)")

            # save
            out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)

            pltstyle.save_canvas(canvas, out_path)
예제 #5
0
    def plot_prenet_nodes(self, log=False):
        ''' plot prenet nodes

        For every entry of self.prenet_targets the output of the matching
        prenet node is histogrammed separately for events whose prenet label
        is 1 ("True") and events whose prenet label is 0 ("False"). The "True"
        histogram is scaled to the summed weight of the "False" events. One
        canvas per node is written to <save_path>/prenet_output_<node>.pdf.

        log: if true, the pad of the canvas is switched to a logarithmic
             y axis via SetLogy().
        '''
        pltstyle.init_plot_style()
        n_bins = 20
        bin_range = [0., 1.]

        # fetch the per-event inputs once instead of once per event / per node
        lumi_weights = self.data.get_lumi_weights()
        all_prenet_labels = self.data.get_prenet_test_labels()

        for i, node_cls in enumerate(self.prenet_targets):
            # get outputs of class node
            out_values = self.prenet_predicted_vector[:, i]

            prenet_labels = all_prenet_labels[:, i]

            # split the events in a single pass; events with a label other
            # than 0 or 1 end up in neither histogram
            sig_values, sig_weights = [], []
            bkg_values, bkg_weights = [], []
            for k in range(len(out_values)):
                if prenet_labels[k] == 1:
                    sig_values.append(out_values[k])
                    sig_weights.append(lumi_weights[k])
                elif prenet_labels[k] == 0:
                    bkg_values.append(out_values[k])
                    bkg_weights.append(lumi_weights[k])

            # scale the "True" events to the summed weight of the "False"
            # events; without any "True" event there is nothing to scale, so
            # fall back to a factor of 1 instead of dividing by zero
            sig_weight_sum = sum(sig_weights)
            if sig_weight_sum:
                bkg_sig_ratio = 1. * sum(bkg_weights) / sig_weight_sum
            else:
                bkg_sig_ratio = 1.
            sig_weights = [w * bkg_sig_ratio for w in sig_weights]

            sig_label = "True"
            bkg_label = "False"

            sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

            # plot output
            bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
            pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
            bkg_hist.fill_array(bkg_values, bkg_weights)

            sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
            pltstyle.set_sig_hist_style(sig_hist, sig_label)
            sig_hist.fill_array(sig_values, sig_weights)

            stack = rp.HistStack([bkg_hist],
                                 stacked=True,
                                 drawstyle="HIST E1 X0")
            stack.SetMinimum(1e-4)

            canvas = pltstyle.init_canvas()

            rp.utils.draw([stack, sig_hist],
                          xtitle="prenet node {}".format(node_cls),
                          ytitle="Events",
                          pad=canvas)
            if log: canvas.cd().SetLogy()

            # NOTE(review): the reference is kept on purpose; presumably it
            # keeps the legend alive until the canvas is saved - confirm.
            legend = pltstyle.init_legend([bkg_hist, sig_hist])
            pltstyle.add_lumi(canvas)
            pltstyle.add_category_label(canvas, self.event_category)

            out_path = self.save_path + "/prenet_output_{}.pdf".format(
                node_cls)

            pltstyle.save_canvas(canvas, out_path)