import pandas import glob import os import sys import socket import numpy as np import matplotlib.pyplot as plt import matplotlib # local imports import variable_info import plot_configs.variable_binning as binning import plot_configs.plotting_styles as ps ps.init_plot_style() # give 1 as first argument if all correlations should be plotted as scatter plots if len(sys.argv) > 1: plot_correlations = sys.argv[1] else: plot_correlations = 0 # lumi lumi = 41.5 categories = { "(N_Jets >= 6 and N_BTagsM >= 3)": variable_info.variables_4j_3b, "(N_Jets == 5 and N_BTagsM >= 3)": variable_info.variables_5j_3b, "(N_Jets == 4 and N_BTagsM >= 3)": variable_info.variables_6j_3b, } category_names = { "(N_Jets >= 6 and N_BTagsM >= 3)": "ge6j_ge3t", "(N_Jets == 5 and N_BTagsM >= 3)": "5j_ge3t",
def plot_class_differences(self, log=False):
    ''' plot relative output differences for the events assigned to each node

    For every entry of self.event_classes one canvas is saved as
    <save_path>/node_differences_<node>.pdf. Only events whose predicted
    class equals that node are used. For each other node the weighted
    distribution of (node output - other output) / node output is drawn.

    log: if True, the y axis of the canvas is switched to log scale
    '''
    pltstyle.init_plot_style()

    nbins = 20
    bin_range = [0., 1.]

    # fetch the per-event arrays once instead of once per event
    # NOTE(review): assumes both are 1-d and aligned with the rows of
    # self.mainnet_predicted_vector -- confirm against the data class
    predicted_classes = np.asarray(self.predicted_classes)
    lumi_weights = np.asarray(self.data.get_lumi_weights())

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # select the events that were classified into this node
        in_node = predicted_classes == node_index
        node_values = self.mainnet_predicted_vector[:, i][in_node]
        node_weights = lumi_weights[in_node]

        histograms = []
        max_val = 0

        # loop over other nodes and get those predictions
        for j, other_cls in enumerate(self.event_classes):
            if i == j:
                continue

            other_values = self.mainnet_predicted_vector[:, j][in_node]

            # relative difference of predicted node value and other value
            diff_values = (node_values - other_values) / node_values

            hist = rp.Hist(nbins, *bin_range,
                           title=str(other_cls) + " node",
                           drawstyle="HIST E1 X0")
            pltstyle.set_sig_hist_style(hist, other_cls)
            hist.fill_array(diff_values, node_weights)

            max_val = max(max_val, hist.GetMaximum())
            histograms.append(hist)

        # with a single event class there is nothing to compare against;
        # the original code failed here because no stack was ever created
        if not histograms:
            continue

        # the first histogram goes into the stack, the rest are overlaid
        stack = rp.HistStack([histograms[0]], stacked=True)
        overlays = histograms[1:]

        # create canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        stack.SetMaximum(max_val * 1.3)
        rp.utils.draw([stack] + overlays, pad=canvas,
                      xtitle="relative difference (" + str(node_cls)
                             + " - X_node)/" + str(node_cls),
                      ytitle="Events")
        if log:
            canvas.cd().SetLogy()

        # legend
        # NOTE(review): the return value is kept in a local on purpose;
        # presumably the legend must stay referenced until the canvas is
        # saved -- confirm before removing the assignment
        legend = pltstyle.init_legend(histograms)
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # save
        out_path = self.save_path + "/node_differences_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_discriminators(self, log=False, cut_on_variable=None):
    ''' plot discriminators for output classes

    For every entry of self.event_classes one canvas is saved as
    <save_path>/discriminator_<node>.pdf. It shows the node output of all
    test events, split by true class: the non-ttH classes as a stacked
    histogram and the ttH class as an overlay scaled to the summed weight
    of the stacked classes.

    log:             if True, the y axis of the canvas is set to log scale
    cut_on_variable: optional dict with the keys "class" and "val"; when
                     given, only events whose output for the node of
                     "class" is <= "val" are filled into the histograms.
                     The ROC value is still computed from all events.
    '''
    pltstyle.init_plot_style()

    nbins = 50
    bin_range = [0., 1.]

    # get some ttH specific info for plotting
    ttH_index = self.data.class_translation["ttHbb"]
    ttH_true_labels = self.data.get_ttH_flag()

    # fetch the per-event arrays once instead of once per event
    # NOTE(review): assumes both are 1-d and aligned with the rows of
    # self.mainnet_predicted_vector -- confirm against the data class
    test_labels = np.asarray(
        self.data.get_test_labels(as_categorical=False))
    lumi_weights = np.asarray(self.data.get_lumi_weights())

    # apply cut to output node value if wanted
    passes_cut = None
    if cut_on_variable:
        cut_class = cut_on_variable["class"]
        cut_value = cut_on_variable["val"]

        cut_index = self.data.class_translation[cut_class]
        passes_cut = self.mainnet_predicted_vector[:, cut_index] <= cut_value

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # calculate node specific ROC value
        node_ROC = roc_auc_score(ttH_true_labels, out_values)

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to true class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class (and cut, if requested)
            selected = test_labels == class_index
            if passes_cut is not None:
                selected = selected & passes_cut

            filtered_values = out_values[selected]
            filtered_weights = lumi_weights[selected]

            # compare translated indices: ttH_index comes from
            # class_translation, so the loop position must not be used
            if class_index == ttH_index:
                # ttH signal
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += filtered_weights.sum()

                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)

                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the background integral; keep the signal
        # unscaled when its weights sum to zero to avoid dividing by zero
        weight_sum = sig_weights.sum()
        scale_factor = 1. * weight_integral / weight_sum if weight_sum else 1.
        sig_weights = sig_weights * scale_factor

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing histograms
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle=node_cls + " Discriminator",
                      ytitle="Events",
                      pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # creating legend
        # NOTE(review): the return value is kept in a local on purpose;
        # presumably the legend must stay referenced until the canvas is
        # saved -- confirm before removing the assignment
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # add ROC value to plot
        pltstyle.add_ROC_value(canvas, node_ROC)

        # save canvas
        out_path = self.save_path + "/discriminator_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_classification(self, log=False):
    ''' plot all events classified as one category

    For every entry of self.event_classes one canvas is saved as
    <save_path>/predictions_<node>.pdf. Only events whose predicted class
    equals that node are used. Their node output is split by true class:
    the non-ttH classes as a stacked histogram and the ttH class as an
    overlay scaled to the summed weight of the stacked classes. The
    unscaled signal over background weight ratio is printed per node.

    log: if True, the y axis of the canvas is switched to log scale
    '''
    pltstyle.init_plot_style()

    nbins = 20
    bin_range = [0., 1.]

    ttH_index = self.data.class_translation["ttHbb"]

    # fetch the per-event arrays once instead of once per event
    # NOTE(review): assumes all three are 1-d and aligned with the rows of
    # self.mainnet_predicted_vector -- confirm against the data class
    test_labels = np.asarray(
        self.data.get_test_labels(as_categorical=False))
    lumi_weights = np.asarray(self.data.get_lumi_weights())
    predicted_classes = np.asarray(self.predicted_classes)

    # loop over discriminator nodes
    for i, node_cls in enumerate(self.event_classes):
        node_index = self.data.class_translation[node_cls]

        # get outputs of node
        out_values = self.mainnet_predicted_vector[:, i]

        # events that were classified into this node
        in_node = predicted_classes == node_index

        # fill lists according to class
        bkg_hists = []
        weight_integral = 0

        # loop over all classes to fill hist according to true class
        for truth_cls in self.event_classes:
            class_index = self.data.class_translation[truth_cls]

            # filter values per event class
            selected = (test_labels == class_index) & in_node
            filtered_values = out_values[selected]
            filtered_weights = lumi_weights[selected]

            # compare translated indices: ttH_index comes from
            # class_translation, so the loop position must not be used
            if class_index == ttH_index:
                # signal in this node
                sig_values = filtered_values
                sig_label = str(truth_cls)
                sig_weights = filtered_weights
            else:
                # background in this node
                weight_integral += filtered_weights.sum()

                hist = rp.Hist(nbins, *bin_range, title=str(truth_cls))
                pltstyle.set_bkg_hist_style(hist, truth_cls)
                hist.fill_array(filtered_values, filtered_weights)

                bkg_hists.append(hist)

        # stack backgrounds
        bkg_stack = rp.HistStack(bkg_hists, stacked=True,
                                 drawstyle="HIST E1 X0")
        bkg_stack.SetMinimum(1e-4)
        max_val = bkg_stack.GetMaximum() * 1.3
        bkg_stack.SetMaximum(max_val)

        # plot signal, scaled to the background integral; keep the signal
        # unscaled when its weights sum to zero to avoid dividing by zero
        weight_sum = sig_weights.sum()
        scale_factor = 1. * weight_integral / weight_sum if weight_sum else 1.
        sig_weights = sig_weights * scale_factor

        sig_title = sig_label + "*{:.3f}".format(scale_factor)
        sig_hist = rp.Hist(nbins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        # creating canvas
        canvas = pltstyle.init_canvas()

        # drawing hists
        rp.utils.draw([bkg_stack, sig_hist],
                      xtitle="Events predicted as " + node_cls,
                      ytitle="Events",
                      pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # legend
        # NOTE(review): the return value is kept in a local on purpose;
        # presumably the legend must stay referenced until the canvas is
        # saved -- confirm before removing the assignment
        legend = pltstyle.init_legend(bkg_hists + [sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        # the ratio is undefined without background in this node
        if weight_integral:
            signal_over_bkg = weight_sum / weight_integral
        else:
            signal_over_bkg = float("nan")
        print("S/B = {}".format(signal_over_bkg))

        # save
        out_path = self.save_path + "/predictions_{}.pdf".format(node_cls)
        pltstyle.save_canvas(canvas, out_path)
def plot_prenet_nodes(self, log=False):
    ''' plot prenet nodes

    For every entry of self.prenet_targets one canvas is saved as
    <save_path>/prenet_output_<node>.pdf. It shows the prenet output of
    that node for events labelled 0 ("False", drawn as stack) and for
    events labelled 1 ("True", drawn as overlay). The "True" histogram is
    scaled to the summed weight of the "False" events.

    log: if True, the y axis of the canvas is switched to log scale
    '''
    pltstyle.init_plot_style()

    n_bins = 20
    bin_range = [0., 1.]

    # fetch the per-event arrays once instead of once per event
    # NOTE(review): assumes the weights are 1-d and aligned with the rows
    # of self.prenet_predicted_vector -- confirm against the data class
    lumi_weights = np.asarray(self.data.get_lumi_weights())
    all_prenet_labels = self.data.get_prenet_test_labels()

    for i, node_cls in enumerate(self.prenet_targets):
        # get outputs of class node
        out_values = self.prenet_predicted_vector[:, i]
        prenet_labels = np.asarray(all_prenet_labels[:, i])

        # split events by the label of this target
        is_sig = prenet_labels == 1
        is_bkg = prenet_labels == 0

        sig_values = out_values[is_sig]
        bkg_values = out_values[is_bkg]

        sig_weights = lumi_weights[is_sig]
        bkg_weights = lumi_weights[is_bkg]

        # scale signal to the background integral; keep the signal
        # unscaled when its weights sum to zero to avoid dividing by zero
        sig_sum = sig_weights.sum()
        bkg_sig_ratio = 1. * bkg_weights.sum() / sig_sum if sig_sum else 1.
        sig_weights = sig_weights * bkg_sig_ratio

        sig_label = "True"
        bkg_label = "False"

        sig_title = sig_label + "*{:.3f}".format(bkg_sig_ratio)

        # plot output
        bkg_hist = rp.Hist(n_bins, *bin_range, title=bkg_label)
        pltstyle.set_bkg_hist_style(bkg_hist, bkg_label)
        bkg_hist.fill_array(bkg_values, bkg_weights)

        sig_hist = rp.Hist(n_bins, *bin_range, title=sig_title)
        pltstyle.set_sig_hist_style(sig_hist, sig_label)
        sig_hist.fill_array(sig_values, sig_weights)

        stack = rp.HistStack([bkg_hist], stacked=True,
                             drawstyle="HIST E1 X0")
        stack.SetMinimum(1e-4)

        canvas = pltstyle.init_canvas()

        rp.utils.draw([stack, sig_hist],
                      xtitle="prenet node {}".format(node_cls),
                      ytitle="Events",
                      pad=canvas)
        if log:
            canvas.cd().SetLogy()

        # NOTE(review): the return value is kept in a local on purpose;
        # presumably the legend must stay referenced until the canvas is
        # saved -- confirm before removing the assignment
        legend = pltstyle.init_legend([bkg_hist, sig_hist])
        pltstyle.add_lumi(canvas)
        pltstyle.add_category_label(canvas, self.event_category)

        out_path = self.save_path + "/prenet_output_{}.pdf".format(
            node_cls)
        pltstyle.save_canvas(canvas, out_path)