def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
        Build one common binning for all entries of histogram_information and
        attach the per-bin quality figures to every entry.

        The fine bins of the first histogram are walked from left to right;
        get_next_end() is asked for the end of each new bin given p_min,
        s_min, n_min and min_width. According to the original notes it grows
        the bin until the criteria hold for every histogram (its
        implementation is not part of this function).

        Afterwards each entry is rebinned with the resulting edges. If its
        last bin misses one of the thresholds and there are more than three
        bins, the last two bins are merged and the entry is re-evaluated.

        histogram_information: list of dicts with the keys 'hist', 'channel'
            and 'CoM'; every dict gains 'p_i', 's_i', 'N' and 'res'.
        x_min: optional x value to start from (ignored when falsy).

        Returns ( bin_edges, histogram_information ).
    '''
    def evaluate( rebinned, edges ):
        # projection first, then purities, stabilities and event counts
        content_of = rebinned.ProjectionX().GetBinContent
        p_values = calculate_purities( rebinned.Clone() )
        s_values = calculate_stabilities( rebinned.Clone() )
        counts = [int( content_of( index ) ) for index in range( 1, len( edges ) )]
        return p_values, s_values, counts

    histograms = [entry['hist'] for entry in histogram_information]
    reference = histograms[0]
    x_axis = reference.GetXaxis()
    last_fine_bin = reference.GetNbinsX()

    bin_start = 0
    bin_end = 0
    if x_min:
        bin_start = reference.ProjectionX().FindBin( x_min ) - 1
        bin_end = bin_start

    bin_edges = []
    resolutions = []
    while bin_end < last_fine_bin:
        # get_next_end returns ( bin end, p, s, N_reco, resolution )
        bin_end, _, _, _, resolution = get_next_end( histograms, bin_start, bin_end,
                                                     p_min, s_min, n_min, min_width )
        resolutions.append( resolution )
        if not bin_edges:
            # the very first edge is the low edge of the starting bin
            bin_edges.append( x_axis.GetBinLowEdge( bin_start + 1 ) )
        bin_edges.append( x_axis.GetBinLowEdge( bin_end ) + x_axis.GetBinWidth( bin_end ) )
        bin_start = bin_end

    # purity, stability and event counts for the final binning
    for entry in histogram_information:
        clone_name = entry['channel'] + '_' + str( entry['CoM'] )
        rebinned = rebin_2d( entry['hist'], bin_edges, bin_edges ).Clone( clone_name )
        purities, stabilities, n_events = evaluate( rebinned, bin_edges )

        last_bin_fails = ( purities[-1] < p_min
                           or stabilities[-1] < s_min
                           or n_events[-1] < n_min )
        if last_bin_fails and len( purities ) > 3:
            # merge the last two bins and evaluate this entry again
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]
            rebinned = rebin_2d( entry['hist'], bin_edges, bin_edges ).Clone()
            purities, stabilities, n_events = evaluate( rebinned, bin_edges )

        entry['p_i'] = purities
        entry['s_i'] = stabilities
        entry['N'] = n_events
        entry['res'] = resolutions

    return bin_edges, histogram_information
def test_rebin_2d_not_on_boundaries(self):
    '''
        Rebin the diagonal fixture with edges that do not coincide with its
        bin boundaries and compare the diagonal of the result with
        self.result_not_on_boundaries.
    '''
    edges = self.bin_edges_not_on_boundaries
    rebinned = rebin_2d(self.diagonals, edges, edges)
    n_diagonal = rebinned.nbins()
    for position in range(n_diagonal):
        bin_number = position + 1  # bin numbers start at 1, expectations at 0
        observed = rebinned.GetBinContent(bin_number, bin_number)
        expected = self.result_not_on_boundaries[position]
        self.assertEqual(observed, expected, 'histogram contents do not match')
def test_rebin_2d_not_on_boundaries( self ):
    '''
        Rebin the diagonal fixture with edges that do not coincide with its
        bin boundaries and compare the diagonal of the result with
        self.result_not_on_boundaries.
    '''
    edges = self.bin_edges_not_on_boundaries
    rebinned = rebin_2d( self.diagonals, edges, edges )
    n_diagonal = rebinned.nbins()
    for position in range( n_diagonal ):
        bin_number = position + 1  # bin numbers start at 1, expectations at 0
        observed = rebinned.GetBinContent( bin_number, bin_number )
        expected = self.result_not_on_boundaries[position]
        self.assertEqual( observed, expected, 'histogram contents do not match' )
def calculate_purity_stability(hist_info, bin_edges):
    '''
        Rebin the fine-binned histogram stored under hist_info['hist'] to
        bin_edges (the same edges on both axes) and evaluate it.

        Returns the tuple ( purities, stabilities ) as produced by
        calculate_purities and calculate_stabilities.
    '''
    rebinned = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
    return calculate_purities( rebinned ), calculate_stabilities( rebinned )
def setUp(self):
    '''
        Fill one 60x60 histogram with correlated Gaussian values, derive a
        binning by repeatedly calling pick_bins.get_next_end and keep both
        the values it reports (*_Integral) and the values recomputed from
        the rebinned histogram (*_GetBinContent).
    '''
    # NOTE(review): four values are unpacked from get_next_end below while
    # another function in this file unpacks five - confirm which return
    # shape pick_bins.get_next_end currently has.
    n_entries = 100000
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    first_axis = 60 + 10 * np.random.randn(n_entries)
    second_axis = first_axis + np.random.randn(n_entries)
    self.h1.fill_array(np.vstack((first_axis, second_axis)).T)
    self.h1 = fix_overflow(self.h1)

    self.histogram_information = [
        {"hist": self.h1, "CoM": 7, "channel": "test_1"},
    ]
    self.histograms = [entry["hist"] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    self.bin_edges = []
    # placeholders, replaced after the rebinning below
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    # filled bin by bin with what get_next_end reports
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    reference = self.histograms[0]
    x_axis = reference.GetXaxis()
    last_fine_bin = reference.GetNbinsX()
    bin_start = 0
    bin_end = 0
    while bin_end < last_fine_bin:
        bin_end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
            self.histograms, bin_start, bin_end,
            self.p_min, self.s_min, self.n_min, 0)
        if not self.bin_edges:
            # first pass: start with the low edge of the first bin
            self.bin_edges.append(x_axis.GetBinLowEdge(bin_start + 1))
        upper_edge = x_axis.GetBinLowEdge(bin_end) + x_axis.GetBinWidth(bin_end)
        self.bin_edges.append(upper_edge)
        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_gen_and_reco)
        bin_start = bin_end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    diagonal = self.h1_rebinned.GetBinContent
    self.n_events_GetBinContent = [
        int(diagonal(index, index)) for index in range(1, len(self.bin_edges))
    ]
def calculate_purity_stability(hist_info, bin_edges):
    '''
        Rebin the fine-binned histogram stored under hist_info['hist'] to
        bin_edges (the same edges on both axes) and evaluate it.

        Returns the tuple ( purities, stabilities ) as produced by
        calculate_purities and calculate_stabilities.
    '''
    rebinned = rebin_2d(hist_info['hist'], bin_edges, bin_edges).Clone()
    return calculate_purities(rebinned), calculate_stabilities(rebinned)
def setUp( self ):
    '''
        Create the histograms and expectations shared by the rebinning tests:
        a 1D histogram filled from the module-level x1, a randomly filled 2D
        histogram rebinned in two different ways, a 5x5 histogram with
        entries only on its diagonal, and three sets of new bin edges with
        the diagonal contents expected after rebinning.
    '''
    # create histograms
    self.h1 = Hist( 100, 0, 100, title = '1D' )
    self.h2 = Hist2D( 60, 40, 100, 60, 40, 100 )
    self.simple = Hist( 100, 0, 100, title = '1D' )

    # fill the histograms with our distributions
    # An explicit loop is used instead of map(): map() is lazy under
    # Python 3, so calling it only for its side effect would leave h1 empty.
    fill_h1 = self.h1.Fill
    for value in x1:
        fill_h1( value )
    # NOTE(review): np.arange( 4 ).reshape( 2, 2 ) is [[0, 1], [2, 3]], which
    # is not symmetric - confirm this is the intended covariance matrix.
    self.h2.fill_array(
        np.random.multivariate_normal(
            mean = ( 50, 50 ),
            cov = np.arange( 4 ).reshape( 2, 2 ),
            size = ( int( 1E6 ), ) )
    )
    self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]
    # rebin them
    self.h2_rebinned = self.h2.rebinned( self.bins, axis = 0 )
    self.h2_rebinned = self.h2_rebinned.rebinned( self.bins, axis = 1 )
    self.h2_rebinned_2 = rebin_2d( self.h2, self.bins, self.bins )

    # we only test symmetric bins for now
    self.n_bins_x = 5
    self.n_bins_y = 5
    # only entries in diagonals, p = 1, s = 1 for all bins
    self.diagonals = Hist2D( self.n_bins_x, 0, 10, self.n_bins_y, 0, 10 )
    # this creates
    # [0, 0, 0, 0, 1],
    # [0, 0, 0, 1, 0],
    # [0, 0, 1, 0, 0],
    # [0, 1, 0, 0, 0],
    # [1, 0, 0, 0, 0]
    for i in range( 1, self.n_bins_x + 1 ):
        self.diagonals.SetBinContent( i, i, 1 )

    # the following should result in
    # [0, 0, 2],
    # [0, 2, 0],
    # [1, 0, 0]
    self.bin_edges_nice = [0, 2, 6, 10]
    self.result_nice = [1, 2, 2]

    # the following should result in
    # [0, 0, 0, 2],
    # [0, 0, 2, 0]
    # [0, 1, 0, 0]
    # [0, 0, 0, 0]
    self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
    self.result_out_of_bound = [0, 1, 2, 2]

    # the following should result in
    # [0, 0, 2],
    # [0, 1, 0],
    # [2, 0, 0]
    self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
    self.result_not_on_boundaries = [2, 1, 2]

    # one entry of weight 1 at every integer from 0 to 99
    for i in range( 100 ):
        self.simple.Fill( i, 1 )
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace): global output_folder, output_formats hist = get_histogram_from_file( histogram, input_file ) print "bin edges contents : ", bin_edges new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone() # get_bin_content = hist.ProjectionX().GetBinContent purities = calculate_purities( new_hist.Clone() ) stabilities = calculate_stabilities( new_hist.Clone() ) # n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )] print "purities contents : ", purities print "stabilities contents : ", stabilities hist_stability = value_tuplelist_to_hist(stabilities, bin_edges) hist_purity = value_tuplelist_to_hist(purities, bin_edges) hist_purity.color = 'red' hist_stability.color = 'blue' hist_stability.linewidth = 4 hist_purity.linewidth = 4 x_limits = [bin_edges[0], bin_edges[-1]] y_limits = [0,1] plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' ) ax0 = plt.axes() ax0.minorticks_on() # ax0.grid( True, 'major', linewidth = 2 ) # ax0.grid( True, 'minor' ) plt.tick_params( **CMS.axis_label_major ) plt.tick_params( **CMS.axis_label_minor ) ax0.xaxis.labelpad = 12 ax0.yaxis.labelpad = 12 rplt.hist( hist_stability , stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' ) rplt.hist( hist_purity, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' ) ax0.set_xlim( x_limits ) ax0.set_ylim( y_limits ) plt.tick_params( **CMS.axis_label_major ) plt.tick_params( **CMS.axis_label_minor ) x_title = '$' + variables_latex[variable] + '$ [GeV]' plt.xlabel( x_title, CMS.x_axis_title ) leg = plt.legend(loc=4,prop={'size':40}) plt.tight_layout() plt.savefig('test.pdf') save_as_name = 'purityStability_'+channel + '_' + variable + '_' + str(options.CoM) + 'TeV' for output_format in output_formats: plt.savefig( output_folder + save_as_name + '.' + output_format )
def setUp(self):
    '''
        Create the histograms and expectations shared by the rebinning tests:
        a 1D histogram filled from the module-level x1, a randomly filled 2D
        histogram rebinned in two different ways, a 5x5 histogram with
        entries only on its diagonal, and three sets of new bin edges with
        the diagonal contents expected after rebinning.
    '''
    # create histograms
    self.h1 = Hist(100, 0, 100, title='1D')
    self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
    self.simple = Hist(100, 0, 100, title='1D')

    # fill the histograms with our distributions
    # An explicit loop is used instead of map(): map() is lazy under
    # Python 3, so calling it only for its side effect would leave h1 empty.
    fill_h1 = self.h1.Fill
    for value in x1:
        fill_h1(value)
    # NOTE(review): np.arange(4).reshape(2, 2) is [[0, 1], [2, 3]], which is
    # not symmetric - confirm this is the intended covariance matrix.
    self.h2.fill_array(
        np.random.multivariate_normal(mean=(50, 50),
                                      cov=np.arange(4).reshape(2, 2),
                                      size=(int(1E6), )))
    self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]
    # rebin them
    self.h2_rebinned = self.h2.rebinned(self.bins, axis=0)
    self.h2_rebinned = self.h2_rebinned.rebinned(self.bins, axis=1)
    self.h2_rebinned_2 = rebin_2d(self.h2, self.bins, self.bins)

    # we only test symmetric bins for now
    self.n_bins_x = 5
    self.n_bins_y = 5
    # only entries in diagonals, p = 1, s = 1 for all bins
    self.diagonals = Hist2D(self.n_bins_x, 0, 10, self.n_bins_y, 0, 10)
    # this creates
    # [0, 0, 0, 0, 1],
    # [0, 0, 0, 1, 0],
    # [0, 0, 1, 0, 0],
    # [0, 1, 0, 0, 0],
    # [1, 0, 0, 0, 0]
    for i in range(1, self.n_bins_x + 1):
        self.diagonals.SetBinContent(i, i, 1)

    # the following should result in
    # [0, 0, 2],
    # [0, 2, 0],
    # [1, 0, 0]
    self.bin_edges_nice = [0, 2, 6, 10]
    self.result_nice = [1, 2, 2]

    # the following should result in
    # [0, 0, 0, 2],
    # [0, 0, 2, 0]
    # [0, 1, 0, 0]
    # [0, 0, 0, 0]
    self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
    self.result_out_of_bound = [0, 1, 2, 2]

    # the following should result in
    # [0, 0, 2],
    # [0, 1, 0],
    # [2, 0, 0]
    self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
    self.result_not_on_boundaries = [2, 1, 2]

    # one entry of weight 1 at every integer from 0 to 99
    for i in range(100):
        self.simple.Fill(i, 1)
def setUp(self):
    '''
        Fill three 60x60 histograms with correlated Gaussian values (h2 with
        a fifth of the entries, h3 with half the spread), run
        pick_bins.get_best_binning on each of them and on the pairs (h1, h2)
        and (h1, h3), and rebin the histograms with the resulting edges.
    '''
    # NOTE(review): get_best_binning is called with five positional
    # arguments; another definition of get_best_binning in this file also
    # requires nice_width - confirm which signature pick_bins exposes.
    def make_info(hist, channel):
        return {'hist': hist, 'CoM': 7, 'channel': channel}

    def best_edges(information):
        edges, _ = pick_bins.get_best_binning(
            information, self.p_min, self.s_min, self.n_min, min_width)
        return edges

    # create histograms
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h3 = Hist2D(60, 40, 100, 60, 40, 100)

    n_1 = 10000
    n_2 = int(n_1 / 5)
    # all x samples are drawn before any y sample
    x_1 = 60 + 10 * np.random.randn(n_1)
    x_2 = 60 + 10 * np.random.randn(n_2)
    x_3 = 60 + 5 * np.random.randn(n_1)
    y_1 = x_1 + np.random.randn(n_1)
    y_2 = x_2 + np.random.randn(n_2)
    y_3 = x_3 + np.random.randn(n_1)

    # fill the histograms: h1 is the reference, h2 has fewer events,
    # h3 has a reduced spread
    self.h1.fill_array(np.vstack((x_1, y_1)).T)
    self.h2.fill_array(np.vstack((x_2, y_2)).T)
    self.h3.fill_array(np.vstack((x_3, y_3)).T)

    self.histogram_information_1 = [make_info(self.h1, 'test_1')]
    self.histogram_information_2 = [make_info(self.h2, 'test_2')]
    self.histogram_information_3 = [make_info(self.h3, 'test_3')]
    self.histogram_information_1_2 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h2, 'test_2'),
    ]
    self.histogram_information_1_3 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h3, 'test_3'),
    ]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 100
    min_width = 0.000000000000001

    self.bin_edges_1 = best_edges(self.histogram_information_1)
    self.bin_edges_2 = best_edges(self.histogram_information_2)
    self.bin_edges_3 = best_edges(self.histogram_information_3)
    self.bin_edges_1_2 = best_edges(self.histogram_information_1_2)
    self.bin_edges_1_3 = best_edges(self.histogram_information_1_3)

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges_1, self.bin_edges_1)
    self.h2_rebinned = rebin_2d(self.h2, self.bin_edges_2, self.bin_edges_2)
    self.h3_rebinned = rebin_2d(self.h3, self.bin_edges_3, self.bin_edges_3)
    # the combined binnings are both applied to h1
    self.h1_2_rebinned = rebin_2d(self.h1, self.bin_edges_1_2, self.bin_edges_1_2)
    self.h1_3_rebinned = rebin_2d(self.h1, self.bin_edges_1_3, self.bin_edges_1_3)
def plotting_response( histogram_information, variable, channel, bin_edges ):
    '''
        Rebin histogram_information['hist'] to bin_edges on both axes,
        divide every row read from it by the sum of that row, round to two
        decimals and draw the result as a 2D colour map.

        The plot is saved as '<channel>_<variable>_Response.pdf' under
        'plots/binning/response/' (created when missing). Returns None.
    '''
    global output_folder, output_formats, options

    my_cmap = cm.get_cmap( 'rainbow' )
    my_cmap.set_under( 'w' )

    response_plot = rebin_2d( histogram_information['hist'], bin_edges, bin_edges )
    norm_response_plot = Hist2D( bin_edges, bin_edges, type = 'D' )
    n_bins = len( bin_edges ) - 1
    read_content = response_plot.GetBinContent
    write_content = norm_response_plot.SetBinContent

    # NOTE(review): contents are read from bin numbers 1..n_bins+1 but
    # written to bin numbers 0..n_bins - confirm this shift is intended.
    positions = range( 0, n_bins + 1 )

    # one list of contents per first bin index
    columns = [[read_content( first + 1, second + 1 ) for second in positions]
               for first in positions]

    # Normalise by the reconstructed column and round
    normalised = [column / np.sum( column ) for column in columns]
    rounded_norm_xy = np.around( np.array( normalised ), 2 )

    # New 2D Hist + Mesh to Plot
    for bin_i in positions:
        for bin_j in positions:
            write_content( bin_i, bin_j, rounded_norm_xy.item( bin_j, bin_i ) )

    X, Y = np.meshgrid( list( norm_response_plot.x() ), list( norm_response_plot.y() ) )
    x_points = X.ravel()
    y_points = Y.ravel()
    weights = np.array( norm_response_plot.z() ).ravel()

    v_unit = '$' + variables_latex[variable] + '$'
    if variable in ['HT', 'ST', 'MET', 'lepton_pt', 'WPT']:
        v_unit += ' [GeV]'
    x_title = 'Reconstructed ' + v_unit
    y_title = 'Generated ' + v_unit
    title = "Response matrix normalised wrt reconstructed bins"

    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )
    ax0 = plt.axes()
    ax0.minorticks_on()
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    plot_edges = ( list( norm_response_plot.xedges() ), list( norm_response_plot.yedges() ) )
    plt.hist2d( x_points, y_points, weights = weights, bins = plot_edges,
                cmap = my_cmap, vmin = 0, vmax = 1 )
    colorbar = plt.colorbar()
    colorbar.ax.tick_params( **CMS.axis_label_major )

    plt.xlabel( x_title, CMS.x_axis_title )
    plt.ylabel( y_title, CMS.y_axis_title )
    plt.title( title, CMS.title )
    plt.tight_layout()

    plot_filepath = 'plots/binning/response/'
    make_folder_if_not_exists( plot_filepath )
    plot_filename = '{}_{}_Response.pdf'.format( channel, variable )
    plt.savefig( plot_filepath + plot_filename, bbox_inches = 'tight' )
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min = None, is_NJet=False, plot_resolution=False ):
    '''
        Build one common binning for all entries of histogram_information and
        attach the per-bin quality figures to every entry.

        The fine bins of the first histogram are walked from left to right;
        get_next_end() is asked for the end of each new bin given p_min,
        s_min, n_min, min_width, nice_width and is_NJet. According to the
        original notes it grows the bin until the criteria hold for every
        channel histogram (its implementation is not part of this function).

        Afterwards each entry is rebinned with the resulting edges. If its
        last bin misses one of the thresholds and there are more than three
        bins, the last two bins are merged and the entry is re-evaluated.
        Finally the last edge is rounded to a multiple of nice_width.

        histogram_information: list of dicts with the keys 'hist', 'channel'
            and 'CoM'; every dict gains 'p_i', 's_i', 'N' and 'res'.
        x_min: optional x value to start from instead of the first fine bin.
        plot_resolution: forwarded to calculate_resolutions as res_to_plot.

        Returns ( bin_edges, histogram_information ).
    '''
    def evaluate( rebinned, edges, channel ):
        # Always read event counts from the projection of the histogram that
        # is being evaluated. Previously the counts after a merge were still
        # read from the projection of the pre-merge histogram.
        get_bin_content = rebinned.ProjectionX().GetBinContent
        p_values = calculate_purities( rebinned.Clone() )
        s_values = calculate_stabilities( rebinned.Clone() )
        # `var` is not a parameter; it is resolved from the enclosing module.
        res_values = calculate_resolutions( var, bin_edges=edges, channel = channel, res_to_plot = plot_resolution )
        counts = [int( get_bin_content( i ) ) for i in range( 1, len( edges ) )]
        return p_values, s_values, res_values, counts

    first_hist = histogram_information[0]['hist']
    x_axis = first_hist.GetXaxis()
    n_bins = first_hist.GetNbinsX()

    current_bin_start = 0
    current_bin_end = 0
    # Start at minimum x instead of 0
    # NOTE(review): a falsy x_min (None or 0) keeps the start at fine bin 0.
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    bin_edges = []
    while current_bin_end < n_bins:
        # get_next_end returns the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end( histogram_information, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width, nice_width, is_NJet=is_NJet )
        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append( x_axis.GetBinLowEdge( current_bin_start + 1 ) )
        # Attach the current bin end edge
        bin_edges.append( x_axis.GetBinLowEdge( current_bin_end ) + x_axis.GetBinWidth( current_bin_end ) )
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        channel = hist_info['channel']
        new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone( channel + '_' + str( hist_info['CoM'] ) )
        purities, stabilities, resolutions, n_events = evaluate( new_hist, bin_edges, channel )

        # Now check if the last bin also fulfils the requirements
        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails and len( purities ) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]
            # Recalculate everything, including the event counts, on the
            # merged binning
            new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
            purities, stabilities, resolutions, n_events = evaluate( new_hist, bin_edges, channel )

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round( bin_edges[-1] / nice_width )

        # Add purities, stabilities, n_events and resolutions to the
        # histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
def plotting_response(histogram_information, variable, channel, bin_edges):
    '''
        Rebin histogram_information['hist'] to bin_edges on both axes,
        divide every row read from it by the sum of that row, round to two
        decimals and draw the result as a 2D colour map.

        The plot is saved as '<channel>_<variable>_Response.pdf' under
        'plots/binning/response/' (created when missing). Returns None.
    '''
    global output_folder, output_formats, options

    my_cmap = cm.get_cmap('rainbow')
    my_cmap.set_under('w')

    response_plot = rebin_2d(histogram_information['hist'], bin_edges, bin_edges)
    norm_response_plot = Hist2D(bin_edges, bin_edges, type='D')
    n_bins = len(bin_edges) - 1
    read_content = response_plot.GetBinContent
    write_content = norm_response_plot.SetBinContent

    # NOTE(review): contents are read from bin numbers 1..n_bins+1 but
    # written to bin numbers 0..n_bins - confirm this shift is intended.
    positions = range(0, n_bins + 1)

    # one list of contents per first bin index
    columns = [[read_content(first + 1, second + 1) for second in positions]
               for first in positions]

    # Normalise by the reconstructed column and round
    normalised = [column / np.sum(column) for column in columns]
    rounded_norm_xy = np.around(np.array(normalised), 2)

    # New 2D Hist + Mesh to Plot
    for bin_i in positions:
        for bin_j in positions:
            write_content(bin_i, bin_j, rounded_norm_xy.item(bin_j, bin_i))

    X, Y = np.meshgrid(list(norm_response_plot.x()),
                       list(norm_response_plot.y()))
    x_points = X.ravel()
    y_points = Y.ravel()
    weights = np.array(norm_response_plot.z()).ravel()

    v_unit = '$' + variables_latex[variable] + '$'
    if variable in ['HT', 'ST', 'MET', 'lepton_pt', 'WPT']:
        v_unit += ' [GeV]'
    x_title = 'Reconstructed ' + v_unit
    y_title = 'Generated ' + v_unit
    title = "Response matrix normalised wrt reconstructed bins"

    plt.figure(figsize=(20, 16), dpi=200, facecolor='white')
    ax0 = plt.axes()
    ax0.minorticks_on()
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    plot_edges = (list(norm_response_plot.xedges()),
                  list(norm_response_plot.yedges()))
    plt.hist2d(x_points, y_points, weights=weights, bins=plot_edges,
               cmap=my_cmap, vmin=0, vmax=1)
    colorbar = plt.colorbar()
    colorbar.ax.tick_params(**CMS.axis_label_major)

    plt.xlabel(x_title, CMS.x_axis_title)
    plt.ylabel(y_title, CMS.y_axis_title)
    plt.title(title, CMS.title)
    plt.tight_layout()

    plot_filepath = 'plots/binning/response/'
    make_folder_if_not_exists(plot_filepath)
    plot_filename = '{}_{}_Response.pdf'.format(channel, variable)
    plt.savefig(plot_filepath + plot_filename, bbox_inches='tight')
def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min=None, is_NJet=False, plot_resolution=False):
    '''
        Build one common binning for all entries of histogram_information and
        attach the per-bin quality figures to every entry.

        The fine bins of the first histogram are walked from left to right;
        get_next_end() is asked for the end of each new bin given p_min,
        s_min, n_min, min_width, nice_width and is_NJet. According to the
        original notes it grows the bin until the criteria hold for every
        channel histogram (its implementation is not part of this function).

        Afterwards each entry is rebinned with the resulting edges. If its
        last bin misses one of the thresholds and there are more than three
        bins, the last two bins are merged and the entry is re-evaluated.
        Finally the last edge is rounded to a multiple of nice_width.

        histogram_information: list of dicts with the keys 'hist', 'channel'
            and 'CoM'; every dict gains 'p_i', 's_i', 'N' and 'res'.
        x_min: optional x value to start from instead of the first fine bin.
        plot_resolution: forwarded to calculate_resolutions as res_to_plot.

        Returns (bin_edges, histogram_information).
    '''
    def evaluate(rebinned, edges, channel):
        # Always read event counts from the projection of the histogram that
        # is being evaluated. Previously the counts after a merge were still
        # read from the projection of the pre-merge histogram.
        get_bin_content = rebinned.ProjectionX().GetBinContent
        p_values = calculate_purities(rebinned.Clone())
        s_values = calculate_stabilities(rebinned.Clone())
        # `var` is not a parameter; it is resolved from the enclosing module.
        res_values = calculate_resolutions(var,
                                           bin_edges=edges,
                                           channel=channel,
                                           res_to_plot=plot_resolution)
        counts = [int(get_bin_content(i)) for i in range(1, len(edges))]
        return p_values, s_values, res_values, counts

    first_hist = histogram_information[0]['hist']
    x_axis = first_hist.GetXaxis()
    n_bins = first_hist.GetNbinsX()

    current_bin_start = 0
    current_bin_end = 0
    # Start at minimum x instead of 0
    # NOTE(review): a falsy x_min (None or 0) keeps the start at fine bin 0.
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    bin_edges = []
    while current_bin_end < n_bins:
        # get_next_end returns the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end(histogram_information,
                                                   current_bin_start,
                                                   current_bin_end,
                                                   p_min,
                                                   s_min,
                                                   n_min,
                                                   min_width,
                                                   nice_width,
                                                   is_NJet=is_NJet)
        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append(x_axis.GetBinLowEdge(current_bin_start + 1))
        # Attach the current bin end edge
        bin_edges.append(x_axis.GetBinLowEdge(current_bin_end) +
                         x_axis.GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        channel = hist_info['channel']
        new_hist = rebin_2d(hist_info['hist'], bin_edges, bin_edges).Clone(
            channel + '_' + str(hist_info['CoM']))
        purities, stabilities, resolutions, n_events = evaluate(
            new_hist, bin_edges, channel)

        # Now check if the last bin also fulfils the requirements
        last_bin_fails = (purities[-1] < p_min or stabilities[-1] < s_min
                          or n_events[-1] < n_min)
        if last_bin_fails and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]
            # Recalculate everything, including the event counts, on the
            # merged binning
            new_hist = rebin_2d(hist_info['hist'], bin_edges, bin_edges).Clone()
            purities, stabilities, resolutions, n_events = evaluate(
                new_hist, bin_edges, channel)

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1] / nice_width)

        # Add purities, stabilities, n_events and resolutions to the
        # histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
def setUp(self):
    '''
        Fill one 60x60 histogram with correlated Gaussian values, derive a
        binning by repeatedly calling pick_bins.get_next_end and keep both
        the values it reports (*_Integral) and the values recomputed from
        the rebinned histogram (*_GetBinContent).
    '''
    # NOTE(review): four values are unpacked from get_next_end below while
    # another function in this file unpacks five - confirm which return
    # shape pick_bins.get_next_end currently has.
    n_entries = 100000
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    first_axis = 60 + 10 * np.random.randn(n_entries)
    second_axis = first_axis + np.random.randn(n_entries)
    self.h1.fill_array(np.vstack((first_axis, second_axis)).T)
    self.h1 = fix_overflow(self.h1)

    self.histogram_information = [
        {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'},
    ]
    self.histograms = [entry['hist'] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    self.bin_edges = []
    # placeholders, replaced after the rebinning below
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    # filled bin by bin with what get_next_end reports
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    reference = self.histograms[0]
    x_axis = reference.GetXaxis()
    last_fine_bin = reference.GetNbinsX()
    bin_start = 0
    bin_end = 0
    while bin_end < last_fine_bin:
        bin_end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
            self.histograms, bin_start, bin_end,
            self.p_min, self.s_min, self.n_min, 0)
        if not self.bin_edges:
            # first pass: start with the low edge of the first bin
            self.bin_edges.append(x_axis.GetBinLowEdge(bin_start + 1))
        upper_edge = x_axis.GetBinLowEdge(bin_end) + x_axis.GetBinWidth(bin_end)
        self.bin_edges.append(upper_edge)
        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_gen_and_reco)
        bin_start = bin_end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    diagonal = self.h1_rebinned.GetBinContent
    self.n_events_GetBinContent = [
        int(diagonal(index, index)) for index in range(1, len(self.bin_edges))
    ]
def setUp(self):
    '''
        Fill three 60x60 histograms with correlated Gaussian values (h2 with
        a fifth of the entries, h3 with half the spread), run
        pick_bins.get_best_binning on each of them and on the pairs (h1, h2)
        and (h1, h3), and rebin the histograms with the resulting edges.
    '''
    # NOTE(review): get_best_binning is called with five positional
    # arguments; another definition of get_best_binning in this file also
    # requires nice_width - confirm which signature pick_bins exposes.
    def make_info(hist, channel):
        return {'hist': hist, 'CoM': 7, 'channel': channel}

    def best_edges(information):
        edges, _ = pick_bins.get_best_binning(
            information, self.p_min, self.s_min, self.n_min, min_width)
        return edges

    # create histograms
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h3 = Hist2D(60, 40, 100, 60, 40, 100)

    n_1 = 10000
    n_2 = int(n_1 / 5)
    # all x samples are drawn before any y sample
    x_1 = 60 + 10 * np.random.randn(n_1)
    x_2 = 60 + 10 * np.random.randn(n_2)
    x_3 = 60 + 5 * np.random.randn(n_1)
    y_1 = x_1 + np.random.randn(n_1)
    y_2 = x_2 + np.random.randn(n_2)
    y_3 = x_3 + np.random.randn(n_1)

    # fill the histograms: h1 is the reference, h2 has fewer events,
    # h3 has a reduced spread
    self.h1.fill_array(np.vstack((x_1, y_1)).T)
    self.h2.fill_array(np.vstack((x_2, y_2)).T)
    self.h3.fill_array(np.vstack((x_3, y_3)).T)

    self.histogram_information_1 = [make_info(self.h1, 'test_1')]
    self.histogram_information_2 = [make_info(self.h2, 'test_2')]
    self.histogram_information_3 = [make_info(self.h3, 'test_3')]
    self.histogram_information_1_2 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h2, 'test_2'),
    ]
    self.histogram_information_1_3 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h3, 'test_3'),
    ]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 100
    min_width = 0.000000000000001

    self.bin_edges_1 = best_edges(self.histogram_information_1)
    self.bin_edges_2 = best_edges(self.histogram_information_2)
    self.bin_edges_3 = best_edges(self.histogram_information_3)
    self.bin_edges_1_2 = best_edges(self.histogram_information_1_2)
    self.bin_edges_1_3 = best_edges(self.histogram_information_1_3)

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges_1, self.bin_edges_1)
    self.h2_rebinned = rebin_2d(self.h2, self.bin_edges_2, self.bin_edges_2)
    self.h3_rebinned = rebin_2d(self.h3, self.bin_edges_3, self.bin_edges_3)
    # the combined binnings are both applied to h1
    self.h1_2_rebinned = rebin_2d(self.h1, self.bin_edges_1_2, self.bin_edges_1_2)
    self.h1_3_rebinned = rebin_2d(self.h1, self.bin_edges_1_3, self.bin_edges_1_3)