def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
        Build one set of bin edges and attach per-bin quality numbers to
        every entry of histogram_information.

        1. Starting at bin 0 (or at the bin holding x_min when x_min is
           truthy), ask get_next_end for the next bin end until the x axis of
           the first histogram is used up. The fifth value it returns is
           collected as the resolution of that bin.
        2. For each entry, rebin its 'hist' to the edges found and compute
           purities, stabilities and the event count per bin.
        3. If the last bin is below p_min, s_min or n_min and more than three
           bins exist, merge the last two bins and recompute.

        Each entry gains the keys 'p_i', 's_i', 'N' and 'res'.
        Returns ( bin_edges, histogram_information ).
    '''
    def summarise( rebinned, edges ):
        # purities, stabilities and event counts per bin of a rebinned histogram
        content_of = rebinned.ProjectionX().GetBinContent
        p_values = calculate_purities( rebinned.Clone() )
        s_values = calculate_stabilities( rebinned.Clone() )
        counts = [int( content_of( index ) ) for index in range( 1, len( edges ) )]
        return p_values, s_values, counts

    histograms = [entry['hist'] for entry in histogram_information]
    reference = histograms[0]
    x_axis = reference.GetXaxis()
    last_bin = reference.GetNbinsX()

    start = 0
    if x_min:
        start = reference.ProjectionX().FindBin( x_min ) - 1
    end = start

    bin_edges = []
    resolutions = []
    while end < last_bin:
        # get_next_end returns five values; only the new end and the last one are used
        end, _, _, _, resolution = get_next_end( histograms, start, end,
                                                 p_min, s_min, n_min, min_width )
        resolutions.append( resolution )
        if len( bin_edges ) == 0:
            # very first bin: also record its lower edge
            bin_edges.append( x_axis.GetBinLowEdge( start + 1 ) )
        upper_edge = x_axis.GetBinLowEdge( end ) + x_axis.GetBinWidth( end )
        bin_edges.append( upper_edge )
        start = end

    for entry in histogram_information:
        coarse = rebin_2d( entry['hist'], bin_edges, bin_edges )
        named = coarse.Clone( entry['channel'] + '_' + str( entry['CoM'] ) )
        purities, stabilities, n_events = summarise( named, bin_edges )

        last_bin_fails = ( purities[-1] < p_min
                           or stabilities[-1] < s_min
                           or n_events[-1] < n_min )
        if last_bin_fails and len( purities ) > 3:
            # drop the edge between the last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]
            merged = rebin_2d( entry['hist'], bin_edges, bin_edges ).Clone()
            purities, stabilities, n_events = summarise( merged, bin_edges )

        # NOTE(review): resolutions is not shortened when bins are merged
        entry['p_i'] = purities
        entry['s_i'] = stabilities
        entry['N'] = n_events
        entry['res'] = resolutions

    return bin_edges, histogram_information
def test_pre_calculated_stability( self ):
    # Stabilities of self.pre_calculated: one value per x bin, 0.25 in the
    # first bin, 0.2 in the last bin and exactly 1 everywhere in between.
    values = calculate_stabilities( self.pre_calculated )
    n_values = len( values )
    self.assertEqual( n_values, self.n_bins_x, 'Invalid number of stability terms' )

    inner_values = values[1:-1]
    for inner in inner_values:
        self.assertEqual( inner, 1 )

    first, last = values[0], values[-1]
    self.assertEqual( first, 0.25 )
    self.assertEqual( last, 0.2 )
def test_pre_calculated_stability(self):
    # One stability per x bin is expected for self.pre_calculated; the edge
    # bins are checked against 0.25 / 0.2 and all inner bins against 1.
    result = calculate_stabilities(self.pre_calculated)
    self.assertEqual(
        len(result),
        self.n_bins_x,
        'Invalid number of stability terms',
    )

    for middle in result[1:-1]:
        self.assertEqual(middle, 1)

    lowest_bin = result[0]
    self.assertEqual(lowest_bin, 0.25)
    highest_bin = result[-1]
    self.assertEqual(highest_bin, 0.2)
def test_random_elipse_stability( self ):
    # Stabilities of self.random_elipse: one per x bin, each strictly
    # positive and below roughly 0.6.
    values = calculate_stabilities( self.random_elipse )
    self.assertEqual( len( values ), self.n_bins_x, 'Invalid number of stability terms' )

    # 0.6 plus a 10% allowance for the randomness of the input
    upper_bound = 0.6 + 0.06
    for value in values:
        self.assertGreater( value, 0 )
        self.assertLess( value, upper_bound )
def setUp(self):
    # Fill a 2D histogram with correlated Gaussian samples, derive bin edges
    # with pick_bins.get_next_end and store both the numbers reported while
    # picking the bins (*_Integral) and the ones recomputed from the rebinned
    # histogram (*_GetBinContent).
    n_samples = 100000
    first_coord = 60 + 10 * np.random.randn(n_samples)
    second_coord = first_coord + np.random.randn(n_samples)
    samples = np.vstack((first_coord, second_coord)).T

    filled = Hist2D(60, 40, 100, 60, 40, 100)
    filled.fill_array(samples)
    self.h1 = fix_overflow(filled)

    self.histogram_information = [
        {"hist": self.h1, "CoM": 7, "channel": "test_1"},
    ]
    self.histograms = [entry["hist"] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    reference = self.histograms[0]
    x_axis = reference.GetXaxis()
    total_bins = reference.GetNbinsX()

    start = 0
    end = 0
    while end < total_bins:
        end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
            self.histograms, start, end, self.p_min, self.s_min, self.n_min, 0
        )
        if len(self.bin_edges) == 0:
            # the first bin also contributes its lower edge
            self.bin_edges.append(x_axis.GetBinLowEdge(start + 1))
        upper_edge = x_axis.GetBinLowEdge(end) + x_axis.GetBinWidth(end)
        self.bin_edges.append(upper_edge)

        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_gen_and_reco)
        start = end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    # diagonal bin contents of the rebinned histogram
    n_new_bins = len(self.bin_edges)
    self.n_events_GetBinContent = [
        int(self.h1_rebinned.GetBinContent(k, k)) for k in range(1, n_new_bins)
    ]
def calculate_purity_stability(hist_info, bin_edges):
    '''
        Rebin the fine-binned histogram stored under hist_info['hist'] to
        bin_edges on both axes and return ( purities, stabilities ) of the
        rebinned copy.
    '''
    rebinned = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
    purities = calculate_purities( rebinned )
    stabilities = calculate_stabilities( rebinned )
    return purities, stabilities
def calculate_purity_stability(hist_info, bin_edges):
    '''
        Return the purities and stabilities of hist_info['hist'] after it has
        been rebinned to bin_edges along both axes.
    '''
    fine_hist = hist_info['hist']
    coarse_hist = rebin_2d(fine_hist, bin_edges, bin_edges).Clone()
    # purities are evaluated before stabilities, on the same rebinned copy
    return calculate_purities(coarse_hist), calculate_stabilities(coarse_hist)
def test_random_elipse_stability(self):
    # self.random_elipse must give one stability per x bin, each of them
    # above 0 and below ~0.6.
    result = calculate_stabilities(self.random_elipse)
    self.assertEqual(
        len(result),
        self.n_bins_x,
        'Invalid number of stability terms',
    )

    for stability in result:
        self.assertGreater(stability, 0)
        # allow for 10% error due to randomness
        self.assertLess(stability, 0.6 + 0.06)
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
        Plot purity and stability per bin for one channel and variable.

        The histogram named `histogram` is read from `input_file`, rebinned to
        `bin_edges` on both axes, and its purities and stabilities are drawn
        into one figure. The figure is saved as
        output_folder + 'purityStability_<channel>_<variable>_<CoM>TeV.<format>'
        for every format in the module-level output_formats (CoM is taken from
        the module-level options), and additionally as 'test.pdf' in the
        current working directory.

        isVisiblePhaseSpace is accepted but not used in this function.
        Returns nothing.
    '''
    hist = get_histogram_from_file( histogram, input_file )
    # written so that the text is identical under Python 2 and also parses under Python 3
    print( 'bin edges contents :  %s' % ( bin_edges, ) )
    new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone()
    purities = calculate_purities( new_hist.Clone() )
    stabilities = calculate_stabilities( new_hist.Clone() )
    print( 'purities contents :  %s' % ( purities, ) )
    print( 'stabilities contents :  %s' % ( stabilities, ) )

    hist_stability = value_tuplelist_to_hist( stabilities, bin_edges )
    hist_purity = value_tuplelist_to_hist( purities, bin_edges )

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0, 1]

    figure = plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    rplt.hist( hist_stability, stacked = False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' )
    rplt.hist( hist_purity, stacked = False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' )

    ax0.set_xlim( x_limits )
    ax0.set_ylim( y_limits )

    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    x_title = '$' + variables_latex[variable] + '$ [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )

    plt.legend( loc = 4, prop = {'size': 40} )

    plt.tight_layout()

    # NOTE(review): looks like leftover debug output; kept because removing it
    # would change which files this function writes.
    plt.savefig( 'test.pdf' )
    save_as_name = 'purityStability_' + channel + '_' + variable + '_' + str( options.CoM ) + 'TeV'
    for output_format in output_formats:
        plt.savefig( output_folder + save_as_name + '.' + output_format )

    # release the figure; pyplot otherwise keeps every figure alive across calls
    plt.close( figure )
def test_stabilities(self):
    # Every stability computed from self.h1_rebinned has to be at least
    # self.s_min.
    for bin_stability in calculate_stabilities(self.h1_rebinned):
        self.assertGreaterEqual(bin_stability, self.s_min)
def test_stabilities_reduced_spread(self):
    # Every stability computed from self.h3_rebinned has to be at least
    # self.s_min.
    for bin_stability in calculate_stabilities(self.h3_rebinned):
        self.assertGreaterEqual(bin_stability, self.s_min)
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min = None, is_NJet=False, plot_resolution=False ):
    '''
        Find bin edges and attach per-bin quality numbers to every entry of
        histogram_information.

        Procedure:
          1. Starting at bin 0 (or at the bin containing x_min when x_min is
             truthy), call get_next_end repeatedly until the x axis of the
             first histogram is exhausted; each call yields the next bin end.
          2. For every entry, rebin its 'hist' to these edges and compute
             purities, stabilities, resolutions and event counts per bin.
          3. If the last bin is below p_min, s_min or n_min and there are more
             than three bins, merge the last two bins and recompute.
          4. Round the last edge to a multiple of nice_width.

        Parameters:
          histogram_information : list of dicts with keys 'hist', 'channel'
                                  and 'CoM'; must not be empty
          p_min, s_min, n_min   : thresholds for purity, stability and event
                                  count; also passed on to get_next_end
          min_width, nice_width : passed on to get_next_end
          x_min                 : optional x value to start the binning at
          is_NJet               : passed on to get_next_end
          plot_resolution       : passed to calculate_resolutions as res_to_plot

        Each entry gains the keys 'p_i', 's_i', 'N' and 'res'.
        Returns ( bin_edges, histogram_information ).
    '''
    histograms = [info['hist'] for info in histogram_information]
    bin_edges = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin( x_min ) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Only the next bin end is used; the other four return values are discarded
        current_bin_end, _, _, _, _ = get_next_end( histogram_information, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width, nice_width, is_NJet=is_NJet )
        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) )
        # Attach the current bin end edge
        bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_end ) + first_hist.GetXaxis().GetBinWidth( current_bin_end ) )
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone( hist_info['channel'] + '_' + str( hist_info['CoM'] ) )
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities( new_hist.Clone() )
        stabilities = calculate_stabilities( new_hist.Clone() )
        # NOTE(review): `var` is not defined in this function; presumably a
        # module-level name - confirm.
        resolutions = calculate_resolutions( var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )
        n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Now check if the last bin also fulfils the requirements
        if ( purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min ) and len( purities ) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]

            # Recalculate purities and stabilities
            new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
            # Re-project the merged histogram; the previous accessor still
            # points at the projection made before the merge.
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities = calculate_purities( new_hist.Clone() )
            stabilities = calculate_stabilities( new_hist.Clone() )
            resolutions = calculate_resolutions( var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )
            n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round( bin_edges[-1] / nice_width )

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
def test_stabilities_combined_1_3(self):
    # Every stability computed from self.h1_3_rebinned has to be at least
    # self.s_min.
    for bin_stability in calculate_stabilities(self.h1_3_rebinned):
        self.assertGreaterEqual(bin_stability, self.s_min)
def setUp(self):
    # Prepare one randomly filled 2D histogram, pick bin edges for it with
    # pick_bins.get_next_end and keep two sets of per-bin numbers: the ones
    # returned while picking (*_Integral) and the ones recomputed from the
    # rebinned histogram (*_GetBinContent).
    sample_size = 100000
    centre_values = 60 + 10 * np.random.randn(sample_size)
    smeared_values = centre_values + np.random.randn(sample_size)
    pairs = np.vstack((centre_values, smeared_values)).T

    raw_hist = Hist2D(60, 40, 100, 60, 40, 100)
    raw_hist.fill_array(pairs)
    self.h1 = fix_overflow(raw_hist)

    self.histogram_information = [
        {
            'hist': self.h1,
            'CoM': 7,
            'channel': 'test_1',
        },
    ]
    self.histograms = [item['hist'] for item in self.histogram_information]

    # requirements for new binning
    self.p_min, self.s_min, self.n_min = 0.5, 0.5, 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    leading_hist = self.histograms[0]
    axis = leading_hist.GetXaxis()
    n_fine_bins = leading_hist.GetNbinsX()

    bin_start = 0
    bin_end = 0
    while bin_end < n_fine_bins:
        picked = pick_bins.get_next_end(
            self.histograms, bin_start, bin_end,
            self.p_min, self.s_min, self.n_min, 0)
        bin_end, p_value, s_value, n_gen_and_reco = picked

        if not self.bin_edges:
            # lower edge of the very first bin
            self.bin_edges.append(axis.GetBinLowEdge(bin_start + 1))
        self.bin_edges.append(
            axis.GetBinLowEdge(bin_end) + axis.GetBinWidth(bin_end))

        self.purities_Integral.append(p_value)
        self.stabilities_Integral.append(s_value)
        self.n_events_Integral.append(n_gen_and_reco)
        bin_start = bin_end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    # contents of the diagonal bins of the rebinned histogram
    self.n_events_GetBinContent = [
        int(self.h1_rebinned.GetBinContent(diag, diag))
        for diag in range(1, len(self.bin_edges))
    ]
def test_best_case_stability( self ):
    # self.best_case must give one stability per x bin and each of them
    # must be exactly 1.
    values = calculate_stabilities( self.best_case )
    n_values = len( values )
    self.assertEqual( n_values, self.n_bins_x, 'Invalid number of stability terms' )

    for value in values:
        self.assertEqual( value, 1 )
def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min=None, is_NJet=False, plot_resolution=False):
    '''
        Find bin edges and attach per-bin quality numbers to every entry of
        histogram_information.

        Procedure:
          1. Starting at bin 0 (or at the bin containing x_min when x_min is
             truthy), call get_next_end repeatedly until the x axis of the
             first histogram is exhausted; each call yields the next bin end.
          2. For every entry, rebin its 'hist' to these edges and compute
             purities, stabilities, resolutions and event counts per bin.
          3. If the last bin is below p_min, s_min or n_min and there are more
             than three bins, merge the last two bins and recompute.
          4. Round the last edge to a multiple of nice_width.

        Parameters:
          histogram_information : list of dicts with keys 'hist', 'channel'
                                  and 'CoM'; must not be empty
          p_min, s_min, n_min   : thresholds for purity, stability and event
                                  count; also passed on to get_next_end
          min_width, nice_width : passed on to get_next_end
          x_min                 : optional x value to start the binning at
          is_NJet               : passed on to get_next_end
          plot_resolution       : passed to calculate_resolutions as res_to_plot

        Each entry gains the keys 'p_i', 's_i', 'N' and 'res'.
        Returns ( bin_edges, histogram_information ).
    '''
    histograms = [info['hist'] for info in histogram_information]
    bin_edges = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Only the next bin end is used; the other four return values are discarded
        current_bin_end, _, _, _, _ = get_next_end(
            histogram_information, current_bin_start, current_bin_end,
            p_min, s_min, n_min, min_width, nice_width, is_NJet=is_NJet)
        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append(
                first_hist.GetXaxis().GetBinLowEdge(current_bin_start + 1))
        # Attach the current bin end edge
        bin_edges.append(
            first_hist.GetXaxis().GetBinLowEdge(current_bin_end)
            + first_hist.GetXaxis().GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist = rebin_2d(hist_info['hist'], bin_edges, bin_edges).Clone(
            hist_info['channel'] + '_' + str(hist_info['CoM']))
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        # NOTE(review): `var` is not defined in this function; presumably a
        # module-level name - confirm.
        resolutions = calculate_resolutions(
            var, bin_edges=bin_edges, channel=hist_info['channel'],
            res_to_plot=plot_resolution)
        n_events = [int(get_bin_content(i)) for i in range(1, len(bin_edges))]

        # Now check if the last bin also fulfils the requirements
        if (purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min) and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]

            # Recalculate purities and stabilities
            new_hist = rebin_2d(hist_info['hist'], bin_edges, bin_edges).Clone()
            # Re-project the merged histogram; the previous accessor still
            # points at the projection made before the merge.
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities = calculate_purities(new_hist.Clone())
            stabilities = calculate_stabilities(new_hist.Clone())
            resolutions = calculate_resolutions(
                var, bin_edges=bin_edges, channel=hist_info['channel'],
                res_to_plot=plot_resolution)
            n_events = [
                int(get_bin_content(i)) for i in range(1, len(bin_edges))
            ]

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1] / nice_width)

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
def test_stabilities_reduced_spread(self):
    # No stability of self.h3_rebinned may fall below self.s_min.
    computed = calculate_stabilities(self.h3_rebinned)
    for value in computed:
        self.assertGreaterEqual(value, self.s_min)
def test_stabilities_combined_1_3(self):
    # No stability of self.h1_3_rebinned may fall below self.s_min.
    computed = calculate_stabilities(self.h1_3_rebinned)
    for value in computed:
        self.assertGreaterEqual(value, self.s_min)
def test_best_case_stability(self):
    # For self.best_case the number of stabilities must match the number of
    # x bins, and every single one must equal 1.
    result = calculate_stabilities(self.best_case)
    self.assertEqual(
        len(result),
        self.n_bins_x,
        'Invalid number of stability terms',
    )
    for stability in result:
        self.assertEqual(stability, 1)
def test_stabilities(self):
    # No stability of self.h1_rebinned may fall below self.s_min.
    computed = calculate_stabilities(self.h1_rebinned)
    for value in computed:
        self.assertGreaterEqual(value, self.s_min)