def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, x_min=None):
    """
    Determine one set of bin edges shared by all histograms and store the
    per-bin purity, stability and event count for each of them.

    Step 1: Ask ``get_next_end`` (which is given all histograms and the
            minimal criteria) where the current bin has to end.
    Step 2: Start the next bin where the previous one ended and repeat until
            the last bin of the first histogram is reached.
    Step 3: For each histogram check the last bin against the criteria and,
            if it fails and there are more than three bins, merge the last
            two bins.
    Step 4: Store the values for the final binning in each info dictionary.

    Parameters:
        histogram_information: list of dictionaries; the keys 'hist',
            'channel' and 'CoM' are read, the keys 'p_i', 's_i' and 'N'
            are written.
        p_min, s_min, n_min: minimal purity, stability and number of events;
            forwarded to ``get_next_end`` and used for the last-bin check.
        min_width: forwarded to ``get_next_end``.
        x_min: optional x value; when given (including 0) the first bin
            starts at the histogram bin containing it.

    Returns:
        (bin_edges, histogram_information)
    """

    def evaluate(info, edges, name=None):
        # purity, stability and event count per bin of info["hist"] rebinned to edges
        rebinned = rebin_2d(info["hist"], edges, edges)
        new_hist = rebinned.Clone(name) if name else rebinned.Clone()
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        n_events = [int(get_bin_content(i)) for i in range(1, len(edges))]
        return purities, stabilities, n_events

    histograms = [info["hist"] for info in histogram_information]
    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    current_bin_start = 0
    if x_min is not None:
        # 'is not None' so that x_min = 0 is honoured; clamp at 0 so that a
        # value below the axis range (FindBin returns the underflow bin)
        # cannot produce a negative start index
        current_bin_start = max(0, first_hist.ProjectionX().FindBin(x_min) - 1)
    current_bin_end = current_bin_start

    bin_edges = []
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width
        )
        if not bin_edges:
            # very first bin: also record its lower edge
            bin_edges.append(first_hist.GetXaxis().GetBinLowEdge(current_bin_start + 1))
        bin_edges.append(
            first_hist.GetXaxis().GetBinLowEdge(current_bin_end)
            + first_hist.GetXaxis().GetBinWidth(current_bin_end)
        )
        current_bin_start = current_bin_end

    # Check the last bin for every histogram. A merge changes the binning for
    # all histograms, so everything evaluated before it becomes stale.
    evaluated = []
    for info in histogram_information:
        values = evaluate(info, bin_edges, info["channel"] + "_" + str(info["CoM"]))
        purities, stabilities, n_events = values
        last_bin_fails = (
            purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        )
        if last_bin_fails and len(purities) > 3:
            # merge the last two bins by dropping the edge between them
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            evaluated = [None] * len(evaluated)
            values = None
        evaluated.append(values)

    # add the purity and stability values for the final binning
    for info, values in zip(histogram_information, evaluated):
        if values is None:
            values = evaluate(info, bin_edges)
        info["p_i"], info["s_i"], info["N"] = values

    return bin_edges, histogram_information
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
    Determine one set of bin edges shared by all histograms and store the
    per-bin purity, stability and event count for each of them.

    Step 1: Ask get_next_end (which is given all histograms and the minimal
            criteria) where the current bin has to end.
    Step 2: Start the next bin where the previous one ended and repeat until
            the last bin of the first histogram is reached.
    Step 3: For each histogram check the last bin against the criteria and,
            if it fails and there are more than three bins, merge the last
            two bins.
    Step 4: Store the values for the final binning in each info dictionary.

    Parameters:
        histogram_information: list of dictionaries; the keys 'hist',
            'channel' and 'CoM' are read, the keys 'p_i', 's_i' and 'N'
            are written.
        p_min, s_min, n_min: minimal purity, stability and number of events;
            forwarded to get_next_end and used for the last-bin check.
        min_width: forwarded to get_next_end.
        x_min: optional x value; when given (including 0) the first bin
            starts at the histogram bin containing it.

    Returns:
        (bin_edges, histogram_information)
    '''

    def evaluate( info, edges, name = None ):
        # purity, stability and event count per bin of info['hist'] rebinned to edges
        rebinned = rebin_2d( info['hist'], edges, edges )
        new_hist = rebinned.Clone( name ) if name else rebinned.Clone()
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities( new_hist.Clone() )
        stabilities = calculate_stabilities( new_hist.Clone() )
        n_events = [int( get_bin_content( i ) ) for i in range( 1, len( edges ) )]
        return purities, stabilities, n_events

    histograms = [info['hist'] for info in histogram_information]
    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    current_bin_start = 0
    if x_min is not None:
        # 'is not None' so that x_min = 0 is honoured; clamp at 0 so that a
        # value below the axis range (FindBin returns the underflow bin)
        # cannot produce a negative start index
        current_bin_start = max( 0, first_hist.ProjectionX().FindBin( x_min ) - 1 )
    current_bin_end = current_bin_start

    bin_edges = []
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width
        )
        if not bin_edges:
            # very first bin: also record its lower edge
            bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) )
        bin_edges.append(
            first_hist.GetXaxis().GetBinLowEdge( current_bin_end )
            + first_hist.GetXaxis().GetBinWidth( current_bin_end )
        )
        current_bin_start = current_bin_end

    # Check the last bin for every histogram. A merge changes the binning for
    # all histograms, so everything evaluated before it becomes stale.
    evaluated = []
    for info in histogram_information:
        values = evaluate( info, bin_edges, info['channel'] + '_' + str( info['CoM'] ) )
        purities, stabilities, n_events = values
        last_bin_fails = (
            purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        )
        if last_bin_fails and len( purities ) > 3:
            # merge the last two bins by dropping the edge between them
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            evaluated = [None] * len( evaluated )
            values = None
        evaluated.append( values )

    # add the purity and stability values for the final binning
    for info, values in zip( histogram_information, evaluated ):
        if values is None:
            values = evaluate( info, bin_edges )
        info['p_i'], info['s_i'], info['N'] = values

    return bin_edges, histogram_information
def get_best_binning( histogram_information, p_min, s_min, n_min ):
    '''
    Determine one set of bin edges shared by all histograms and store the
    per-bin purity, stability and event count for each of them.

    Step 1: Ask get_next_end (which is given all histograms and the minimal
            criteria) where the current bin has to end.
    Step 2: Start the next bin where the previous one ended and repeat until
            no more bins can be created.
    Step 3: For each histogram check the last bin against the criteria and
            merge the last two bins if it fails.
    Step 4: Store the values for the final binning in each info dictionary.

    Parameters:
        histogram_information: list of dictionaries; the keys 'hist',
            'channel' and 'CoM' are read, the keys 'p_i', 's_i' and 'N'
            are written.
        p_min, s_min, n_min: minimal purity, stability and number of events;
            forwarded to get_next_end and used for the last-bin check.

    Returns:
        (bin_edges, histogram_information)
    '''

    def evaluate( info, edges, name = None ):
        # purity, stability and diagonal content per bin of info['hist'] rebinned to edges
        rebinned = rebin_2d( info['hist'], edges, edges )
        new_hist = rebinned.Clone( name ) if name else rebinned.Clone()
        get_bin_content = new_hist.GetBinContent
        purities = calculate_purities( new_hist.Clone() )
        stabilities = calculate_stabilities( new_hist.Clone() )
        n_events = [int( get_bin_content( i, i ) ) for i in range( 1, len( edges ) )]
        return purities, stabilities, n_events

    histograms = [info['hist'] for info in histogram_information]
    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    bin_edges = []
    current_bin_start = 0
    current_bin_end = 0
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end, p_min, s_min, n_min
        )
        if not bin_edges:
            # very first bin: also record its lower edge
            bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) )
        bin_edges.append(
            first_hist.GetXaxis().GetBinLowEdge( current_bin_end )
            + first_hist.GetXaxis().GetBinWidth( current_bin_end )
        )
        current_bin_start = current_bin_end

    # Check the last bin for every histogram. A merge changes the binning for
    # all histograms, so everything evaluated before it becomes stale.
    evaluated = []
    for info in histogram_information:
        values = evaluate( info, bin_edges, info['channel'] + '_' + str( info['CoM'] ) )
        purities, stabilities, n_events = values
        last_bin_fails = (
            purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        )
        # a single bin has no neighbour to merge with; merging would leave
        # only one edge, which is not a binning any more
        if last_bin_fails and len( bin_edges ) > 2:
            # merge the last two bins by dropping the edge between them
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            evaluated = [None] * len( evaluated )
            values = None
        evaluated.append( values )

    # add the purity and stability values for the final binning
    for info, values in zip( histogram_information, evaluated ):
        if values is None:
            values = evaluate( info, bin_edges )
        info['p_i'], info['s_i'], info['N'] = values

    return bin_edges, histogram_information
def test_pre_calculated_stability( self ):
    '''Compare the stabilities of self.pre_calculated with fixed expected values.'''
    result = calculate_stabilities( self.pre_calculated )
    self.assertEqual( len( result ), self.n_bins_x, 'Invalid number of stability terms' )

    # everything between the first and the last entry is expected to be 1
    inner_values = result[1:-1]
    for value in inner_values:
        self.assertEqual( value, 1, 's-value is incorrect' )

    # the two outermost entries have their own expected values
    first_value = result[0]
    self.assertEqual( first_value, 0.25, 's-value is incorrect' )
    last_value = result[-1]
    self.assertEqual( last_value, 0.2, 's-value is incorrect' )
def test_pre_calculated_stability(self):
    """Compare the stabilities of ``self.pre_calculated`` with fixed expected values."""
    result = calculate_stabilities(self.pre_calculated)
    n_terms = len(result)
    self.assertEqual(n_terms, self.n_bins_x, 'Invalid number of stability terms')

    # everything between the first and the last entry is expected to be 1
    for value in result[1:-1]:
        self.assertEqual(value, 1)

    # the two outermost entries have their own expected values
    first_value = result[0]
    self.assertEqual(first_value, 0.25)
    last_value = result[-1]
    self.assertEqual(last_value, 0.2)
def test_random_elipse_stability( self ):
    '''Check number and range of the stabilities of self.random_elipse.'''
    result = calculate_stabilities( self.random_elipse )
    n_terms = len( result )
    self.assertEqual( n_terms, self.n_bins_x, 'Invalid number of stability terms' )

    # every stability has to lie strictly between 0 and 0.5
    for value in result:
        self.assertGreater( value, 0, 's-value is incorrect' )
        self.assertLess( value, 0.5, 's-value is incorrect' )
def test_random_elipse_stability( self ):
    '''Check number and range of the stabilities of self.random_elipse.'''
    result = calculate_stabilities( self.random_elipse )
    n_terms = len( result )
    self.assertEqual( n_terms, self.n_bins_x, 'Invalid number of stability terms' )

    # every stability has to be above 0 and below ~0.6
    for value in result:
        self.assertGreater( value, 0 )
        # upper limit: 0.6 plus 10% to allow for random fluctuations
        upper_limit = 0.6 + 0.06
        self.assertLess( value, upper_limit )
def setUp( self ):
    '''
    Create a randomly filled 2D histogram, derive a binning for it with
    pick_bins.get_next_end and keep two sets of purity / stability /
    event-count values:
      *_Integral      : the values returned by get_next_end for each bin
      *_GetBinContent : the values calculated from the rebinned histogram
    '''
    # create and fill the test histogram
    self.h1 = Hist2D( 60, 40, 100, 60, 40, 100 )
    n_samples = 100000
    first_coordinate = 60 + 10 * np.random.randn( n_samples )
    second_coordinate = first_coordinate + np.random.randn( n_samples )
    coordinate_pairs = np.vstack( ( first_coordinate, second_coordinate ) ).T
    self.h1.fill_array( coordinate_pairs )
    self.h1 = fix_overflow( self.h1 )

    self.histogram_information = [
        {'hist': self.h1, 'CoM': 7, 'channel':'test_1'},
    ]
    self.histograms = [entry['hist'] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    # containers for the results
    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    # build the binning one bin at a time
    reference = self.histograms[0]
    last_bin = reference.GetNbinsX()
    start = 0
    end = 0
    while end < last_bin:
        end, purity, stability, n_selected = pick_bins.get_next_end(
            self.histograms, start, end, self.p_min, self.s_min, self.n_min
        )
        if len( self.bin_edges ) == 0:
            # very first bin: also record its lower edge
            self.bin_edges.append( reference.GetXaxis().GetBinLowEdge( start + 1 ) )
        upper_edge = reference.GetXaxis().GetBinLowEdge( end ) + reference.GetXaxis().GetBinWidth( end )
        self.bin_edges.append( upper_edge )
        self.purities_Integral.append( purity )
        self.stabilities_Integral.append( stability )
        self.n_events_Integral.append( n_selected )
        start = end

    # values calculated from the histogram rebinned with the edges found above
    self.h1_rebinned = rebin_2d( self.h1, self.bin_edges, self.bin_edges )
    self.purities_GetBinContent = calculate_purities( self.h1_rebinned )
    self.stabilities_GetBinContent = calculate_stabilities( self.h1_rebinned )
    diagonal_contents = []
    for index in range( 1, len( self.bin_edges ) ):
        diagonal_contents.append( int( self.h1_rebinned.GetBinContent( index, index ) ) )
    self.n_events_GetBinContent = diagonal_contents
def test_random_elipse_stability(self):
    """Check number and range of the stabilities of ``self.random_elipse``."""
    result = calculate_stabilities(self.random_elipse)
    self.assertEqual(len(result), self.n_bins_x, 'Invalid number of stability terms')

    # every stability has to be above 0 and below ~0.6
    for value in result:
        self.assertGreater(value, 0)
        # upper limit: 0.6 plus 10% to allow for random fluctuations
        upper_limit = 0.6 + 0.06
        self.assertLess(value, upper_limit)
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
    Plot purity and stability for one histogram after rebinning it to
    bin_edges and save the plot in every format listed in output_formats.

    Parameters:
        input_file: passed to get_histogram_from_file together with histogram
        histogram: identifies the histogram to read from input_file
        bin_edges: edges used for rebinning both axes and for the x-axis range
        channel: used in the output file name
        variable: key into variables_latex (axis title) and part of the
            output file name
        isVisiblePhaseSpace: currently not used by this function

    Reads the module-level names output_folder, output_formats and options.
    The figure is closed before returning, so repeated calls do not
    accumulate open pyplot figures.
    '''
    hist = get_histogram_from_file( histogram, input_file )
    # single-argument print: same text on Python 2 and Python 3
    print( "bin edges contents :  %s" % ( bin_edges, ) )
    new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone()
    purities = calculate_purities( new_hist.Clone() )
    stabilities = calculate_stabilities( new_hist.Clone() )
    print( "purities contents :  %s" % ( purities, ) )
    print( "stabilities contents :  %s" % ( stabilities, ) )

    hist_stability = value_tuplelist_to_hist( stabilities, bin_edges )
    hist_purity = value_tuplelist_to_hist( purities, bin_edges )

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0, 1]

    figure = plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )
    try:
        ax0 = plt.axes()
        ax0.minorticks_on()
        plt.tick_params( **CMS.axis_label_major )
        plt.tick_params( **CMS.axis_label_minor )
        ax0.xaxis.labelpad = 12
        ax0.yaxis.labelpad = 12
        rplt.hist( hist_stability, stacked = False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' )
        rplt.hist( hist_purity, stacked = False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' )

        ax0.set_xlim( x_limits )
        ax0.set_ylim( y_limits )

        plt.tick_params( **CMS.axis_label_major )
        plt.tick_params( **CMS.axis_label_minor )

        # the unit is always written as GeV, whatever the variable is
        x_title = '$' + variables_latex[variable] + '$ [GeV]'
        plt.xlabel( x_title, CMS.x_axis_title )

        plt.legend( loc = 4, prop = {'size':40} )

        plt.tight_layout()

        # NOTE(review): this also writes 'test.pdf' into the current working
        # directory - looks like a debugging leftover; confirm before removing
        plt.savefig( 'test.pdf' )
        save_as_name = 'purityStability_' + channel + '_' + variable + '_' + str( options.CoM ) + 'TeV'
        for output_format in output_formats:
            plt.savefig( output_folder + save_as_name + '.' + output_format )
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close( figure )
def test_best_case_stability( self ):
    '''Every stability calculated for self.best_case has to be 1.'''
    result = calculate_stabilities( self.best_case )
    n_terms = len( result )
    self.assertEqual( n_terms, self.n_bins_x, 'Invalid number of stability terms' )
    for value in result:
        self.assertEqual( value, 1, 's-value is incorrect' )
def test_stabilities_combined_1_3(self):
    '''Each stability of self.h1_3_rebinned has to be at least self.s_min.'''
    result = calculate_stabilities( self.h1_3_rebinned )
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual( value, self.s_min )
def test_stabilities_reduced_spread(self):
    '''Each stability of self.h3_rebinned has to be at least self.s_min.'''
    result = calculate_stabilities( self.h3_rebinned )
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual( value, self.s_min )
def test_stabilities( self ):
    '''Each stability of self.h1_rebinned has to be at least self.s_min.'''
    result = calculate_stabilities( self.h1_rebinned )
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual( value, self.s_min )
def test_stabilities_combined_1_3(self):
    """Each stability of ``self.h1_3_rebinned`` has to be at least ``self.s_min``."""
    result = calculate_stabilities(self.h1_3_rebinned)
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual(value, self.s_min)
def test_stabilities_reduced_spread(self):
    """Each stability of ``self.h3_rebinned`` has to be at least ``self.s_min``."""
    result = calculate_stabilities(self.h3_rebinned)
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual(value, self.s_min)
def test_best_case_stability(self):
    """Every stability calculated for ``self.best_case`` has to be 1."""
    result = calculate_stabilities(self.best_case)
    n_terms = len(result)
    self.assertEqual(n_terms, self.n_bins_x, 'Invalid number of stability terms')
    for value in result:
        self.assertEqual(value, 1)
def test_stabilities(self):
    """Each stability of ``self.h1_rebinned`` has to be at least ``self.s_min``."""
    result = calculate_stabilities(self.h1_rebinned)
    for value in result:
        # compare against the configured minimal stability
        self.assertGreaterEqual(value, self.s_min)
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
    Plot purity and stability for one histogram and save the plot in every
    format listed in output_formats.

    The histogram is used as read from the file (it is not rebinned here);
    bin_edges is only used to build the plotted histograms and the x-axis
    range.

    Parameters:
        input_file: passed to get_histogram_from_file together with histogram
        histogram: identifies the histogram to read from input_file
        bin_edges: edges for the plotted histograms and the x-axis range
        channel: used in the output file name
        variable: key into variables_latex (axis title) and part of the
            output file name
        isVisiblePhaseSpace: currently not used by this function

    Reads the module-level names output_folder, output_formats and options.
    The figure is closed before returning, so repeated calls do not
    accumulate open pyplot figures.
    '''
    hist = get_histogram_from_file(histogram, input_file)
    purities = calculate_purities(hist.Clone())
    stabilities = calculate_stabilities(hist.Clone())

    hist_stability = value_tuplelist_to_hist(stabilities, bin_edges)
    hist_purity = value_tuplelist_to_hist(purities, bin_edges)

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0, 1]

    figure = plt.figure(figsize=(20, 16), dpi=200, facecolor='white')
    try:
        ax0 = plt.axes()
        ax0.minorticks_on()
        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)
        ax0.xaxis.labelpad = 12
        ax0.yaxis.labelpad = 12
        rplt.hist(hist_stability, stacked=False, axes=ax0, cmap=my_cmap, vmin=1, label='Stability')
        rplt.hist(hist_purity, stacked=False, axes=ax0, cmap=my_cmap, vmin=1, label='Purity')

        ax0.set_xlim(x_limits)
        ax0.set_ylim(y_limits)

        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)

        # the unit is always written as GeV, whatever the variable is
        x_title = '$' + variables_latex[variable] + '$ [GeV]'
        plt.xlabel(x_title, CMS.x_axis_title)

        plt.legend(loc=4, prop={'size': 40})

        plt.tight_layout()

        # NOTE(review): this also writes 'test.pdf' into the current working
        # directory - looks like a debugging leftover; confirm before removing
        plt.savefig('test.pdf')
        save_as_name = 'purityStability_' + channel + '_' + variable + '_' + str(
            options.CoM) + 'TeV'
        for output_format in output_formats:
            plt.savefig(output_folder + save_as_name + '.' + output_format)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(figure)