def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
    Build one set of bin edges for all histograms and store the per-bin
    quality of that binning in every entry of histogram_information.

    Bin ends are requested one after another from get_next_end (which is
    handed p_min, s_min, n_min and min_width) until the number of x bins of
    the first histogram is reached; the edge values are read from the x axis
    of that first histogram. If x_min is truthy the scan starts from the
    index FindBin(x_min) - 1 of the x projection instead of 0.

    Each entry is then rebinned with these edges and gains the keys
    'p_i' (purities), 's_i' (stabilities), 'N' (integer content of the
    x projection per bin) and 'res' (the last value of each get_next_end
    result). If the last bin fails p_min, s_min or n_min and there are more
    than three bins, the last two bins are merged and the values recomputed;
    the merged edges are also what the following entries are rebinned with.

    Returns the tuple (bin_edges, histogram_information).
    '''
    histograms = [entry['hist'] for entry in histogram_information]
    reference = histograms[0]
    total_bins = reference.GetNbinsX()

    start = 0
    if x_min:
        start = reference.ProjectionX().FindBin( x_min ) - 1
    end = start

    bin_edges = []
    resolutions = []
    while end < total_bins:
        # get_next_end returns (bin end, p, s, N_reco, resolution)
        end, _, _, _, resolution = get_next_end( histograms, start, end, p_min, s_min, n_min, min_width )
        resolutions.append( resolution )
        if len( bin_edges ) == 0:
            # the very first edge is the low edge of the first bin used
            bin_edges.append( reference.GetXaxis().GetBinLowEdge( start + 1 ) )
        upper_edge = reference.GetXaxis().GetBinLowEdge( end ) + reference.GetXaxis().GetBinWidth( end )
        bin_edges.append( upper_edge )
        start = end

    # attach purity, stability and event counts of the final binning
    for entry in histogram_information:
        rebinned = rebin_2d( entry['hist'], bin_edges, bin_edges ).Clone( entry['channel'] + '_' + str( entry['CoM'] ) )
        content_of = rebinned.ProjectionX().GetBinContent
        purities = calculate_purities( rebinned.Clone() )
        stabilities = calculate_stabilities( rebinned.Clone() )
        n_events = [int( content_of( index ) ) for index in range( 1, len( bin_edges ) )]

        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails and len( purities ) > 3:
            # drop the second-to-last edge, i.e. merge the last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]
            rebinned = rebin_2d( entry['hist'], bin_edges, bin_edges ).Clone()
            content_of = rebinned.ProjectionX().GetBinContent
            purities = calculate_purities( rebinned.Clone() )
            stabilities = calculate_stabilities( rebinned.Clone() )
            n_events = [int( content_of( index ) ) for index in range( 1, len( bin_edges ) )]

        entry['p_i'] = purities
        entry['s_i'] = stabilities
        entry['N'] = n_events
        entry['res'] = resolutions

    return bin_edges, histogram_information
Example #2
0
 def test_rebin_2d_not_on_boundaries(self):
     """Diagonal of the rebinned histogram must equal the expected values
     when the new edges do not coincide with existing bin boundaries."""
     edges = self.bin_edges_not_on_boundaries
     rebinned = rebin_2d(self.diagonals, edges, edges)
     for bin_number in range(1, rebinned.nbins() + 1):
         observed = rebinned.GetBinContent(bin_number, bin_number)
         expected = self.result_not_on_boundaries[bin_number - 1]
         self.assertEqual(observed, expected,
                          'histogram contents do not match')
 def test_rebin_2d_not_on_boundaries( self ):
     '''
     Rebin the diagonal histogram with edges that are not on existing bin
     boundaries and compare every diagonal bin with the expected content.
     '''
     new_edges = self.bin_edges_not_on_boundaries
     rebinned_hist = rebin_2d( self.diagonals, new_edges, new_edges )
     for offset in range( rebinned_hist.nbins() ):
         # histogram bins are numbered from 1, the expectation list from 0
         diagonal_content = rebinned_hist.GetBinContent( offset + 1, offset + 1 )
         self.assertEqual( 
             diagonal_content,
             self.result_not_on_boundaries[offset],
             'histogram contents do not match' )
def calculate_purity_stability(hist_info, bin_edges):
    '''
    Rebin hist_info['hist'] on both axes to bin_edges and return the tuple
    (purities, stabilities) computed from a clone of the rebinned histogram.
    '''
    rebinned = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
    # purities are evaluated before stabilities, both on the same clone
    return calculate_purities( rebinned ), calculate_stabilities( rebinned )
    def setUp(self):
        """Fill one toy 2D histogram and bin it with pick_bins.get_next_end.

        The values returned by get_next_end for every bin are kept in the
        ``*_Integral`` lists; the ``*_GetBinContent`` attributes hold the
        same quantities recomputed from the histogram rebinned with the
        resulting edges.
        """
        sample_size = 100000
        self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
        # y follows x with an additional unit-width normal smearing
        x_values = 60 + 10 * np.random.randn(sample_size)
        y_values = x_values + np.random.randn(sample_size)
        self.h1.fill_array(np.vstack((x_values, y_values)).T)

        self.h1 = fix_overflow(self.h1)

        self.histogram_information = [{"hist": self.h1, "CoM": 7, "channel": "test_1"}]
        self.histograms = [entry["hist"] for entry in self.histogram_information]

        # requirements for new binning
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 1000

        self.bin_edges = []
        self.purities_GetBinContent = []
        self.stabilities_GetBinContent = []
        self.n_events_GetBinContent = []

        self.purities_Integral = []
        self.stabilities_Integral = []
        self.n_events_Integral = []

        reference = self.histograms[0]
        last_bin = reference.GetNbinsX()

        start = 0
        end = 0
        while end < last_bin:
            end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
                self.histograms, start, end, self.p_min, self.s_min, self.n_min, 0
            )
            if len(self.bin_edges) == 0:
                # the first edge is the low edge of the first bin used
                self.bin_edges.append(reference.GetXaxis().GetBinLowEdge(start + 1))
            upper_edge = reference.GetXaxis().GetBinLowEdge(end) + reference.GetXaxis().GetBinWidth(end)
            self.bin_edges.append(upper_edge)
            self.purities_Integral.append(purity)
            self.stabilities_Integral.append(stability)
            self.n_events_Integral.append(n_gen_and_reco)
            start = end

        self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)

        self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
        self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
        # diagonal content of every rebinned bin
        self.n_events_GetBinContent = [
            int(self.h1_rebinned.GetBinContent(index, index))
            for index in range(1, len(self.bin_edges))
        ]
Example #6
0
def calculate_purity_stability(hist_info, bin_edges):
    '''
    Return (purities, stabilities) for the histogram stored under
    hist_info['hist'] once both of its axes are rebinned to bin_edges.
    '''
    fine_hist = hist_info['hist']
    coarse_hist = rebin_2d(fine_hist, bin_edges, bin_edges).Clone()
    purities = calculate_purities(coarse_hist)
    stabilities = calculate_stabilities(coarse_hist)
    return purities, stabilities
 def setUp( self ):
     '''
     Create the histograms and bin-edge fixtures used by the rebinning tests.

     self.h1 is filled from the module-level sequence x1, self.h2 from a
     bivariate normal sample, self.simple with weight 1 at 0..99 and
     self.diagonals with 1 in every diagonal bin. self.h2 is rebinned both
     via Hist2D.rebinned (axis by axis) and via rebin_2d.
     '''
     # create histograms
     self.h1 = Hist( 100, 0, 100, title = '1D' )
     self.h2 = Hist2D( 60, 40, 100, 60, 40, 100 )
     self.simple = Hist( 100, 0, 100, title = '1D' )
 
     # fill the histograms with our distributions
     # An explicit loop is used instead of map(): on Python 3 map() is lazy,
     # so map( self.h1.Fill, x1 ) would never actually fill the histogram.
     for value in x1:
         self.h1.Fill( value )
     self.h2.fill_array( np.random.multivariate_normal( 
                                 mean = ( 50, 50 ),
                                 cov = np.arange( 4 ).reshape( 2, 2 ),
                                 size = ( int( 1E6 ), ) ) 
                     )
     self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]
     # rebin them
     self.h2_rebinned = self.h2.rebinned( self.bins, axis = 0 )
     self.h2_rebinned = self.h2_rebinned.rebinned( self.bins, axis = 1 )
     self.h2_rebinned_2 = rebin_2d( self.h2, self.bins, self.bins )
     
     # we only test symmetric bins for now
     self.n_bins_x = 5
     self.n_bins_y = 5
     # only entries in diagonals, p = 1, s = 1 for all bins
     self.diagonals = Hist2D( self.n_bins_x, 0, 10, self.n_bins_y, 0, 10 )
     # this creates
     # [0, 0, 0, 0, 1],
     # [0, 0, 0, 1, 0],
     # [0, 0, 1, 0, 0],
     # [0, 1, 0, 0, 0],
     # [1, 0, 0, 0, 0]
     for i in range( 1, self.n_bins_x + 1 ):
         self.diagonals.SetBinContent( i, i, 1 )
     
     # the following should result in
     # [0, 0, 2],
     # [0, 2, 0],
     # [1, 0, 0]    
     self.bin_edges_nice = [0, 2, 6, 10]
     self.result_nice = [1, 2, 2]
     
     # the following should result in
     # [0, 0, 0, 2],
     # [0, 0, 2, 0]
     # [0, 1, 0, 0]
     # [0, 0, 0, 0]  
     self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
     self.result_out_of_bound = [0, 1, 2, 2]
     # the following should result in
     # [0, 0, 2],
     # [0, 1, 0],
     # [2, 0, 0] 
     self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
     self.result_not_on_boundaries = [2, 1, 2]
     
     # one entry of weight 1 at every integer 0..99
     for i in range(100):
         self.simple.Fill(i, 1)
Example #8
0
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
    Plot purity and stability per bin for one 2D histogram and save the figure.

    The histogram named `histogram` is read from `input_file`, rebinned on
    both axes to `bin_edges`, and its purities and stabilities are drawn
    together on one set of axes. The figure is written to 'test.pdf' and to
    output_folder + 'purityStability_<channel>_<variable>_<CoM>TeV.<format>'
    for every entry of the module-level output_formats.

    `isVisiblePhaseSpace` is accepted but not used in this function.
    The names my_cmap, options, CMS, variables_latex, output_folder and
    output_formats are read from module scope.
    '''
    global output_folder, output_formats
 
    hist = get_histogram_from_file( histogram, input_file )
    print "bin edges contents   : ", bin_edges
    # same edges on both axes
    new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone()

    # get_bin_content = hist.ProjectionX().GetBinContent
    # each calculation gets its own clone of the rebinned histogram
    purities = calculate_purities( new_hist.Clone() )
    stabilities = calculate_stabilities( new_hist.Clone() )
    # n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]
    print "purities contents    : ", purities
    print "stabilities contents : ", stabilities

    # presumably builds 1D histograms over bin_edges from the per-bin values
    # -- confirm against value_tuplelist_to_hist
    hist_stability = value_tuplelist_to_hist(stabilities, bin_edges)
    hist_purity = value_tuplelist_to_hist(purities, bin_edges)

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    # x range spans the full binning; y range is fixed to [0, 1]
    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0,1]
    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()
#     ax0.grid( True, 'major', linewidth = 2 )
#     ax0.grid( True, 'minor' )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12
    # NOTE(review): my_cmap is not defined in this function; it has to exist
    # at module level -- confirm
    rplt.hist( hist_stability , stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' )
    rplt.hist( hist_purity, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' )

    ax0.set_xlim( x_limits )
    ax0.set_ylim( y_limits )

    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    # the unit '[GeV]' is appended for every variable
    x_title = '$' + variables_latex[variable] + '$ [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )

    # leg is not used after this line
    leg = plt.legend(loc=4,prop={'size':40})

    plt.tight_layout()

    # NOTE(review): a fixed 'test.pdf' is written to the working directory on
    # every call -- looks like a debugging leftover; confirm before removing
    plt.savefig('test.pdf')
    save_as_name = 'purityStability_'+channel + '_' + variable + '_' + str(options.CoM) + 'TeV'
    # one output file per requested format
    for output_format in output_formats:
        plt.savefig( output_folder + save_as_name + '.' + output_format )
Example #9
0
    def setUp(self):
        """Create the histograms and bin-edge fixtures for the rebinning tests.

        self.h1 is filled from the module-level sequence x1, self.h2 from a
        bivariate normal sample, self.simple with weight 1 at 0..99 and
        self.diagonals with 1 in every diagonal bin. self.h2 is rebinned both
        via Hist2D.rebinned (axis by axis) and via rebin_2d.
        """
        # create histograms
        self.h1 = Hist(100, 0, 100, title='1D')
        self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
        self.simple = Hist(100, 0, 100, title='1D')

        # fill the histograms with our distributions
        # An explicit loop is used instead of map(): on Python 3 map() is
        # lazy, so map(self.h1.Fill, x1) would never fill the histogram.
        for value in x1:
            self.h1.Fill(value)
        self.h2.fill_array(
            np.random.multivariate_normal(mean=(50, 50),
                                          cov=np.arange(4).reshape(2, 2),
                                          size=(int(1E6), )))
        self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]
        # rebin them
        self.h2_rebinned = self.h2.rebinned(self.bins, axis=0)
        self.h2_rebinned = self.h2_rebinned.rebinned(self.bins, axis=1)
        self.h2_rebinned_2 = rebin_2d(self.h2, self.bins, self.bins)

        # we only test symmetric bins for now
        self.n_bins_x = 5
        self.n_bins_y = 5
        # only entries in diagonals, p = 1, s = 1 for all bins
        self.diagonals = Hist2D(self.n_bins_x, 0, 10, self.n_bins_y, 0, 10)
        # this creates
        # [0, 0, 0, 0, 1],
        # [0, 0, 0, 1, 0],
        # [0, 0, 1, 0, 0],
        # [0, 1, 0, 0, 0],
        # [1, 0, 0, 0, 0]
        for i in range(1, self.n_bins_x + 1):
            self.diagonals.SetBinContent(i, i, 1)

        # the following should result in
        # [0, 0, 2],
        # [0, 2, 0],
        # [1, 0, 0]
        self.bin_edges_nice = [0, 2, 6, 10]
        self.result_nice = [1, 2, 2]

        # the following should result in
        # [0, 0, 0, 2],
        # [0, 0, 2, 0]
        # [0, 1, 0, 0]
        # [0, 0, 0, 0]
        self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
        self.result_out_of_bound = [0, 1, 2, 2]
        # the following should result in
        # [0, 0, 2],
        # [0, 1, 0],
        # [2, 0, 0]
        self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
        self.result_not_on_boundaries = [2, 1, 2]

        # one entry of weight 1 at every integer 0..99
        for i in range(100):
            self.simple.Fill(i, 1)
    def setUp(self):
        """Create three toy 2D histograms and derive binnings for them.

        h1 is the full sample, h2 holds a fifth as many events and h3 is
        drawn with half the x spread. pick_bins.get_best_binning is run on
        each histogram alone and on the pairs (h1, h2) and (h1, h3). h1, h2
        and h3 are then rebinned with their own edges, and h1 additionally
        with the edges of each pair.
        """
        n_full = 10000
        n_reduced = int(n_full / 5)

        # create histograms
        self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
        self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
        self.h3 = Hist2D(60, 40, 100, 60, 40, 100)

        # all x samples are drawn first, then the smeared y samples
        x_full = 60 + 10 * np.random.randn(n_full)
        x_reduced = 60 + 10 * np.random.randn(n_reduced)
        x_narrow = 60 + 5 * np.random.randn(n_full)
        y_full = x_full + np.random.randn(n_full)
        y_reduced = x_reduced + np.random.randn(n_reduced)
        y_narrow = x_narrow + np.random.randn(n_full)

        # fill the histograms with our distributions
        self.h1.fill_array(np.vstack((x_full, y_full)).T)
        # reduced number of events
        self.h2.fill_array(np.vstack((x_reduced, y_reduced)).T)
        # reduced spread
        self.h3.fill_array(np.vstack((x_narrow, y_narrow)).T)

        def describe(hist, channel):
            # a fresh dict per use: get_best_binning receives these dicts
            return {'hist': hist, 'CoM': 7, 'channel': channel}

        self.histogram_information_1 = [describe(self.h1, 'test_1')]
        self.histogram_information_2 = [describe(self.h2, 'test_2')]
        self.histogram_information_3 = [describe(self.h3, 'test_3')]
        self.histogram_information_1_2 = [
            describe(self.h1, 'test_1'),
            describe(self.h2, 'test_2'),
        ]
        self.histogram_information_1_3 = [
            describe(self.h1, 'test_1'),
            describe(self.h3, 'test_3'),
        ]

        # requirements for new binning
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 100
        min_width = 0.000000000000001

        # sets self.bin_edges_<suffix> from self.histogram_information_<suffix>
        for suffix in ('1', '2', '3', '1_2', '1_3'):
            information = getattr(self, 'histogram_information_' + suffix)
            edges, _ = pick_bins.get_best_binning(
                information, self.p_min, self.s_min, self.n_min, min_width)
            setattr(self, 'bin_edges_' + suffix, edges)

        self.h1_rebinned = rebin_2d(self.h1, self.bin_edges_1, self.bin_edges_1)
        self.h2_rebinned = rebin_2d(self.h2, self.bin_edges_2, self.bin_edges_2)
        self.h3_rebinned = rebin_2d(self.h3, self.bin_edges_3, self.bin_edges_3)
        # the combined binnings are applied to h1
        self.h1_2_rebinned = rebin_2d(self.h1, self.bin_edges_1_2, self.bin_edges_1_2)
        self.h1_3_rebinned = rebin_2d(self.h1, self.bin_edges_1_3, self.bin_edges_1_3)
def plotting_response( histogram_information, variable, channel, bin_edges ):
    '''
    Draw the response matrix of histogram_information['hist'], rebinned on
    both axes to bin_edges and normalised list by list, and save it as
    'plots/binning/response/<channel>_<variable>_Response.pdf'.
    '''
    global output_folder, output_formats, options
    my_cmap = cm.get_cmap( 'rainbow' )
    my_cmap.set_under( 'w' )

    response_plot = rebin_2d( histogram_information['hist'], bin_edges, bin_edges )
    norm_response_plot = Hist2D( bin_edges, bin_edges, type = 'D' )

    # one index more than there are bins is visited on each axis
    n_indices = len( bin_edges )
    get_bin_content = response_plot.GetBinContent
    set_bin_content = norm_response_plot.SetBinContent

    # Bin contents grouped by first bin index; the read indices start at 1
    contents = [
        [get_bin_content( first + 1, second + 1 ) for second in range( n_indices )]
        for first in range( n_indices )
    ]

    # Divide every group by its own sum and round to two decimals
    normalised = [group / np.sum( group ) for group in contents]
    rounded = np.around( np.array( normalised ), 2 )

    # New 2D Hist + Mesh to Plot; the write indices start at 0
    for target_x in range( n_indices ):
        for target_y in range( n_indices ):
            set_bin_content( target_x, target_y, rounded.item( target_y, target_x ) )
    grid_x, grid_y = np.meshgrid( list( norm_response_plot.x() ), list( norm_response_plot.y() ) )
    x = grid_x.ravel()
    y = grid_y.ravel()
    z = np.array( norm_response_plot.z() ).ravel()

    v_unit = '$' + variables_latex[variable] + '$'
    if variable in ( 'HT', 'ST', 'MET', 'lepton_pt', 'WPT' ):
        v_unit = v_unit + ' [GeV]'
    x_title = 'Reconstructed ' + v_unit
    y_title = 'Generated ' + v_unit
    title = "Response matrix normalised wrt reconstructed bins"

    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()

    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    x_edges = list( norm_response_plot.xedges() )
    y_edges = list( norm_response_plot.yedges() )
    plt.hist2d( x, y, weights = z, bins = ( x_edges, y_edges ), cmap = my_cmap, vmin = 0, vmax = 1 )
    colorbar = plt.colorbar()
    colorbar.ax.tick_params( **CMS.axis_label_major )

    plt.xlabel( x_title, CMS.x_axis_title )
    plt.ylabel( y_title, CMS.y_axis_title )
    plt.title( title, CMS.title )

    plt.tight_layout()

    plot_filepath = 'plots/binning/response/'
    make_folder_if_not_exists( plot_filepath )
    plot_filename = '{}_{}_Response.pdf'.format( channel, variable )
    plt.savefig( plot_filepath + plot_filename, bbox_inches = 'tight' )
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min = None, is_NJet=False, plot_resolution=False ):
    '''
    Build one set of bin edges for all histograms and store the per-bin
    quality of that binning in every entry of histogram_information.

    Bin ends are requested one after another from get_next_end (which is
    handed p_min, s_min, n_min, min_width, nice_width and is_NJet) until the
    number of x bins of the first histogram is reached; the edge values are
    read from the x axis of that first histogram. If x_min is truthy the scan
    starts from the index FindBin(x_min) - 1 of the x projection instead of 0.

    Each entry is then rebinned with these edges and gains the keys
    'p_i' (purities), 's_i' (stabilities), 'N' (integer content of the
    x projection per bin) and 'res' (result of calculate_resolutions).
    If the last bin fails p_min, s_min or n_min and there are more than
    three bins, the last two bins are merged and all values are recomputed
    from the merged histogram. Finally the last edge is rounded to a
    multiple of nice_width.

    Returns the tuple (bin_edges, histogram_information).
    '''
    histograms = [info['hist'] for info in histogram_information]
    bin_edges = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Return the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end( histogram_information, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width, nice_width, is_NJet=is_NJet)

        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) )
        # Attach the current bin end edge
        bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_end ) + first_hist.GetXaxis().GetBinWidth( current_bin_end ) )
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone( hist_info['channel'] + '_' + str( hist_info['CoM'] ) )
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities( new_hist.Clone() )
        stabilities = calculate_stabilities( new_hist.Clone() )
        # NOTE(review): var is not defined in this function; it has to be a
        # module-level name -- confirm
        resolutions = calculate_resolutions(  var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )

        n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Now check if the last bin also fulfils the requirements
        if ( purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min ) and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]

            # Recalculate purities and stabilities
            new_hist = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
            # Re-bind the accessor to the merged histogram; otherwise the
            # event counts below would still come from the pre-merge binning
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities = calculate_purities( new_hist.Clone() )
            stabilities = calculate_stabilities( new_hist.Clone() )
            resolutions = calculate_resolutions(  var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )
            n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1]/nice_width)

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N']   = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
Example #13
0
def plotting_response(histogram_information, variable, channel, bin_edges):
    '''
    Plot the normalised response matrix for one variable and channel.

    histogram_information['hist'] is rebinned on both axes to bin_edges,
    each list of contents sharing a first bin index is divided by its sum
    and rounded to two decimals, and the result is drawn with plt.hist2d and
    saved to 'plots/binning/response/<channel>_<variable>_Response.pdf'.
    '''
    global output_folder, output_formats, options
    my_cmap = cm.get_cmap('rainbow')
    my_cmap.set_under('w')

    fine_hist = histogram_information['hist']
    response_plot = rebin_2d(fine_hist, bin_edges, bin_edges)
    norm_response_plot = Hist2D(bin_edges, bin_edges, type='D')

    n_bins = len(bin_edges) - 1
    index_range = range(0, n_bins + 1)
    read_content = response_plot.GetBinContent
    write_content = norm_response_plot.SetBinContent

    # Collect the contents; reading uses indices shifted up by one
    normalised_groups = []
    collected = []
    for outer in index_range:
        collected.append([read_content(outer + 1, inner + 1)
                          for inner in index_range])

    # Normalise every group by its own sum and round
    for group in collected:
        normalised_groups.append(group / np.sum(group))
    rounded = np.around(np.array(normalised_groups), 2)

    # New 2D Hist + Mesh to Plot; writing uses the unshifted indices
    for first in index_range:
        for second in index_range:
            write_content(first, second, rounded.item(second, first))
    mesh_x, mesh_y = np.meshgrid(list(norm_response_plot.x()),
                                 list(norm_response_plot.y()))
    x = mesh_x.ravel()
    y = mesh_y.ravel()
    z = np.array(norm_response_plot.z()).ravel()

    v_unit = '$' + variables_latex[variable] + '$'
    has_gev_unit = variable in ['HT', 'ST', 'MET', 'lepton_pt', 'WPT']
    if has_gev_unit:
        v_unit += ' [GeV]'
    x_title = 'Reconstructed ' + v_unit
    y_title = 'Generated ' + v_unit
    title = "Response matrix normalised wrt reconstructed bins"

    plt.figure(figsize=(20, 16), dpi=200, facecolor='white')

    ax0 = plt.axes()
    ax0.minorticks_on()

    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    edges = (list(norm_response_plot.xedges()),
             list(norm_response_plot.yedges()))
    plt.hist2d(x, y, weights=z, bins=edges, cmap=my_cmap, vmin=0, vmax=1)
    colorbar = plt.colorbar()
    colorbar.ax.tick_params(**CMS.axis_label_major)

    plt.xlabel(x_title, CMS.x_axis_title)
    plt.ylabel(y_title, CMS.y_axis_title)
    plt.title(title, CMS.title)

    plt.tight_layout()

    plot_filepath = 'plots/binning/response/'
    make_folder_if_not_exists(plot_filepath)
    plot_filename = '{}_{}_Response.pdf'.format(channel, variable)
    plt.savefig(plot_filepath + plot_filename, bbox_inches='tight')
Example #14
0
def get_best_binning(histogram_information,
                     p_min,
                     s_min,
                     n_min,
                     min_width,
                     nice_width,
                     x_min=None,
                     is_NJet=False,
                     plot_resolution=False):
    '''
    Build one set of bin edges for all histograms and store the per-bin
    quality of that binning in every entry of histogram_information.

    Bin ends are requested one after another from get_next_end (which is
    handed p_min, s_min, n_min, min_width, nice_width and is_NJet) until the
    number of x bins of the first histogram is reached; the edge values are
    read from the x axis of that first histogram. If x_min is truthy the scan
    starts from the index FindBin(x_min) - 1 of the x projection instead of 0.

    Each entry is then rebinned with these edges and gains the keys
    'p_i' (purities), 's_i' (stabilities), 'N' (integer content of the
    x projection per bin) and 'res' (result of calculate_resolutions).
    If the last bin fails p_min, s_min or n_min and there are more than
    three bins, the last two bins are merged and all values are recomputed
    from the merged histogram. Finally the last edge is rounded to a
    multiple of nice_width.

    Returns the tuple (bin_edges, histogram_information).
    '''
    histograms = [info['hist'] for info in histogram_information]
    bin_edges = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Return the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end(histogram_information,
                                                   current_bin_start,
                                                   current_bin_end,
                                                   p_min,
                                                   s_min,
                                                   n_min,
                                                   min_width,
                                                   nice_width,
                                                   is_NJet=is_NJet)

        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append(
                first_hist.GetXaxis().GetBinLowEdge(current_bin_start + 1))
        # Attach the current bin end edge
        bin_edges.append(first_hist.GetXaxis().GetBinLowEdge(current_bin_end) +
                         first_hist.GetXaxis().GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist = rebin_2d(hist_info['hist'], bin_edges,
                            bin_edges).Clone(hist_info['channel'] + '_' +
                                             str(hist_info['CoM']))
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        # NOTE(review): var is not defined in this function; it has to be a
        # module-level name -- confirm
        resolutions = calculate_resolutions(var,
                                            bin_edges=bin_edges,
                                            channel=hist_info['channel'],
                                            res_to_plot=plot_resolution)

        n_events = [int(get_bin_content(i)) for i in range(1, len(bin_edges))]

        # Now check if the last bin also fulfils the requirements
        if (purities[-1] < p_min or stabilities[-1] < s_min
                or n_events[-1] < n_min) and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]

            # Recalculate purities and stabilities
            new_hist = rebin_2d(hist_info['hist'], bin_edges,
                                bin_edges).Clone()
            # Re-bind the accessor to the merged histogram; otherwise the
            # event counts below would still come from the pre-merge binning
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities = calculate_purities(new_hist.Clone())
            stabilities = calculate_stabilities(new_hist.Clone())
            resolutions = calculate_resolutions(var,
                                                bin_edges=bin_edges,
                                                channel=hist_info['channel'],
                                                res_to_plot=plot_resolution)
            n_events = [
                int(get_bin_content(i)) for i in range(1, len(bin_edges))
            ]

        # Make sure last bin edge is also a nice rounded number
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1] / nice_width)

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
    def setUp(self):
        """Prepare a toy 2D histogram and the binning derived from it.

        Bin ends are requested from pick_bins.get_next_end until the last
        x bin is reached. The purity, stability and event count returned for
        each bin go into the ``*_Integral`` lists; the ``*_GetBinContent``
        attributes are computed afterwards from the rebinned histogram.
        """
        n_events = 100000
        self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
        # x is drawn first, y adds a unit-width normal smearing on top of x
        x_sample = 60 + 10 * np.random.randn(n_events)
        y_sample = x_sample + np.random.randn(n_events)
        points = np.vstack((x_sample, y_sample)).T
        self.h1.fill_array(points)

        self.h1 = fix_overflow(self.h1)

        single_entry = {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'}
        self.histogram_information = [single_entry]
        self.histograms = [
            item['hist'] for item in self.histogram_information
        ]

        # requirements for new binning
        self.p_min, self.s_min, self.n_min = 0.5, 0.5, 1000

        self.bin_edges = []
        self.purities_GetBinContent = []
        self.stabilities_GetBinContent = []
        self.n_events_GetBinContent = []

        self.purities_Integral = []
        self.stabilities_Integral = []
        self.n_events_Integral = []

        first_hist = self.histograms[0]
        n_bins = first_hist.GetNbinsX()

        bin_start = 0
        bin_end = 0
        while bin_end < n_bins:
            result = pick_bins.get_next_end(self.histograms, bin_start,
                                            bin_end, self.p_min, self.s_min,
                                            self.n_min, 0)
            # exactly four values are expected from get_next_end here
            bin_end, purity, stability, n_gen_and_reco = result
            if not self.bin_edges:
                # nothing stored yet: begin with the low edge of the first bin
                low_edge = first_hist.GetXaxis().GetBinLowEdge(bin_start + 1)
                self.bin_edges.append(low_edge)
            high_edge = (first_hist.GetXaxis().GetBinLowEdge(bin_end) +
                         first_hist.GetXaxis().GetBinWidth(bin_end))
            self.bin_edges.append(high_edge)
            self.purities_Integral.append(purity)
            self.stabilities_Integral.append(stability)
            self.n_events_Integral.append(n_gen_and_reco)
            bin_start = bin_end

        self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)

        self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
        self.stabilities_GetBinContent = calculate_stabilities(
            self.h1_rebinned)
        # diagonal content of every rebinned bin
        diagonal = []
        for i in range(1, len(self.bin_edges)):
            diagonal.append(int(self.h1_rebinned.GetBinContent(i, i)))
        self.n_events_GetBinContent = diagonal
Example #16
0
    def setUp(self):
        """Set up three toy 2D histograms, their binnings and rebinned copies.

        h1 is the full sample, h2 has a fifth of the events and h3 half the
        x spread. pick_bins.get_best_binning is called for h1, h2 and h3
        separately and for the pairs (h1, h2) and (h1, h3); the resulting
        edges are stored as bin_edges_* and used to rebin the histograms.
        """
        # create histograms
        self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
        self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
        self.h3 = Hist2D(60, 40, 100, 60, 40, 100)

        size_full = 10000
        size_small = int(size_full / 5)
        # the x samples are drawn before any of the y samples
        x_a = 60 + 10 * np.random.randn(size_full)
        x_b = 60 + 10 * np.random.randn(size_small)
        x_c = 60 + 5 * np.random.randn(size_full)
        y_a = x_a + np.random.randn(size_full)
        y_b = x_b + np.random.randn(size_small)
        y_c = x_c + np.random.randn(size_full)

        points_a = np.vstack((x_a, y_a)).T
        points_b = np.vstack((x_b, y_b)).T
        points_c = np.vstack((x_c, y_c)).T
        # fill the histograms with our distributions
        self.h1.fill_array(points_a)
        # reduced number of events
        self.h2.fill_array(points_b)
        # reduced spread
        self.h3.fill_array(points_c)

        def make_info(hist, channel):
            # always a new dict, so that no two lists share an entry
            return {'hist': hist, 'CoM': 7, 'channel': channel}

        self.histogram_information_1 = [make_info(self.h1, 'test_1')]
        self.histogram_information_2 = [make_info(self.h2, 'test_2')]
        self.histogram_information_3 = [make_info(self.h3, 'test_3')]
        self.histogram_information_1_2 = [
            make_info(self.h1, 'test_1'),
            make_info(self.h2, 'test_2'),
        ]
        self.histogram_information_1_3 = [
            make_info(self.h1, 'test_1'),
            make_info(self.h3, 'test_3'),
        ]

        # requirements for new binning
        self.p_min, self.s_min, self.n_min = 0.5, 0.5, 100
        min_width = 0.000000000000001

        def best_edges(information):
            # only the bin edges of the (edges, information) result are kept
            edges, _ = pick_bins.get_best_binning(
                information,
                self.p_min,
                self.s_min,
                self.n_min,
                min_width,
            )
            return edges

        self.bin_edges_1 = best_edges(self.histogram_information_1)
        self.bin_edges_2 = best_edges(self.histogram_information_2)
        self.bin_edges_3 = best_edges(self.histogram_information_3)
        self.bin_edges_1_2 = best_edges(self.histogram_information_1_2)
        self.bin_edges_1_3 = best_edges(self.histogram_information_1_3)

        # (attribute to set, histogram to rebin, edges for both axes);
        # the combined binnings are applied to h1
        rebin_plan = (
            ('h1_rebinned', self.h1, self.bin_edges_1),
            ('h2_rebinned', self.h2, self.bin_edges_2),
            ('h3_rebinned', self.h3, self.bin_edges_3),
            ('h1_2_rebinned', self.h1, self.bin_edges_1_2),
            ('h1_3_rebinned', self.h1, self.bin_edges_1_3),
        )
        for attribute, hist, edges in rebin_plan:
            setattr(self, attribute, rebin_2d(hist, edges, edges))