Example #1
0
def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, x_min=None):
    """
    Derive one set of bin edges for all histograms and annotate each entry.

    Bin boundaries are grown one after another with ``get_next_end``, which
    receives every histogram together with ``p_min``, ``s_min``, ``n_min``
    and ``min_width``, until the last X bin of the first histogram has been
    consumed. When ``x_min`` is given the scan starts at the bin of the first
    histogram's X projection that contains ``x_min``.

    Every entry of ``histogram_information`` is then rebinned (same edges on
    both axes) and gets three new keys: ``"p_i"`` (purities), ``"s_i"``
    (stabilities) and ``"N"`` (X-projection content of each new bin, as int).
    If the last bin of an entry is below ``p_min``, ``s_min`` or ``n_min``
    and there are more than three bins, the last two bins are merged and the
    numbers for that entry are recomputed.

    Returns ``(bin_edges, histogram_information)``.
    """
    histograms = [entry["hist"] for entry in histogram_information]
    reference = histograms[0]
    last_bin = reference.GetNbinsX()
    x_axis = reference.GetXaxis()

    # NOTE(review): this is a truthiness test, so x_min=0 is treated like
    # "no x_min given" -- confirm that this is intended.
    start = 0
    if x_min:
        start = reference.ProjectionX().FindBin(x_min) - 1
    end = start

    bin_edges = []
    while end < last_bin:
        end, _, _, _ = get_next_end(histograms, start, end, p_min, s_min, n_min, min_width)
        if not bin_edges:
            # very first boundary: lower edge of the first bin that is used
            bin_edges.append(x_axis.GetBinLowEdge(start + 1))
        # upper edge of the bin that closes the new, wider bin
        bin_edges.append(x_axis.GetBinLowEdge(end) + x_axis.GetBinWidth(end))
        start = end

    def _quality(hist_2d, edges):
        # purities, stabilities and per-bin event counts of a rebinned histogram
        content_of = hist_2d.ProjectionX().GetBinContent
        purity = calculate_purities(hist_2d.Clone())
        stability = calculate_stabilities(hist_2d.Clone())
        counts = [int(content_of(index)) for index in range(1, len(edges))]
        return purity, stability, counts

    # NOTE(review): a merge below changes bin_edges for all later entries,
    # while entries handled earlier keep values computed with the old edges.
    for entry in histogram_information:
        rebinned = rebin_2d(entry["hist"], bin_edges, bin_edges).Clone(
            entry["channel"] + "_" + str(entry["CoM"])
        )
        purities, stabilities, n_events = _quality(rebinned, bin_edges)

        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails and len(purities) > 3:
            # drop the boundary between the last two bins, i.e. merge them
            del bin_edges[-2]
            rebinned = rebin_2d(entry["hist"], bin_edges, bin_edges).Clone()
            purities, stabilities, n_events = _quality(rebinned, bin_edges)

        entry["p_i"] = purities
        entry["s_i"] = stabilities
        entry["N"] = n_events

    return bin_edges, histogram_information
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
    Find common bin edges for all histograms and store the resulting
    purities, stabilities and event counts in each information dictionary.

    The edges are built by repeatedly asking get_next_end (given all
    histograms, p_min, s_min, n_min and min_width) for the end of the next
    bin until the last X bin of the first histogram is reached. If x_min is
    given, the search starts at the bin of the first histogram's X projection
    that contains x_min.

    Afterwards each histogram is rebinned with these edges on both axes and
    its dictionary gets the keys 'p_i', 's_i' and 'N'. When the last bin does
    not satisfy p_min, s_min or n_min and more than three bins exist, the
    last two bins are merged and the values are calculated again.

    Returns the tuple ( bin_edges, histogram_information ).
    '''
    histograms = [entry['hist'] for entry in histogram_information]
    reference = histograms[0]
    last_bin = reference.GetNbinsX()

    # indices of the bins that delimit the bin currently being grown
    lower = 0
    # NOTE(review): truthiness test, x_min = 0 is handled like "not given"
    if x_min:
        lower = reference.ProjectionX().FindBin( x_min ) - 1
    upper = lower

    x_axis = reference.GetXaxis()
    bin_edges = []
    while upper < last_bin:
        upper, _, _, _ = get_next_end( histograms, lower, upper, p_min, s_min, n_min, min_width )
        if len( bin_edges ) == 0:
            # the lower edge of the very first bin has to be added once
            bin_edges.append( x_axis.GetBinLowEdge( lower + 1 ) )
        upper_edge = x_axis.GetBinLowEdge( upper ) + x_axis.GetBinWidth( upper )
        bin_edges.append( upper_edge )
        lower = upper

    # purity, stability and number of events for the final binning
    # NOTE(review): merging inside this loop changes bin_edges for the
    # following entries but does not update entries already processed
    for entry in histogram_information:
        source = entry['hist']
        rebinned = rebin_2d( source, bin_edges, bin_edges ).Clone( entry['channel'] + '_' + str( entry['CoM'] ) )
        projected_content = rebinned.ProjectionX().GetBinContent
        purities = calculate_purities( rebinned.Clone() )
        stabilities = calculate_stabilities( rebinned.Clone() )
        n_events = [int( projected_content( index ) ) for index in range( 1, len( bin_edges ) )]

        # does the last bin satisfy the requirements as well?
        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails and len( purities ) > 3:
            # no: remove the edge between the last two bins to merge them
            del bin_edges[-2]
            rebinned = rebin_2d( source, bin_edges, bin_edges ).Clone()
            projected_content = rebinned.ProjectionX().GetBinContent
            purities = calculate_purities( rebinned.Clone() )
            stabilities = calculate_stabilities( rebinned.Clone() )
            n_events = [int( projected_content( index ) ) for index in range( 1, len( bin_edges ) )]

        entry['p_i'] = purities
        entry['s_i'] = stabilities
        entry['N'] = n_events

    return bin_edges, histogram_information
def get_best_binning( histogram_information, p_min, s_min, n_min ):
    '''
    Find common bin edges for all histograms, starting at the first bin.

    get_next_end (given all histograms, p_min, s_min and n_min) is called
    repeatedly to obtain the end of the next bin until no more bins can be
    created, i.e. until the last X bin of the first histogram is reached.

    Each histogram is then rebinned with these edges on both axes and its
    dictionary gets the keys 'p_i' (purities), 's_i' (stabilities) and 'N'
    (content of the diagonal bins ( i, i ), as int). When the last bin does
    not satisfy p_min, s_min or n_min, the last two bins are merged and the
    values are calculated again.

    Returns the tuple ( bin_edges, histogram_information ).
    '''
    histograms = [entry['hist'] for entry in histogram_information]
    reference = histograms[0]
    last_bin = reference.GetNbinsX()
    x_axis = reference.GetXaxis()

    bin_edges = []
    lower, upper = 0, 0
    while upper < last_bin:
        upper, _, _, _ = get_next_end( histograms, lower, upper, p_min, s_min, n_min )
        if len( bin_edges ) == 0:
            # the lower edge of the very first bin has to be added once
            bin_edges.append( x_axis.GetBinLowEdge( lower + 1 ) )
        bin_edges.append( x_axis.GetBinLowEdge( upper ) + x_axis.GetBinWidth( upper ) )
        lower = upper

    def diagonal_counts( hist_2d ):
        # integer content of the bins ( i, i ) for the current bin_edges
        content_of = hist_2d.GetBinContent
        return [int( content_of( index, index ) ) for index in range( 1, len( bin_edges ) )]

    # purity, stability and number of events for the final binning
    # NOTE(review): merging inside this loop changes bin_edges for the
    # following entries but does not update entries already processed
    for entry in histogram_information:
        source = entry['hist']
        rebinned = rebin_2d( source, bin_edges, bin_edges ).Clone( entry['channel'] + '_' + str( entry['CoM'] ) )
        purities = calculate_purities( rebinned.Clone() )
        stabilities = calculate_stabilities( rebinned.Clone() )
        n_events = diagonal_counts( rebinned )

        # does the last bin satisfy the requirements as well?
        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails:
            # no: remove the edge between the last two bins to merge them
            del bin_edges[-2]
            rebinned = rebin_2d( source, bin_edges, bin_edges ).Clone()
            purities = calculate_purities( rebinned.Clone() )
            stabilities = calculate_stabilities( rebinned.Clone() )
            n_events = diagonal_counts( rebinned )

        entry['p_i'] = purities
        entry['s_i'] = stabilities
        entry['N'] = n_events

    return bin_edges, histogram_information
 def test_rebin_2d_not_on_boundaries( self ):
     '''
     Rebin the diagonal histogram with edges that do not coincide with its
     bin boundaries and compare every diagonal bin with the expected value.
     '''
     edges = self.bin_edges_not_on_boundaries
     rebinned = rebin_2d( self.diagonals, edges, edges )
     n_bins = rebinned.nbins()
     for bin_index in range( 1, n_bins + 1 ):
         observed = rebinned.GetBinContent( bin_index, bin_index )
         self.assertEqual( observed, self.result_not_on_boundaries[bin_index - 1], 'histogram contents do not match' )
 def setUp( self ):
     '''
     Create the histograms, bin edges and expected results used by the tests.

     Sets up:
      - h1: 1D histogram filled from the module-level sequence x1
      - h2: 2D histogram filled with a million multivariate-normal points,
        plus two rebinned versions of it (h2_rebinned via the histogram's
        own rebinned(), h2_rebinned_2 via rebin_2d)
      - diagonals: 5x5 histogram with a single entry in each diagonal bin,
        together with three sets of bin edges and the diagonal contents
        expected after rebinning with them
      - simple: 1D histogram filled once at each integer from 0 to 99
     '''
     # create histograms
     self.h1 = Hist( 100, 0, 100, title = '1D' )
     self.h2 = Hist2D( 60, 40, 100, 60, 40, 100 )
     self.simple = Hist( 100, 0, 100, title = '1D' )
 
     # fill the histograms with our distributions
     # An explicit loop instead of map(): map() is lazy on Python 3, so the
     # Fill calls would never be executed there and h1 would stay empty.
     for value in x1:
         self.h1.Fill( value )
     # NOTE(review): np.arange( 4 ).reshape( 2, 2 ) is [[0, 1], [2, 3]], which
     # is not symmetric -- confirm that this is the intended covariance.
     self.h2.fill_array( np.random.multivariate_normal( 
                                 mean = ( 50, 50 ),
                                 cov = np.arange( 4 ).reshape( 2, 2 ),
                                 size = ( int( 1E6 ), ) ) 
                     )
     self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]
     # rebin them
     self.h2_rebinned = self.h2.rebinned( self.bins, axis = 0 )
     self.h2_rebinned = self.h2_rebinned.rebinned( self.bins, axis = 1 )
     self.h2_rebinned_2 = rebin_2d( self.h2, self.bins, self.bins )
     
     # we only test symmetric bins for now
     self.n_bins_x = 5
     self.n_bins_y = 5
     # only entries in diagonals, p = 1, s = 1 for all bins
     self.diagonals = Hist2D( self.n_bins_x, 0, 10, self.n_bins_y, 0, 10 )
     # this creates
     # [0, 0, 0, 0, 1],
     # [0, 0, 0, 1, 0],
     # [0, 0, 1, 0, 0],
     # [0, 1, 0, 0, 0],
     # [1, 0, 0, 0, 0]
     for i in range( 1, self.n_bins_x + 1 ):
         self.diagonals.SetBinContent( i, i, 1 )
     
     # the following should result in
     # [0, 0, 2],
     # [0, 2, 0],
     # [1, 0, 0]    
     self.bin_edges_nice = [0, 2, 6, 10]
     self.result_nice = [1, 2, 2]
     
     # the following should result in
     # [0, 0, 0, 2],
     # [0, 0, 2, 0]
     # [0, 1, 0, 0]
     # [0, 0, 0, 0]  
     self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
     self.result_out_of_bound = [0, 1, 2, 2]
     # the following should result in
     # [0, 0, 2],
     # [0, 1, 0],
     # [2, 0, 0] 
     self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
     self.result_not_on_boundaries = [2, 1, 2]
     
     # one entry of weight 1 at each integer value 0..99
     for i in range(100):
         self.simple.Fill(i, 1)
    def setUp( self ):
        '''
        Build one filled 2D histogram and derive a binning for it in two ways.

        The bin edges are found by calling pick_bins.get_next_end in a loop;
        the purity, stability and event number it returns for every bin are
        stored in the *_Integral lists. The histogram is then rebinned with
        these edges and the same quantities are obtained from the rebinned
        histogram (calculate_purities, calculate_stabilities and the diagonal
        bin contents) and stored in the *_GetBinContent attributes.
        '''
        # create histograms
        # self.h1 = Hist2D( 15, 0, 15, 15, 0, 15 )
        # x_1 = [1, 3, 7, 7, 8, 1, 12, 7, 8, 6]
        # y_1 = [1, 7, 3, 7, 8, 12, 7, 12, 13, 11]
        n_points = 100000
        x_values = 60 + 10 * np.random.randn( n_points )
        y_values = x_values + np.random.randn( n_points )
        points = np.vstack( ( x_values, y_values ) ).T

        filled = Hist2D( 60, 40, 100, 60, 40, 100 )
        filled.fill_array( points )
        self.h1 = fix_overflow( filled )

        self.histogram_information = [{'hist': self.h1, 'CoM': 7, 'channel':'test_1'}]
        self.histograms = [entry['hist'] for entry in self.histogram_information]

        # requirements for new binning
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 1000

        # results container, filled below
        self.bin_edges = []
        self.purities_GetBinContent = []
        self.stabilities_GetBinContent = []
        self.n_events_GetBinContent = []
        self.purities_Integral = []
        self.stabilities_Integral = []
        self.n_events_Integral = []

        reference = self.histograms[0]
        x_axis = reference.GetXaxis()
        last_bin = reference.GetNbinsX()

        lower, upper = 0, 0
        while upper < last_bin:
            upper, purity, stability, n_in_bin = pick_bins.get_next_end( self.histograms, lower, upper,
                                                                        self.p_min, self.s_min, self.n_min )
            if len( self.bin_edges ) == 0:
                # the lower edge of the very first bin has to be added once
                self.bin_edges.append( x_axis.GetBinLowEdge( lower + 1 ) )
            self.bin_edges.append( x_axis.GetBinLowEdge( upper ) + x_axis.GetBinWidth( upper ) )
            self.purities_Integral.append( purity )
            self.stabilities_Integral.append( stability )
            self.n_events_Integral.append( n_in_bin )
            lower = upper

        self.h1_rebinned = rebin_2d( self.h1, self.bin_edges, self.bin_edges )

        rebinned = self.h1_rebinned
        self.purities_GetBinContent = calculate_purities( rebinned )
        self.stabilities_GetBinContent = calculate_stabilities( rebinned )
        self.n_events_GetBinContent = [int( rebinned.GetBinContent( index, index ) )
                                       for index in range( 1, len( self.bin_edges ) )]
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
    Plot purity and stability per bin for one histogram and save the figure.

    The histogram is read with get_histogram_from_file, rebinned with
    bin_edges on both axes, and its purities and stabilities are drawn as two
    histograms (purity red, stability blue) on a single axis with y range
    [0, 1].

    Parameters:
        input_file: passed to get_histogram_from_file as the file argument
        histogram: passed to get_histogram_from_file as the histogram argument
        bin_edges: bin edges used for both axes; first and last edge also
            define the x range of the plot
        channel: used in the name of the output files
        variable: key into variables_latex for the x axis title, also used
            in the name of the output files
        isVisiblePhaseSpace: not used by this function

    Relies on the module-level names output_folder, output_formats, options,
    variables_latex, CMS and my_cmap. Writes 'test.pdf' into the current
    directory and one file per entry of output_formats into output_folder.
    '''
    hist = get_histogram_from_file( histogram, input_file )
    # Single-argument print calls: same output as the former Python 2 print
    # statements, and valid syntax on Python 3 as well.
    print( "bin edges contents   :  %s" % ( bin_edges, ) )
    new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone()

    purities = calculate_purities( new_hist.Clone() )
    stabilities = calculate_stabilities( new_hist.Clone() )
    print( "purities contents    :  %s" % ( purities, ) )
    print( "stabilities contents :  %s" % ( stabilities, ) )

    hist_stability = value_tuplelist_to_hist(stabilities, bin_edges)
    hist_purity = value_tuplelist_to_hist(purities, bin_edges)

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0,1]
    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12
    rplt.hist( hist_stability , stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' )
    rplt.hist( hist_purity, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' )

    ax0.set_xlim( x_limits )
    ax0.set_ylim( y_limits )

    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    x_title = '$' + variables_latex[variable] + '$ [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )

    plt.legend(loc=4,prop={'size':40})

    plt.tight_layout()

    # NOTE(review): looks like a debugging leftover, it is overwritten on
    # every call -- confirm whether 'test.pdf' is still needed.
    plt.savefig('test.pdf')
    save_as_name = 'purityStability_'+channel + '_' + variable + '_' + str(options.CoM) + 'TeV'
    for output_format in output_formats:
        plt.savefig( output_folder + save_as_name + '.' + output_format )
    # pyplot keeps every figure alive until it is closed explicitly; without
    # this each call leaves a large figure in memory.
    plt.close()
    def setUp( self ):
        '''
        Create three filled 2D histograms, find the best binning for several
        combinations of them and rebin the histograms accordingly.

        h1 is the reference sample, h2 has a fifth of the events and h3 has
        half the spread. Bin edges are determined with
        pick_bins.get_best_binning for h1, h2 and h3 on their own and for the
        combinations ( h1, h2 ) and ( h1, h3 ).
        '''
        # create histograms, all with the same binning on both axes
        axis = ( 60, 40, 100 )
        self.h1 = Hist2D( *( axis * 2 ) )
        self.h2 = Hist2D( *( axis * 2 ) )
        self.h3 = Hist2D( *( axis * 2 ) )

        n_full = 10000
        n_reduced = int( n_full / 5 )
        # the random numbers are drawn in the order x_1, x_2, x_3, y_1, y_2, y_3
        x_1 = 60 + 10 * np.random.randn( n_full )
        x_2 = 60 + 10 * np.random.randn( n_reduced )
        x_3 = 60 + 5 * np.random.randn( n_full )
        y_1 = x_1 + np.random.randn( n_full )
        y_2 = x_2 + np.random.randn( n_reduced )
        y_3 = x_3 + np.random.randn( n_full )

        points_1 = np.vstack( ( x_1, y_1 ) ).T
        points_2 = np.vstack( ( x_2, y_2 ) ).T
        points_3 = np.vstack( ( x_3, y_3 ) ).T
        # fill the histograms with our distributions:
        # reference, reduced number of events, reduced spread
        self.h1.fill_array( points_1 )
        self.h2.fill_array( points_2 )
        self.h3.fill_array( points_3 )

        def describe( hist, channel ):
            # every call returns a new dictionary, nothing is shared between lists
            return {'hist': hist, 'CoM': 7, 'channel': channel}

        self.histogram_information_1 = [describe( self.h1, 'test_1' )]
        self.histogram_information_2 = [describe( self.h2, 'test_2' )]
        self.histogram_information_3 = [describe( self.h3, 'test_3' )]
        self.histogram_information_1_2 = [describe( self.h1, 'test_1' ), describe( self.h2, 'test_2' )]
        self.histogram_information_1_3 = [describe( self.h1, 'test_1' ), describe( self.h3, 'test_3' )]

        # requirements for new binning
        self.p_min, self.s_min, self.n_min = 0.5, 0.5, 100

        def best_edges( information ):
            # only the bin edges are needed, the returned information is dropped
            edges, _ = pick_bins.get_best_binning( information, self.p_min, self.s_min, self.n_min )
            return edges

        self.bin_edges_1 = best_edges( self.histogram_information_1 )
        self.bin_edges_2 = best_edges( self.histogram_information_2 )
        self.bin_edges_3 = best_edges( self.histogram_information_3 )
        self.bin_edges_1_2 = best_edges( self.histogram_information_1_2 )
        self.bin_edges_1_3 = best_edges( self.histogram_information_1_3 )

        # the combined binnings are applied to h1
        self.h1_rebinned = rebin_2d( self.h1, self.bin_edges_1, self.bin_edges_1 )
        self.h2_rebinned = rebin_2d( self.h2, self.bin_edges_2, self.bin_edges_2 )
        self.h3_rebinned = rebin_2d( self.h3, self.bin_edges_3, self.bin_edges_3 )
        self.h1_2_rebinned = rebin_2d( self.h1, self.bin_edges_1_2, self.bin_edges_1_2 )
        self.h1_3_rebinned = rebin_2d( self.h1, self.bin_edges_1_3, self.bin_edges_1_3 )
    def setUp(self):
        """Prepare histograms, binnings and rebinned histograms for the tests.

        Three 2D histograms are filled with correlated (x, y) samples: h1 is
        the reference, h2 uses a fifth of the events and h3 half the spread.
        ``pick_bins.get_best_binning`` is run on h1, h2 and h3 separately and
        on the pairs (h1, h2) and (h1, h3); the resulting edges are used to
        rebin the histograms (the pair binnings are applied to h1).
        """
        # create histograms
        self.h1, self.h2, self.h3 = [
            Hist2D(60, 40, 100, 60, 40, 100) for _ in range(3)
        ]

        centre, wide, narrow = 60, 10, 5
        n_1 = 10000
        n_2 = int(n_1 / 5)
        # x samples first, then the y samples, each in the order 1, 2, 3
        x_1 = centre + wide * np.random.randn(n_1)
        x_2 = centre + wide * np.random.randn(n_2)
        x_3 = centre + narrow * np.random.randn(n_1)
        y_1 = x_1 + np.random.randn(n_1)
        y_2 = x_2 + np.random.randn(n_2)
        y_3 = x_3 + np.random.randn(n_1)

        samples = [
            np.vstack((x_1, y_1)).T,
            np.vstack((x_2, y_2)).T,
            np.vstack((x_3, y_3)).T,
        ]
        # fill the histograms with our distributions:
        # h1 reference, h2 reduced number of events, h3 reduced spread
        for hist, sample in zip((self.h1, self.h2, self.h3), samples):
            hist.fill_array(sample)

        # each list gets its own dictionaries, none are shared between lists
        self.histogram_information_1 = [
            dict(hist=self.h1, CoM=7, channel='test_1'),
        ]
        self.histogram_information_2 = [
            dict(hist=self.h2, CoM=7, channel='test_2'),
        ]
        self.histogram_information_3 = [
            dict(hist=self.h3, CoM=7, channel='test_3'),
        ]
        self.histogram_information_1_2 = [
            dict(hist=self.h1, CoM=7, channel='test_1'),
            dict(hist=self.h2, CoM=7, channel='test_2'),
        ]
        self.histogram_information_1_3 = [
            dict(hist=self.h1, CoM=7, channel='test_1'),
            dict(hist=self.h3, CoM=7, channel='test_3'),
        ]

        # requirements for new binning
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 100
        requirements = (self.p_min, self.s_min, self.n_min)

        self.bin_edges_1, _ = pick_bins.get_best_binning(
            self.histogram_information_1, *requirements)
        self.bin_edges_2, _ = pick_bins.get_best_binning(
            self.histogram_information_2, *requirements)
        self.bin_edges_3, _ = pick_bins.get_best_binning(
            self.histogram_information_3, *requirements)
        self.bin_edges_1_2, _ = pick_bins.get_best_binning(
            self.histogram_information_1_2, *requirements)
        self.bin_edges_1_3, _ = pick_bins.get_best_binning(
            self.histogram_information_1_3, *requirements)

        edges = self.bin_edges_1
        self.h1_rebinned = rebin_2d(self.h1, edges, edges)
        edges = self.bin_edges_2
        self.h2_rebinned = rebin_2d(self.h2, edges, edges)
        edges = self.bin_edges_3
        self.h3_rebinned = rebin_2d(self.h3, edges, edges)
        edges = self.bin_edges_1_2
        self.h1_2_rebinned = rebin_2d(self.h1, edges, edges)
        edges = self.bin_edges_1_3
        self.h1_3_rebinned = rebin_2d(self.h1, edges, edges)