def get_unfold_histogram_tuple(
        inputfile,
        variable,
        channel,
        met_type='patType1CorrectedPFMet',
        centre_of_mass=8,
        ttbar_xsection=245.8,
        luminosity=19712,
        load_fakes=False,
        scale_to_lumi=False):
    '''
        Read the truth, measured, response and (optionally) fake histograms
        for one variable and channel from an open input file.

        For channel == 'combined' the call is forwarded unchanged to
        get_combined_unfold_histogram_tuple and its result is returned.

        Parameters:
            inputfile      -- file object offering Get() and, when
                              scale_to_lumi is set, EventFilter.EventCounter
            variable       -- variable name used in the folder name; if it
                              contains 'HT' the folder name carries no
                              met_type suffix
            channel        -- channel name used in the folder name, or
                              'combined'
            met_type       -- suffix of the folder name for non-HT variables
            centre_of_mass -- only passed on to the combined-channel function
            ttbar_xsection, luminosity
                           -- multiplied together and divided by the number of
                              processed events to get the scale weight
            load_fakes     -- also read (and scale / overflow-fix) folder.fake
            scale_to_lumi  -- scale all loaded histograms by the weight above

        Returns the tuple (h_truth, h_measured, h_response, h_fakes);
        h_fakes is None when load_fakes is false.
    '''
    # the combined channel has its own dedicated reader
    if channel == 'combined':
        return get_combined_unfold_histogram_tuple(
            inputfile=inputfile,
            variable=variable,
            met_type=met_type,
            centre_of_mass=centre_of_mass,
            ttbar_xsection=ttbar_xsection,
            luminosity=luminosity,
            load_fakes=load_fakes,
            scale_to_lumi=scale_to_lumi,
        )

    if 'HT' in variable:
        folder_name = 'unfolding_%s_analyser_%s_channel' % (variable, channel)
    else:
        folder_name = 'unfolding_%s_analyser_%s_channel_%s' % (
            variable, channel, met_type)
    folder = inputfile.Get(folder_name)

    h_truth = asrootpy(folder.truth.Clone())
    h_measured = asrootpy(folder.measured.Clone())
    # response matrix is always without fakes
    # fake subtraction from measured is performed automatically in
    # RooUnfoldSvd (h_measured - h_response->ProjectionX())
    # or manually for TSVDUnfold
    # fix for a bug/typo in NTupleTools
    h_response = asrootpy(folder.response_without_fakes.Clone())
    h_fakes = asrootpy(folder.fake.Clone()) if load_fakes else None

    if scale_to_lumi:
        # number of processed events
        n_processed = inputfile.EventFilter.EventCounter.GetBinContent(1)
        lumi_weight = ttbar_xsection * luminosity / n_processed
        if load_fakes:
            h_fakes.Scale(lumi_weight)
        for loaded_hist in (h_truth, h_measured, h_response):
            loaded_hist.Scale(lumi_weight)

    h_truth = fix_overflow(h_truth)
    h_measured = fix_overflow(h_measured)
    h_response = fix_overflow(h_response)
    if load_fakes:
        h_fakes = fix_overflow(h_fakes)

    return h_truth, h_measured, h_response, h_fakes
 def test_overflow_2D( self ):
     '''
         Check fix_overflow on the 2D histogram self.h2, using the summary
         dictionary returned by check_overflow_in_2DHist before and after
         the fix: overflow must be present beforehand, must be reported as
         0 afterwards, and the last-bin contents afterwards must equal the
         earlier last-bin contents plus the earlier overflow.
     '''
     before = check_overflow_in_2DHist( self.h2 )
     flag_x, flag_y = before['has_overflow_in_x'], before['has_overflow_in_y']

     # the fixture is only meaningful if it starts with overflow in both axes
     self.assertGreater( flag_x, 0, '2D hist: No overflow in x present, wrong setup.' )
     self.assertGreater( flag_y, 0, '2D hist: No overflow in y present, wrong setup.' )

     fixed_hist = fix_overflow( self.h2 )
     after = check_overflow_in_2DHist( fixed_hist )
     flag_x, flag_y = after['has_overflow_in_x'], after['has_overflow_in_y']

     # check if overflow has been reset
     self.assertEqual( flag_x, 0, '2D hist: Overflow in x is not 0.' )
     self.assertEqual( flag_y, 0, '2D hist: Overflow in y is not 0.' )

     # expected last-bin content = old last-bin content + old overflow
     expected_x = [over + content for over, content
                   in zip( before['overflow_x'], before['last_bin_content_x'] )]
     expected_y = [over + content for over, content
                   in zip( before['overflow_y'], before['last_bin_content_y'] )]
     actual_x = after['last_bin_content_x']
     actual_y = after['last_bin_content_y']

     # remember, the last item in each list is actually the overflow, which
     # should be 0, so the sum above is not correct for it.
     # NOTE(review): the slices drop the last TWO items although the note
     # above only mentions the last one - confirm against
     # check_overflow_in_2DHist.
     self.assertTrue( check_equal_lists( expected_x[:-2], actual_x[:-2] ),
                      '2D hist: last bins in x are not correct.' )
     self.assertTrue( check_equal_lists( expected_y[:-2], actual_y[:-2] ),
                      '2D hist: last bins in y are not correct.' )
    def calculate_normalisation(self):
        '''
            Fill self.normalisation from the histograms of self.measurement.

            Does nothing if self.have_normalisation is already set.
            Otherwise, for every sample:
              - the histogram is passed through fix_overflow and stored back,
              - its (value, error) list is kept in self.initial_normalisation,
              - for the background-subtraction method every sample except
                'TTJet' starts out with that initial normalisation.
            Afterwards the method selected by self.method is run, all
            values and errors in self.normalisation are rounded to one
            decimal place and self.have_normalisation is set.
        '''
        if self.have_normalisation:
            return

        histograms = self.measurement.histograms
        for sample, raw_hist in histograms.items():
            # TODO: this should be a list of bin-contents
            fixed_hist = fix_overflow(raw_hist)
            histograms[sample] = fixed_hist

            initial = hist_to_value_error_tuplelist(fixed_hist)
            self.initial_normalisation[sample] = initial
            if sample != 'TTJet' and self.method == self.BACKGROUND_SUBTRACTION:
                self.normalisation[sample] = initial

        # two independent checks, evaluated one after the other
        if self.method == self.BACKGROUND_SUBTRACTION:
            self.background_subtraction(histograms)
        if self.method == self.SIMULTANEOUS_FIT:
            self.simultaneous_fit(histograms)

        # next, let's round all numbers (they are event numbers after all)
        for sample, values in self.normalisation.items():
            self.normalisation[sample] = [
                (round(value, 1), round(error, 1)) for value, error in values
            ]

        self.have_normalisation = True
    def setUp( self ):
        '''
            Prepare the fixture:
              1. fill a 2D histogram with random, correlated (x, y) pairs
                 and pass it through fix_overflow,
              2. walk over its x bins with pick_bins.get_next_end to collect
                 bin edges together with the purity, stability and event
                 count returned for each new bin (the *_Integral lists),
              3. rebin the histogram with those edges on both axes and
                 compute the same quantities from the rebinned histogram
                 (the *_GetBinContent lists).
        '''
        # create histograms
        self.h1 = Hist2D( 60, 40, 100, 60, 40, 100 )
        n_entries = 100000
        # x: normal distribution around 60 with width 10; y: x plus unit-width noise
        x_values = 60 + 10 * np.random.randn( n_entries )
        y_values = x_values + np.random.randn( n_entries )
        xy_pairs = np.vstack( ( x_values, y_values ) ).T
        self.h1.fill_array( xy_pairs )

        self.h1 = fix_overflow( self.h1 )

        self.histogram_information = [
            {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'},
        ]
        self.histograms = [entry['hist'] for entry in self.histogram_information]

        # requirements for new binning
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 1000

        self.bin_edges = []
        self.purities_GetBinContent = []
        self.stabilities_GetBinContent = []
        self.n_events_GetBinContent = []

        self.purities_Integral = []
        self.stabilities_Integral = []
        self.n_events_Integral = []

        reference_hist = self.histograms[0]
        n_bins = reference_hist.GetNbinsX()

        bin_start, bin_end = 0, 0
        while bin_end < n_bins:
            bin_end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
                self.histograms, bin_start, bin_end,
                self.p_min, self.s_min, self.n_min )
            if not self.bin_edges:
                # very first bin: start with its lower edge
                self.bin_edges.append( reference_hist.GetXaxis().GetBinLowEdge( bin_start + 1 ) )
            # upper edge of the bin that closes the current range
            upper_edge = reference_hist.GetXaxis().GetBinLowEdge( bin_end ) + reference_hist.GetXaxis().GetBinWidth( bin_end )
            self.bin_edges.append( upper_edge )
            self.purities_Integral.append( purity )
            self.stabilities_Integral.append( stability )
            self.n_events_Integral.append( n_gen_and_reco )
            bin_start = bin_end

        self.h1_rebinned = rebin_2d( self.h1, self.bin_edges, self.bin_edges )

        self.purities_GetBinContent = calculate_purities( self.h1_rebinned )
        self.stabilities_GetBinContent = calculate_stabilities( self.h1_rebinned )
        # diagonal bin contents of the rebinned histogram, one per new bin
        n_edges = len( self.bin_edges )
        self.n_events_GetBinContent = [int( self.h1_rebinned.GetBinContent( i, i ) ) for i in range( 1, n_edges )]
 def test_overflow_1D( self ):
     '''
         Check fix_overflow on the 1D histogram self.h1: the bin after the
         last one must be 0 afterwards and the last bin must hold its
         previous content plus the previous content of that bin.
     '''
     n_bins = self.h1.nbins()
     i_overflow = n_bins + 1
     overflow_before = self.h1.GetBinContent( i_overflow )
     last_before = self.h1.GetBinContent( n_bins )

     # the fixture is only meaningful if it starts with some overflow
     self.assertGreater( overflow_before, 0, '1D hist: No overflow present, wrong setup.' )

     fixed_hist = fix_overflow( self.h1 )
     overflow_after = fixed_hist.GetBinContent( i_overflow )
     self.assertEqual( overflow_after, 0., '1D hist: Overflow bin is not 0.' )
     last_after = fixed_hist.GetBinContent( n_bins )
     self.assertEqual( last_after, last_before + overflow_before, '1D hist: last bin is not correct.' )
    def test_overflow_1D(self):
        '''
            Check fix_overflow on the 1D histogram self.h1: the bin after
            the last one must be 0 afterwards and the last bin must hold its
            previous content plus the previous content of that bin.
        '''
        n_bins = self.h1.nbins()
        i_overflow = n_bins + 1
        overflow_before = self.h1.GetBinContent(i_overflow)
        last_before = self.h1.GetBinContent(n_bins)

        # the fixture is only meaningful if it starts with some overflow
        self.assertGreater(
            overflow_before, 0, '1D hist: No overflow present, wrong setup.')

        fixed_hist = fix_overflow(self.h1)
        overflow_after = fixed_hist.GetBinContent(i_overflow)
        self.assertEqual(
            overflow_after, 0., '1D hist: Overflow bin is not 0.')
        last_after = fixed_hist.GetBinContent(n_bins)
        self.assertEqual(
            last_after, last_before + overflow_before,
            '1D hist: last bin is not correct.')
    def test_overflow_2D(self):
        '''
            Check fix_overflow on the 2D histogram self.h2, using the
            summary dictionary returned by check_overflow_in_2DHist before
            and after the fix: overflow must be present beforehand, must be
            reported as 0 afterwards, and the last-bin contents afterwards
            must equal the earlier last-bin contents plus the earlier
            overflow.
        '''
        before = check_overflow_in_2DHist(self.h2)
        flag_x, flag_y = (before['has_overflow_in_x'],
                          before['has_overflow_in_y'])

        # the fixture is only meaningful if it starts with overflow in
        # both axes
        self.assertGreater(
            flag_x, 0, '2D hist: No overflow in x present, wrong setup.')
        self.assertGreater(
            flag_y, 0, '2D hist: No overflow in y present, wrong setup.')

        fixed_hist = fix_overflow(self.h2)
        after = check_overflow_in_2DHist(fixed_hist)
        flag_x, flag_y = (after['has_overflow_in_x'],
                          after['has_overflow_in_y'])

        # check if overflow has been reset
        self.assertEqual(flag_x, 0, '2D hist: Overflow in x is not 0.')
        self.assertEqual(flag_y, 0, '2D hist: Overflow in y is not 0.')

        # expected last-bin content = old last-bin content + old overflow
        expected_x = [
            over + content for over, content in zip(
                before['overflow_x'], before['last_bin_content_x'])
        ]
        expected_y = [
            over + content for over, content in zip(
                before['overflow_y'], before['last_bin_content_y'])
        ]
        actual_x = after['last_bin_content_x']
        actual_y = after['last_bin_content_y']

        # remember, the last item in each list is actually the overflow,
        # which should be 0, so the sum above is not correct for it.
        # NOTE(review): the slices drop the last TWO items although the note
        # above only mentions the last one - confirm against
        # check_overflow_in_2DHist.
        x_matches = check_equal_lists(expected_x[:-2], actual_x[:-2])
        self.assertTrue(x_matches, '2D hist: last bins in x are not correct.')
        y_matches = check_equal_lists(expected_y[:-2], actual_y[:-2])
        self.assertTrue(y_matches, '2D hist: last bins in y are not correct.')