def test_vectors( self ):
        # Reference vectors: each template is passed through
        # adjust_overflow_to_limit, scaled to unit integral and then read
        # out as a plain list of bin contents.
        expected = {}
        for sample, template in ( ( 'signal', self.h_signal ), ( 'bkg1', self.h_bkg1 ) ):
            adjusted = adjust_overflow_to_limit( template, x_min, x_max )
            adjusted.Scale(1/adjusted.Integral())
            expected[sample] = list( adjusted.y() )

        # Both the single FitData object and the single-region collection
        # have to reproduce the reference vectors exactly.
        # (The corresponding check on self.collection_1 is currently disabled.)
        vector_sources = [
            self.fit_data_1.vectors,
            self.single_collection.vectors(),
        ]
        for vectors in vector_sources:
            for sample in ( 'signal', 'bkg1' ):
                self.assertEqual(expected[sample], vectors[sample])
 def test_adjust_overflow_to_limit( self ):
     # Limits used for this example histogram.
     lower_limit, upper_limit = 40, 80
     original = self.h1
     adjusted = adjust_overflow_to_limit(original, lower_limit, upper_limit)

     # The total number of events must be unchanged; the adjusted
     # histogram is expected to have no overflow for this example.
     total_with_overflow = original.integral( overflow = True )
     self.assertEqual(total_with_overflow, adjusted.Integral())

     first_bin = original.FindBin(lower_limit)
     last_bin = original.FindBin(upper_limit)

     # The bin containing the lower limit must hold every event
     # with x <= lower limit (underflow included).
     events_below = original.integral(0, first_bin)
     self.assertEqual(events_below, adjusted.GetBinContent(first_bin))

     # The bin containing the upper limit must hold every event
     # with x >= upper limit (overflow included).
     events_above = original.integral( last_bin, original.nbins() + 1 )
     self.assertEqual( events_above, adjusted.GetBinContent( last_bin ) )
    def test_adjust_overflow_to_limit_simple( self ):
        # Limits used for the simple example histogram.
        lower_limit, upper_limit = 0, 95
        original = self.simple
        adjusted = adjust_overflow_to_limit(original, lower_limit, upper_limit)

        # The total number of events must be unchanged; the adjusted
        # histogram is expected to have no overflow for this example.
        total_with_overflow = original.integral( overflow = True )
        self.assertEqual( total_with_overflow, adjusted.integral() )

        first_bin = original.FindBin(lower_limit)
        last_bin = original.FindBin(upper_limit)

        # The bin containing the lower limit must hold every event
        # with x <= lower limit (underflow included).
        events_below = original.integral(0, first_bin)
        self.assertEqual(events_below, adjusted.GetBinContent(first_bin))

        # The bin containing the upper limit must hold every event
        # with x >= upper limit (overflow included).
        events_above = original.integral( last_bin, original.nbins() + 1)
        self.assertEqual( events_above, adjusted.GetBinContent( last_bin ) )
 def test_normalisation( self ):
     # Reference MC normalisation: integral of every input histogram
     # after it has been passed through adjust_overflow_to_limit.
     expected_mc = {}
     for name, histogram in self.histograms_1.iteritems():
         expected_mc[name] = adjust_overflow_to_limit(histogram, x_min, x_max).Integral()

     # The same numbers must be available through every access path.
     mc_sources = [
         self.fit_data_1.normalisation,
         self.single_collection.mc_normalisation(),
         self.collection_1.mc_normalisation( 'signal region' ),
         self.collection_1.mc_normalisation()['signal region'],
     ]
     for sample in expected_mc.keys():
         for reported in mc_sources:
             self.assertEqual( expected_mc[sample], reported[sample] )

     # data normalisation: integral of the data histogram including overflow
     expected_data = self.h_data.integral( overflow = True )
     data_sources = [
         self.fit_data_1.n_data(),
         self.single_collection.n_data(),
         self.collection_1.n_data( 'signal region' ),
         self.collection_1.n_data()['signal region'],
     ]
     for reported in data_sources:
         self.assertEqual( expected_data, reported )

     # max_n_data only has to agree within one event
     self.assertAlmostEqual(expected_data, self.collection_1.max_n_data(), delta = 1 )
def get_histograms( channel, input_files, variable, met_type, variable_bin,
                   b_tag_bin, rebin = 1, fit_variable = 'absolute_eta',
                   scale_factors = None ):
    '''
    Collect the fit-variable histograms of all samples for one variable bin.

    Parameters:
        channel: key into analysis_type; for fit_variable 'absolute_eta' it
            also prefixes the histogram name (channel + '_' + fit_variable).
        input_files: dictionary sample -> file name. Samples with an empty
            file name are skipped. Also handed to get_qcd_histograms.
        variable: key into measurement_config.histogram_path_templates and
            variable_bins_ROOT. For 'HT' the path is built without met_type.
        met_type: inserted into the histogram path for every variable except
            'HT'. 'JetResDown'/'JetResUp' are stripped from the path used for
            data; for 'patPFMet' types the MC path uses 'PFMET' instead.
        variable_bin: bin label inserted into the histogram path.
        b_tag_bin: b-tag bin passed to get_histogram.
        rebin: argument of Rebin applied to every sample histogram.
        fit_variable: key into measurement_config.fit_boundaries.
        scale_factors: optional dictionary source -> factor. Sources that
            contain 'luminosity' scale every sample except 'data'; in
            addition every sample whose name is contained in the source is
            scaled by the factor.

    Returns:
        Dictionary sample -> histogram limited to the fit boundaries via
        adjust_overflow_to_limit, including an additional 'QCD' entry.

    Exits with status 1 if the variable has no histogram path template.
    '''
    boundaries = measurement_config.fit_boundaries[fit_variable]
    histograms = {}
    if variable not in measurement_config.histogram_path_templates:
        print 'Fatal Error: unknown variable ', variable
        # a fatal error must not be reported as success (status 0)
        sys.exit( 1 )

    fit_variable_template = measurement_config.histogram_path_templates[variable]
    if fit_variable == 'absolute_eta':
        histogram_name = channel + '_' + fit_variable
    else:
        histogram_name = fit_variable

    if variable == 'HT':
        # the HT path template has no slot for the MET type
        fit_variable_name = fit_variable_template % ( analysis_type[channel],
                                                      variable_bin,
                                                      histogram_name )
        fit_variable_name_data = fit_variable_name
    else:
        fit_variable_name = fit_variable_template % ( analysis_type[channel],
                                                      met_type, variable_bin,
                                                      histogram_name )
        fit_variable_name_data = fit_variable_name
        if 'JetRes' in met_type:
            # the data path is the MC path without the JetRes variation
            fit_variable_name_data = fit_variable_name.replace( 'JetResDown', '' )
            fit_variable_name_data = fit_variable_name_data.replace( 'JetResUp', '' )
            if 'patPFMet' in met_type:
                # only the MC path is renamed; the data path was derived above
                fit_variable_name = fit_variable_name.replace( 'patPFMet', 'PFMET' )

    for sample, file_name in input_files.iteritems():
        if not file_name:
            continue
        if sample == 'data':
            h_fit_variable = get_histogram( file_name, fit_variable_name_data, b_tag_bin )
        elif sample == 'V+Jets':
            # extracting the inclusive V+Jets template across all bins and
            # scaling it to the yield of the requested variable bin
            h_fit_variable = None
            for var_bin in variable_bins_ROOT[variable]:
                temp_variable_name = fit_variable_name.replace( variable_bin, var_bin )
                h_bin = get_histogram( file_name, temp_variable_name, b_tag_bin )
                if h_fit_variable is None:
                    h_fit_variable = h_bin
                else:
                    h_fit_variable += h_bin

            h_fit_variable_for_scaling = get_histogram( file_name, fit_variable_name, b_tag_bin )
            scale = h_fit_variable_for_scaling.integral( overflow = True ) / h_fit_variable.integral( overflow = True )
            h_fit_variable.Scale( scale )
        else:
            h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin )
        h_fit_variable.Rebin( rebin )
        h_fit_variable = adjust_overflow_to_limit( h_fit_variable, boundaries[0], boundaries[1] )
        histograms[sample] = h_fit_variable

    h_qcd = get_qcd_histograms( input_files, variable, variable_bin,
                                channel, fit_variable_name, rebin )

    # QCD templates with an integral below 0.1 are scaled to 0.1
    # NOTE(review): an exactly empty template makes this division fail -
    # confirm whether that is the intended behaviour
    qcd_integral = h_qcd.Integral()
    if qcd_integral < 0.1:
        h_qcd.Scale( 0.1 / qcd_integral )

    histograms['QCD'] = adjust_overflow_to_limit( h_qcd,
                                                 boundaries[0], boundaries[1] )

    # normalise histograms
    if measurement_config.luminosity_scale != 1.0:
        for sample, histogram in histograms.iteritems():
            if sample == 'data':
                continue
            histogram.Scale( measurement_config.luminosity_scale )

    # apply normalisation scale factors for rate-changing systematics
    if scale_factors:
        for source, factor in scale_factors.iteritems():
            if 'luminosity' in source:
                for sample, histogram in histograms.iteritems():
                    if sample == 'data':
                        continue
                    histogram.Scale( factor )
            for sample, histogram in histograms.iteritems():
                if sample in source:
                    histogram.Scale( factor )
    return histograms
def get_histograms( channel, input_files, variable, met_type, variable_bin,
                   b_tag_bin, rebin = 1, fit_variable = 'absolute_eta',
                   scale_factors = None ):
    '''
    Collect the fit-variable histograms of all samples for one variable bin.

    Parameters:
        channel: key into analysis_type; for fit_variable 'absolute_eta' it
            also prefixes the histogram name (channel + '_' + fit_variable).
        input_files: dictionary sample -> file name. Samples with an empty
            file name are skipped. Also handed to get_qcd_histograms.
        variable: key into measurement_config.histogram_path_templates.
            For 'HT' the path is built without met_type.
        met_type: inserted into the histogram path for every variable except
            'HT'. 'JetResDown'/'JetResUp' are stripped from the path used for
            data; for 'patPFMet' types the MC path uses 'PFMET' instead.
        variable_bin: bin label inserted into the histogram path.
        b_tag_bin: b-tag bin passed to get_histogram.
        rebin: argument of Rebin applied to every sample histogram.
        fit_variable: key into measurement_config.fit_boundaries. Fit
            variables containing '_bl' keep the V+Jets template of b_tag_bin.
        scale_factors: optional dictionary source -> factor. Sources that
            contain 'luminosity' scale every sample except 'data'; in
            addition every sample whose name is contained in the source is
            scaled by the factor.

    Returns:
        Dictionary sample -> histogram limited to the fit boundaries via
        adjust_overflow_to_limit, including an additional 'QCD' entry.

    Exits with status 1 if the variable has no histogram path template.
    '''
    boundaries = measurement_config.fit_boundaries[fit_variable]
    histograms = {}
    if variable not in measurement_config.histogram_path_templates:
        print 'Fatal Error: unknown variable ', variable
        # a fatal error must not be reported as success (status 0)
        sys.exit( 1 )

    fit_variable_template = measurement_config.histogram_path_templates[variable]
    if fit_variable == 'absolute_eta':
        histogram_name = channel + '_' + fit_variable
    else:
        histogram_name = fit_variable

    if variable == 'HT':
        # the HT path template has no slot for the MET type
        fit_variable_name = fit_variable_template % ( analysis_type[channel],
                                                      variable_bin,
                                                      histogram_name )
        fit_variable_name_data = fit_variable_name
    else:
        fit_variable_name = fit_variable_template % ( analysis_type[channel],
                                                      met_type, variable_bin,
                                                      histogram_name )
        fit_variable_name_data = fit_variable_name
        if 'JetRes' in met_type:
            # the data path is the MC path without the JetRes variation
            fit_variable_name_data = fit_variable_name.replace( 'JetResDown', '' )
            fit_variable_name_data = fit_variable_name_data.replace( 'JetResUp', '' )
            if 'patPFMet' in met_type:
                # only the MC path is renamed; the data path was derived above
                fit_variable_name = fit_variable_name.replace( 'patPFMet', 'PFMET' )

    for sample, file_name in input_files.iteritems():
        if not file_name:
            continue
        if sample == 'data':
            h_fit_variable = get_histogram( file_name, fit_variable_name_data, b_tag_bin )
        elif sample == 'V+Jets':
            # extracting the V+Jets template from its specific b-tag bin (>=0 by default) and scaling it to analysis b-tag bin
            h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin )
            if '_bl' not in fit_variable:
                # this procedure is not valid for fit variables requiring at least one b-tag
                h_fit_variable_VJets_specific_b_tag_bin = get_histogram( file_name, fit_variable_name, b_tag_bin_VJets )
                try:
                    scale = h_fit_variable.integral( overflow = True ) / h_fit_variable_VJets_specific_b_tag_bin.integral( overflow = True )
                except ZeroDivisionError:
                    # only the empty-template case is handled here; any other
                    # error must surface instead of being reported as "empty"
                    print 'WARNING: V+Jets template from ' + str( file_name ) + ', histogram ' + fit_variable_name + ' in ' + b_tag_bin_VJets + \
                        ' b-tag bin is empty. Using central bin (' + b_tag_bin + '), integral = ' + str( h_fit_variable.Integral() )
                else:
                    h_fit_variable_VJets_specific_b_tag_bin.Scale( scale )
                    h_fit_variable = h_fit_variable_VJets_specific_b_tag_bin
        else:
            h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin )
        h_fit_variable.Rebin( rebin )
        # NOTE (from the original author): this is not working.
        # M3 has fewer data events than absolute_eta
        h_fit_variable = adjust_overflow_to_limit( h_fit_variable, boundaries[0], boundaries[1] )
        histograms[sample] = h_fit_variable

    h_qcd = get_qcd_histograms( input_files, variable, variable_bin,
                                channel, fit_variable_name, rebin )

    # QCD templates with an integral below 0.1 are scaled to 0.1
    # NOTE(review): an exactly empty template makes this division fail -
    # confirm whether that is the intended behaviour
    qcd_integral = h_qcd.Integral()
    if qcd_integral < 0.1:
        h_qcd.Scale( 0.1 / qcd_integral )

    histograms['QCD'] = adjust_overflow_to_limit( h_qcd,
                                                 boundaries[0], boundaries[1] )
    # normalise histograms
    if measurement_config.luminosity_scale != 1.0:
        for sample, histogram in histograms.iteritems():
            if sample == 'data':
                continue
            histogram.Scale( measurement_config.luminosity_scale )

    # apply normalisation scale factors for rate-changing systematics
    if scale_factors:
        for source, factor in scale_factors.iteritems():
            if 'luminosity' in source:
                for sample, histogram in histograms.iteritems():
                    if sample == 'data':
                        continue
                    histogram.Scale( factor )
            for sample, histogram in histograms.iteritems():
                if sample in source:
                    histogram.Scale( factor )

    return histograms