예제 #1
0
def unfold_results(results, category, channel, tau_value, h_truth, h_measured,
                   h_response, h_fakes, method, visiblePS):
    """Unfold a list of (value, error) results with a tau-regularised setup.

    The results are converted to a histogram using the binning registered
    for the module-level ``variable`` (``bin_edges_vis`` when ``visiblePS``
    is truthy, ``bin_edges`` otherwise), passed through ``removeFakes``
    together with ``h_measured`` and ``h_response``, and then unfolded.

    ``channel`` is accepted but not used in this function.

    Returns a 2-tuple of (value, error) lists: the unfolded data first,
    then the data histogram as it was handed to the unfolding.
    """
    global variable, path_to_JSON, options

    # The full phase-space binning is always looked up; it is only replaced
    # when the visible phase space was requested.
    binning = bin_edges[variable]
    if visiblePS:
        binning = bin_edges_vis[variable]
    data_hist = value_error_tuplelist_to_hist(results, binning)

    # Remove fakes before unfolding
    h_measured, data_hist = removeFakes(h_measured, data_hist, h_response)

    unfolder = Unfolding(h_truth, h_measured, h_response, h_fakes,
                         method=method, k_value=-1, tau=tau_value)

    # Only the central measurement uses the configured error treatment;
    # every other category gets error treatment 0.
    unfoldCfg.error_treatment = (options.error_treatment
                                 if category == 'central' else 0)

    unfolded_hist = unfolder.unfold(data_hist)
    del unfolder

    unfolded_values = hist_to_value_error_tuplelist(unfolded_hist)
    data_values = hist_to_value_error_tuplelist(data_hist)
    return unfolded_values, data_values
예제 #2
0
 def test_tau_closure(self):
     """Closure check for the tau-based unfolding objects.

     For every channel/variable pair stored in ``self.dict`` the
     ``h_measured`` histogram is unfolded with the ``tau_unfolding_object``
     and each unfolded bin value is required to agree with the matching
     ``h_truth`` bin value within the unfolded bin's own error.
     """
     for channel in self.channels:
         for variable in self.variables:
             inputs = self.dict[channel][variable]
             data = inputs['h_measured']
             truth = hist_to_value_error_tuplelist(inputs['h_truth'])
             unfolded_result = hist_to_value_error_tuplelist(
                 inputs['tau_unfolding_object'].unfold(data))
             # the difference between the truth and unfolded result should be within the unfolding error
             for (value, error), (true_value, _) in zip(unfolded_result, truth):
                 # assertAlmostEqual replaces the deprecated alias
                 # assertAlmostEquals, which was removed in Python 3.12.
                 self.assertAlmostEqual(value, true_value, delta=error)
예제 #3
0
 def test_tau_closure(self):
     """Unfold the measured distribution with tau regularisation and compare to truth.

     Loops over all ``self.channels`` and ``self.variables``; a bin fails
     when the unfolded value differs from the truth value by more than the
     unfolded bin error.
     """
     for channel in self.channels:
         for variable in self.variables:
             histograms = self.dict[channel][variable]
             unfolding = histograms['tau_unfolding_object']
             truth = hist_to_value_error_tuplelist(histograms['h_truth'])
             unfolded_result = hist_to_value_error_tuplelist(
                 unfolding.unfold(histograms['h_measured']))
             # the difference between the truth and unfolded result should be within the unfolding error
             for (value, error), (true_value, _) in zip(unfolded_result, truth):
                 # Non-deprecated spelling; the assertAlmostEquals alias no
                 # longer exists in Python 3.12+.
                 self.assertAlmostEqual(value, true_value, delta=error)
예제 #4
0
 def test_closure(self):
     """Run the built-in closure test of every stored unfolding object.

     For each channel/variable pair the result of
     ``unfolding_object.closureTest()`` is compared bin by bin with
     ``h_truth``; the values must agree within the unfolded bin error.
     """
     for channel in self.channels:
         for variable in self.variables:
             inputs = self.dict[channel][variable]
             # closure test
             unfolded_result = hist_to_value_error_tuplelist(
                 inputs["unfolding_object"].closureTest()
             )
             truth = hist_to_value_error_tuplelist(inputs["h_truth"])
             # the difference between the truth and unfolded result should be within the unfolding error
             for (value, error), (true_value, _) in zip(unfolded_result, truth):
                 # assertAlmostEquals is a deprecated alias that was removed
                 # in Python 3.12; assertAlmostEqual is the supported name.
                 self.assertAlmostEqual(value, true_value, delta=error)
def check_multiple_data_multiple_unfolding( input_file, method, channel ):
    """Unfold every selected toy data set with every selected toy MC and save the pulls.

    Toy MC numbers run from ``offset_toy_mc + 1`` to
    ``offset_toy_mc + use_N_toy``; toy data numbers run from
    ``offset_toy_data + 1`` to ``offset_toy_data + use_N_toy``. For each MC
    toy an ``Unfolding`` object is built from its (truth, measured, response)
    histograms, and the measured histogram of every *other* toy is unfolded
    with it. The unfolded values, the unfolded-minus-truth difference and the
    pull are collected per combination and handed to ``save_pulls``.

    Parameters:
        input_file: object whose ``Get(path)`` returns the folder for a toy
            (paths are ``<channel>/toy_<n>``).
        method: unfolding method name forwarded to ``Unfolding``.
        channel: channel name, used in the folder path and for ``save_pulls``.
    """
    global nbins, use_N_toy, output_folder, offset_toy_mc, offset_toy_data, k_value
    # same unfolding input, different data
    mc_range = range( offset_toy_mc + 1, offset_toy_mc + use_N_toy + 1 )
    data_range = range( offset_toy_data + 1, offset_toy_data + use_N_toy + 1 )

    # Single-argument print( ... ) calls emit the same text under Python 2
    # and Python 3, unlike the original print statements.
    print( 'Reading toy MC' )
    start1 = time()
    # Keyed by toy number so that only the toys actually needed are stored and
    # no fixed upper limit on the toy number is imposed.
    histograms = {}
    for nth_toy in sorted( set( mc_range ) | set( data_range ) ):
        folder_mc = input_file.Get( channel + '/toy_%d' % nth_toy )
        histograms[nth_toy] = get_histograms( folder_mc )
    print( 'Done reading toy MC in %s s' % ( time() - start1 ) )

    pulls = []
    for nth_toy_mc in mc_range:
        print( 'Doing MC no %d' % nth_toy_mc )
        h_truth, h_measured, h_response = histograms[nth_toy_mc]
        unfolding_obj = Unfolding( h_truth, h_measured, h_response, method = method, k_value = k_value )

        for nth_toy_data in data_range:
            # never unfold a toy with its own response
            if nth_toy_data == nth_toy_mc:
                continue
            print( 'Doing MC no, %d, data no %d' % ( nth_toy_mc, nth_toy_data ) )
            # index 1 is the measured histogram of the toy used as data
            h_data = histograms[nth_toy_data][1]
            unfolding_obj.unfold( h_data )
            pull = unfolding_obj.pull()
            diff = unfolding_obj.unfolded_data - unfolding_obj.truth
            diff_tuple = hist_to_value_error_tuplelist( diff )
            unfolded_tuple = hist_to_value_error_tuplelist( unfolding_obj.unfolded_data )
            pulls.append( {'unfolded': unfolded_tuple,
                           'difference' : diff_tuple,
                           'pull': pull,
                           'nth_toy_mc': nth_toy_mc,
                           'nth_toy_data':nth_toy_data
                           } )
            # clear the unfolding state before the next data toy
            unfolding_obj.Reset()
    save_pulls( pulls, test = 'multiple_data_multiple_unfolding', method = method, channel = channel )
    def __init__(self,
                 config,
                 measurement,
                 method=BACKGROUND_SUBTRACTION,
                 phase_space='FullPS'):
        """Set up the normalisation calculation for one measurement.

        Parameters:
            config: configuration object, stored as ``self.config``.
            measurement: measurement description; its ``variable``, ``name``,
                ``channel``, ``met_type``, ``histograms`` and
                ``aux_info_norms`` attributes are read, and ``read()`` is
                called on it here.
            method: normalisation method, stored as ``self.method``.
            phase_space: phase-space label, stored as ``self.phase_space``.

        Besides copying these attributes, a template is stored for every
        sample in ``measurement.histograms``: a deep copy of the histogram,
        scaled to unit integral when the integral is positive, converted to a
        list of (value, error) tuples.
        """
        self.config = config
        self.variable = measurement.variable
        self.category = measurement.name
        self.channel = measurement.channel
        self.method = method
        self.phase_space = phase_space
        self.measurement = measurement
        self.measurement.read()

        self.met_type = measurement.met_type
        self.fit_variables = ['M3']

        self.normalisation = {}
        self.initial_normalisation = {}
        self.templates = {}

        self.have_normalisation = False

        for sample, hist in self.measurement.histograms.items():
            # work on a copy so the measurement's own histogram keeps its scale
            h = deepcopy(hist)
            h_norm = h.integral()
            if h_norm > 0:
                # Reuse the integral computed above rather than recomputing
                # it; the float literal avoids integer division on Python 2
                # should the integral ever be an int.
                h.Scale(1.0 / h_norm)
            self.templates[sample] = hist_to_value_error_tuplelist(h)
        self.auxiliary_info = {}
        self.auxiliary_info['norms'] = measurement.aux_info_norms
    def calculate_normalisation(self):
        '''
            Compute the per-sample normalisation once and cache it.

            Every histogram of the measurement is stored as a list of
            (value, error) tuples in self.initial_normalisation. For
            background subtraction, all samples except 'TTJet' are copied
            unchanged into self.normalisation. The method-specific step
            (background_subtraction or simultaneous_fit) then runs, and
            finally all values and errors in self.normalisation are rounded
            to one decimal place. Calling this again is a no-op.
        '''
        if self.have_normalisation:
            return

        histograms = self.measurement.histograms
        for sample, hist in histograms.items():
            # TODO: this should be a list of bin-contents
            as_tuples = hist_to_value_error_tuplelist(hist)
            self.initial_normalisation[sample] = as_tuples
            if sample == 'TTJet':
                continue
            if self.method == self.BACKGROUND_SUBTRACTION:
                self.normalisation[sample] = as_tuples

        if self.method == self.BACKGROUND_SUBTRACTION:
            self.background_subtraction(histograms)
        if self.method == self.SIMULTANEOUS_FIT:
            self.simultaneous_fit(histograms)

        # these are event numbers, so one decimal place is kept
        for sample, values in self.normalisation.items():
            self.normalisation[sample] = [
                (round(value, 1), round(error, 1)) for value, error in values
            ]

        self.have_normalisation = True
    def calculate_normalisation(self):
        '''
            Fill self.initial_normalisation and self.normalisation.

            Does nothing when self.have_normalisation is already set.
            Otherwise:
            1. convert each measurement histogram to (value, error) tuples,
            2. for background subtraction, take every non-'TTJet' sample
               over as-is,
            3. run the step selected by self.method,
            4. round all resulting values and errors to one decimal place.
        '''
        if not self.have_normalisation:
            histograms = self.measurement.histograms

            for sample, hist in histograms.items():
                # TODO: this should be a list of bin-contents
                self.initial_normalisation[sample] = \
                    hist_to_value_error_tuplelist(hist)
                take_over = (self.method == self.BACKGROUND_SUBTRACTION
                             and sample != 'TTJet')
                if take_over:
                    self.normalisation[sample] = \
                        self.initial_normalisation[sample]

            if self.method == self.BACKGROUND_SUBTRACTION:
                self.background_subtraction(histograms)
            if self.method == self.SIMULTANEOUS_FIT:
                self.simultaneous_fit(histograms)

            # event numbers: keep a single decimal place
            for sample, values in self.normalisation.items():
                rounded = []
                for value, error in values:
                    rounded.append((round(value, 1), round(error, 1)))
                self.normalisation[sample] = rounded

            self.have_normalisation = True
    def __init__(self,
                 config,
                 measurement,
                 method=BACKGROUND_SUBTRACTION,
                 phase_space='FullPS'):
        """Store the inputs, read the measurement and build sample templates.

        ``measurement.read()`` is called during construction. The attributes
        ``variable``, ``name`` (stored as ``self.category``), ``channel``,
        ``met_type`` and ``aux_info_norms`` are taken from ``measurement``.

        ``self.templates`` maps each sample name to the (value, error) tuple
        list of a deep copy of its histogram; the copy is scaled to unit
        integral whenever the integral is greater than zero.
        """
        self.config = config
        self.variable = measurement.variable
        self.category = measurement.name
        self.channel = measurement.channel
        self.method = method
        self.phase_space = phase_space
        self.measurement = measurement
        self.measurement.read()

        self.met_type = measurement.met_type
        self.fit_variables = ['M3']

        self.normalisation = {}
        self.initial_normalisation = {}
        self.templates = {}

        self.have_normalisation = False

        for sample, hist in self.measurement.histograms.items():
            template = deepcopy(hist)
            integral = template.integral()
            if integral > 0:
                # The integral is computed once and reused; 1.0 keeps the
                # division a float division on Python 2 as well.
                template.Scale(1.0 / integral)
            self.templates[sample] = hist_to_value_error_tuplelist(template)
        self.auxiliary_info = {}
        self.auxiliary_info['norms'] = measurement.aux_info_norms
def unfold_results(results, h_truth, h_measured, h_response, method):
    """Unfold a list of (value, error) results and return it in the same format.

    The results are turned into a histogram with the module-level
    ``bin_edges`` and unfolded with an ``Unfolding`` object built from the
    given truth, measured and response histograms and ``method``.
    """
    global bin_edges

    data_hist = value_error_tuplelist_to_hist(results, bin_edges)
    unfolder = Unfolding(h_truth, h_measured, h_response, method=method)
    unfolded_hist = unfolder.unfold(data_hist)
    return hist_to_value_error_tuplelist(unfolded_hist)
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    """Unfold ``results`` and export the unfolding diagnostics to ROOT files.

    ``results`` (a list of (value, error) tuples) is converted to a histogram
    using ``bin_edges[variable]`` and unfolded with an ``Unfolding`` object
    built from the given histograms and ``method``.

    Two ROOT files are written below
    ``<path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<SVD_k_value>/``:
    one with the D and SV distributions (always recreated) and one with the
    unfolding object itself (only written when the file does not exist yet).

    Returns the unfolded histogram as a list of (value, error) tuples.
    """
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    # NOTE(review): unfoldCfg.Hreco is never set back here, so once a
    # non-central category has been processed the value stays 0 for later
    # calls unless it is restored elsewhere -- confirm this is intended.
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    # export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    if method == 'TSVDUnfold':
        # TSVDUnfold: GetD/GetSV are called on the unfold object directly
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        # cd() is called before the Write() calls that follow
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        #    unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        # other methods: the distributions are reached through Impl(), the
        # Hreco setting becomes part of the file name, and the input
        # histograms are stored alongside
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_Hreco' +
            str(unfoldCfg.Hreco) + '_' + category + '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        #    unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()

    # export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(
            unfoldCfg.Hreco) + '_' + category + '.root'
    # an existing file is left untouched
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
예제 #12
0
    def pull( self ):
        """Return the per-bin pull (unfolded - truth) / error as a list.

        When either ``self.unfolded_data`` or ``self.truth`` is falsy the
        placeholder ``[9999999]`` is returned instead. The error used is the
        one reported for the difference histogram; a bin with zero error
        makes the division fail.
        """
        if not ( self.unfolded_data and self.truth ):
            return [9999999]

        difference = self.unfolded_data - self.truth
        return [value / error
                for value, error in hist_to_value_error_tuplelist( difference )]
예제 #13
0
    def pull( self ):
        """Compute value / error for every bin of (unfolded data - truth).

        Returns ``[9999999]`` as a sentinel when ``self.unfolded_data`` or
        ``self.truth`` is not available (falsy). No protection against a
        zero bin error is applied.
        """
        if not self.unfolded_data or not self.truth:
            return [9999999]

        residual = self.unfolded_data - self.truth
        pulls = []
        for value, error in hist_to_value_error_tuplelist( residual ):
            pulls.append( value / error )
        return pulls
def unfold_results( results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method ):
    """Unfold ``results`` with the given k value and optionally export the unfolding objects.

    ``results`` is converted to a histogram with ``bin_edges[variable]`` and
    unfolded. ``unfoldCfg.Hreco`` is set to ``options.Hreco`` for the
    'central' category and to 0 for every other category before unfolding.

    When ``options.write_unfolding_objects`` is truthy, two ROOT files are
    written below ``<path_to_JSON>/unfolding_objects/<channel>/kv_<k_value>/``:
    the D and SV distributions (always recreated) and the unfolding object
    (only if that file does not exist yet). For methods other than
    'TSVDUnfold' the file names carry an ``Hreco<value>_`` tag and the input
    histograms are stored with the distributions.

    Returns the unfolded histogram as a list of (value, error) tuples.
    """
    global variable, path_to_JSON, options
    data_hist = value_error_tuplelist_to_hist( results, bin_edges[variable] )
    unfolder = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = k_value )

    # unfolding errors are switched off for systematic samples
    unfoldCfg.Hreco = options.Hreco if category == 'central' else 0

    unfolded_hist = unfolder.unfold( data_hist )

    if options.write_unfolding_objects:
        is_tsvd = method == 'TSVDUnfold'
        out_dir = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str( k_value ) + '/'
        make_folder_if_not_exists( out_dir )

        # only non-TSVDUnfold file names carry the Hreco setting
        if is_tsvd:
            name_tag = ''
        else:
            name_tag = 'Hreco' + str( unfoldCfg.Hreco ) + '_'

        # export the D and SV distributions
        dist_file = File( out_dir + method + '_SVDdistributions_' + name_tag + category + '.root', 'recreate' )
        dist_dir = dist_file.mkdir( 'SVDdist' )
        dist_dir.cd()
        if is_tsvd:
            unfolder.unfoldObject.GetD().Write()
            unfolder.unfoldObject.GetSV().Write()
        else:
            unfolder.unfoldObject.Impl().GetD().Write()
            unfolder.unfoldObject.Impl().GetSV().Write()
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
        dist_file.Close()

        # export the whole unfolding object if it doesn't exist
        object_file_name = out_dir + method + '_unfoldingObject_' + name_tag + category + '.root'
        if not os.path.isfile( object_file_name ):
            object_file = File( object_file_name, 'recreate' )
            object_dir = object_file.mkdir( 'unfoldingObject' )
            object_dir.cd()
            if is_tsvd:
                unfolder.unfoldObject.Write()
            else:
                unfolder.unfoldObject.Impl().Write()
            object_file.Close()

    del unfolder
    return hist_to_value_error_tuplelist( unfolded_hist )
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    """Unfold ``results`` using tau regularisation after removing fakes.

    The binning comes from ``bin_edges_vis[variable]`` when ``visiblePS`` is
    truthy and from ``bin_edges[variable]`` otherwise. ``removeFakes`` is
    applied to the measured and data histograms before the ``Unfolding``
    object is created with ``k_value = -1`` and ``tau = tau_value``.

    ``channel`` is not used inside this function.

    Returns ``(unfolded, data)``, both as lists of (value, error) tuples;
    ``data`` is the histogram that was passed to the unfolding.
    """
    global variable, path_to_JSON, options
    # the full phase-space edges are looked up in either case
    full_ps_edges = bin_edges[variable]
    edges = bin_edges_vis[variable] if visiblePS else full_ps_edges
    data_hist = value_error_tuplelist_to_hist( results, edges )

    # Remove fakes before unfolding
    h_measured, data_hist = removeFakes( h_measured, data_hist, h_response )

    unfolder = Unfolding( h_truth, h_measured, h_response, h_fakes,
                          method = method, k_value = -1, tau = tau_value )

    # systematic samples are unfolded with error treatment 0
    if category == 'central':
        unfoldCfg.error_treatment = options.error_treatment
    else:
        unfoldCfg.error_treatment = 0

    unfolded_hist = unfolder.unfold( data_hist )
    del unfolder
    return ( hist_to_value_error_tuplelist( unfolded_hist ),
             hist_to_value_error_tuplelist( data_hist ) )
예제 #16
0
    def pull ( self ):
      """Return the per-bin pull of the unfolded data with respect to the truth.

      The bin errors of ``self.truth`` are set to zero in place before the
      difference is taken (presumably so that only the unfolded data's
      uncertainty enters the pull -- confirm against the histogram
      subtraction in use). Each pull is the difference's bin value divided
      by its bin error.

      Returns ``[9999999]`` when ``self.unfolded_data`` or ``self.truth`` is
      falsy.
      """
      result = [9999999]

      if self.unfolded_data and self.truth:
          # ROOT numbers the regular bins 1..GetNbinsX() and uses 0 for the
          # underflow bin. The upper bound therefore has to be N + 1,
          # otherwise the last regular bin keeps its truth error. Bin 0 is
          # still reset, as before.
          for bin_index in range( 0, self.truth.GetNbinsX() + 1 ):
              self.truth.SetBinError( bin_index, 0 )

          diff = self.unfolded_data - self.truth
          value_error_tuplelist = hist_to_value_error_tuplelist( diff )
          result = [value / error for value, error in value_error_tuplelist]

      return result
예제 #17
0
 def unfold( self, data ):
     """Unfold ``data`` and return the result (also kept in ``self.unfolded_data``).

     Raises ValueError when every bin value of ``data`` is zero (this
     includes a histogram without any bins). For 'TSVDUnfold' and
     'TopSVDUnfold' the unfold object's ``Unfold(self.k_value)`` is used;
     for every other method its ``Hreco(self.Hreco)`` is used after the
     verbosity has been set. The result is wrapped with ``asrootpy``.
     """
     bin_contents = hist_to_value_error_tuplelist( data )
     if all( value == 0 for value, _ in bin_contents ):
         raise ValueError('Data histograms contains only zeros')

     self.setup_unfolding( data )
     if self.method in ( 'TSVDUnfold', 'TopSVDUnfold' ):
         raw_result = self.unfoldObject.Unfold( self.k_value )
     else:
         # remove unfold reports (faster)
         self.unfoldObject.SetVerbose( self.verbose )
         raw_result = self.unfoldObject.Hreco( self.Hreco )

     self.unfolded_data = asrootpy( raw_result )
     return self.unfolded_data
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Unfold the fitted TTJet results and store them with the generator truth values.

    The unfolding inputs (truth, measured, response) are read from:
    - the matching systematic file when ``category`` is one of
      ``ttbar_generator_systematics`` and does not contain 'ptreweight',
    - ``file_for_mcatnlo`` when ``category`` contains 'mcatnlo_matrix',
    - ``file_for_unfolding`` otherwise.

    The truth distributions of the POWHEG, MC@NLO, matching up/down and scale
    up/down files are read as well. Everything is collected in one
    dictionary, passed to ``write_data_to_JSON`` and returned.
    """
    global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg, file_for_mcatnlo 
    global file_for_matchingdown, file_for_matchingup, file_for_scaledown, file_for_scaleup
    global ttbar_generator_systematics

    def read_histograms(input_file):
        # (truth, measured, response) for the current variable/channel/MET type
        return get_unfold_histogram_tuple(input_file, variable, channel, met_type)

    def truth_values(input_file):
        # element 0 of the tuple is the truth histogram
        return hist_to_value_error_tuplelist(read_histograms(input_file)[0])

    files_for_systematics = {
        ttbar_theory_systematic_prefix + 'matchingdown': file_for_matchingdown,
        ttbar_theory_systematic_prefix + 'matchingup': file_for_matchingup,
        ttbar_theory_systematic_prefix + 'scaledown': file_for_scaledown,
        ttbar_theory_systematic_prefix + 'scaleup': file_for_scaleup,
    }

    # pick the file that provides the unfolding inputs for this category
    if category in ttbar_generator_systematics and 'ptreweight' not in category:
        unfolding_input = files_for_systematics[category]
    elif 'mcatnlo_matrix' in category:
        unfolding_input = file_for_mcatnlo
    else:
        unfolding_input = file_for_unfolding
    h_truth, h_measured, h_response = read_histograms(unfolding_input)

    MADGRAPH_results = hist_to_value_error_tuplelist(h_truth)
    POWHEG_results = truth_values(file_for_powheg)
    MCATNLO_results = truth_values(file_for_mcatnlo)

    matchingdown_results = truth_values(file_for_matchingdown)
    matchingup_results = truth_values(file_for_matchingup)
    scaledown_results = truth_values(file_for_scaledown)
    scaleup_results = truth_values(file_for_scaleup)

    TTJet_fit_results_unfolded = unfold_results(
        TTJet_fit_results, category, channel,
        h_truth, h_measured, h_response,
        'RooUnfoldSvd')

    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_unfolded': TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        # other generators
        'POWHEG': POWHEG_results,
        'MCATNLO': MCATNLO_results,
        # systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results,
    }

    output_folder = (path_to_JSON + '/xsection_measurement_results' + '/kv'
                     + str(unfoldCfg.SVD_k_value) + '/' + category)
    output_file = (output_folder + '/normalisation_' + channel + '_'
                   + met_type + '.txt')
    write_data_to_JSON(normalisation_unfolded, output_file)

    return normalisation_unfolded
예제 #19
0
 def unfold( self, data ):
     """Unfold ``data``; the result is stored in ``self.unfolded_data`` and returned.

     Raises ValueError when ``data`` is None or when all of its bin values
     are zero. 'TSVDUnfold' uses ``Unfold(self.k_value)`` of the unfold
     object; any other method uses ``Hreco(self.error_treatment)`` after the
     verbosity has been set. The result is wrapped with ``asrootpy``.
     """
     if data is None:
         raise ValueError('Data histogram is None')

     only_zeros = all( value == 0 for value, _ in hist_to_value_error_tuplelist( data ) )
     if only_zeros:
         raise ValueError('Data histograms contains only zeros')

     self.setup_unfolding( data )
     if self.method == 'TSVDUnfold':
         raw_result = self.unfoldObject.Unfold( self.k_value )
     else:
         # remove unfold reports (faster)
         self.unfoldObject.SetVerbose( self.verbose )
         raw_result = self.unfoldObject.Hreco( self.error_treatment )

     self.unfolded_data = asrootpy( raw_result )
     return self.unfolded_data
예제 #20
0
 def unfold(self, data):
     """Run the configured unfolding on ``data`` and return the unfolded histogram.

     A ValueError is raised for ``data is None`` and for a histogram that
     has no non-zero bin value. The unfolded histogram is wrapped with
     ``asrootpy`` and also kept as ``self.unfolded_data``.
     """
     if data is None:
         raise ValueError("Data histogram is None")

     has_content = any(
         value != 0 for value, _ in hist_to_value_error_tuplelist(data)
     )
     if not has_content:
         raise ValueError("Data histograms contains only zeros")

     self.setup_unfolding(data)
     if self.method != "TSVDUnfold":
         # remove unfold reports (faster)
         self.unfoldObject.SetVerbose(self.verbose)
         unfolded = self.unfoldObject.Hreco(self.error_treatment)
     else:
         unfolded = self.unfoldObject.Unfold(self.k_value)

     self.unfolded_data = asrootpy(unfolded)
     return self.unfolded_data
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Unfold the fitted TTJet results and write them out with the MC truth values.

    The unfolding inputs come from ``file_for_unfolding``; its truth
    histogram is stored under 'MADGRAPH'. Truth values are also read from
    the POWHEG, MC@NLO, matching up/down and scale up/down files. The
    resulting dictionary is handed to ``write_data_to_JSON`` and returned.
    """
    global variable, met_type, path_to_JSON

    def truth_values(input_file):
        # element 0 of the histogram tuple is the truth histogram
        histograms = get_unfold_histogram_tuple(input_file, variable,
                                                channel, met_type)
        return hist_to_value_error_tuplelist(histograms[0])

    h_truth, h_measured, h_response = get_unfold_histogram_tuple(
        file_for_unfolding, variable, channel, met_type)

    MADGRAPH_results = hist_to_value_error_tuplelist(h_truth)
    POWHEG_results = truth_values(file_for_powheg)
    MCATNLO_results = truth_values(file_for_mcatnlo)

    matchingdown_results = truth_values(file_for_matchingdown)
    matchingup_results = truth_values(file_for_matchingup)
    scaledown_results = truth_values(file_for_scaledown)
    scaleup_results = truth_values(file_for_scaleup)

    TTJet_fit_results_unfolded = unfold_results(
        TTJet_fit_results, category, channel,
        h_truth, h_measured, h_response,
        'RooUnfoldSvd')

    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_unfolded': TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        # other generators
        'POWHEG': POWHEG_results,
        'MCATNLO': MCATNLO_results,
        # systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results,
    }

    output_folder = (path_to_JSON + '/' + variable +
                     '/xsection_measurement_results' + '/kv' +
                     str(unfoldCfg.SVD_k_value) + '/' + category)
    output_file = (output_folder + '/normalisation_' + channel + '_' +
                   met_type + '.txt')
    write_data_to_JSON(normalisation_unfolded, output_file)

    return normalisation_unfolded
def get_unfolded_normalisation(TTJet_fit_results, category, channel):
    """Return (and write to JSON) the measured, unfolded and generator-level normalisations.

    ``TTJet_fit_results`` is unfolded with the histograms read from
    ``file_for_unfolding``. The truth histograms of the unfolding, POWHEG,
    MC@NLO, matching up/down and scale up/down files are converted to
    (value, error) lists and stored next to it.
    """
    global variable, met_type, path_to_JSON

    def read_inputs(input_file):
        # returns the (truth, measured, response) histograms of ``input_file``
        return get_unfold_histogram_tuple(input_file, variable, channel, met_type)

    h_truth, h_measured, h_response = read_inputs(file_for_unfolding)
    MADGRAPH_results = hist_to_value_error_tuplelist(h_truth)

    # index 0 selects the truth histogram of each generator/systematic file
    powheg_truth = read_inputs(file_for_powheg)[0]
    POWHEG_results = hist_to_value_error_tuplelist(powheg_truth)
    mcatnlo_truth = read_inputs(file_for_mcatnlo)[0]
    MCATNLO_results = hist_to_value_error_tuplelist(mcatnlo_truth)

    matchingdown_truth = read_inputs(file_for_matchingdown)[0]
    matchingdown_results = hist_to_value_error_tuplelist(matchingdown_truth)
    matchingup_truth = read_inputs(file_for_matchingup)[0]
    matchingup_results = hist_to_value_error_tuplelist(matchingup_truth)
    scaledown_truth = read_inputs(file_for_scaledown)[0]
    scaledown_results = hist_to_value_error_tuplelist(scaledown_truth)
    scaleup_truth = read_inputs(file_for_scaleup)[0]
    scaleup_results = hist_to_value_error_tuplelist(scaleup_truth)

    TTJet_fit_results_unfolded = unfold_results(TTJet_fit_results, category, channel,
                                                h_truth, h_measured, h_response,
                                                'RooUnfoldSvd')

    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_unfolded': TTJet_fit_results_unfolded,
        'MADGRAPH': MADGRAPH_results,
        # other generators
        'POWHEG': POWHEG_results,
        'MCATNLO': MCATNLO_results,
        # systematics
        'matchingdown': matchingdown_results,
        'matchingup': matchingup_results,
        'scaledown': scaledown_results,
        'scaleup': scaleup_results,
    }

    results_folder = path_to_JSON + '/' + variable + '/xsection_measurement_results' + '/kv' + str(unfoldCfg.SVD_k_value) + '/' + category
    results_file = results_folder + '/normalisation_' + channel + '_' + met_type + '.txt'
    write_data_to_JSON(normalisation_unfolded, results_file)

    return normalisation_unfolded
예제 #23
0
def get_unfolded_normalisation(TTJet_fit_results, category, channel, tau_value,
                               visiblePS):
    '''
        Unfold the fitted TTJet normalisation for one category and channel.

        The unfolding inputs (truth, measured, response, fakes) are read with
        get_unfold_histogram_tuple from
          - files_for_systematics[category], if category is in
            ttbar_generator_systematics or is a key of files_for_systematics,
          - files_for_pdfs[category], if category is in pdf_uncertainties,
          - file_for_unfolding otherwise.

        TTJet_fit_results, category, channel, tau_value and visiblePS are
        forwarded to unfold_results together with those histograms and the
        module-level `method`.

        Returns a dict with the keys 'TTJet_measured' (the input, unchanged),
        'TTJet_measured_withoutFakes' and 'TTJet_unfolded' (the two values
        returned by unfold_results). For category == 'central' the truth
        distributions of the comparison samples are added under
        'powhegPythia8', 'amcatnlo', 'madgraphMLM', 'amcatnlo_HERWIG',
        'scaledown', 'scaleup', 'massdown' and 'massup'.

        Raises KeyError if category is in ttbar_generator_systematics but is
        not a key of files_for_systematics.
    '''
    global variable, met_type, file_for_unfolding, files_for_pdfs
    global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method
    global file_for_powhegPythia8, file_for_madgraphMLM, file_for_amcatnlo
    global file_for_amcatnlo_herwig
    global file_for_scaledown, file_for_scaleup
    global file_for_massdown, file_for_massup
    global ttbar_generator_systematics, pdf_uncertainties

    # Systematics that are evaluated by swapping the response matrix.
    # The original source also carried commented-out entries for matching
    # up/down, JES, JER, BJet, lepton/tau/unclustered energy, lepton
    # efficiency and pile-up; none of them is handled here.
    files_for_systematics = {
        ttbar_theory_systematic_prefix + 'scaledown': file_for_scaledown,
        ttbar_theory_systematic_prefix + 'scaleup': file_for_scaleup,
        ttbar_theory_systematic_prefix + 'massdown': file_for_massdown,
        ttbar_theory_systematic_prefix + 'massup': file_for_massup,
        ttbar_theory_systematic_prefix + 'hadronisation': file_for_amcatnlo_herwig,
        ttbar_theory_systematic_prefix + 'NLOgenerator': file_for_amcatnlo,
    }

    # Every call to get_unfold_histogram_tuple below differs only in its
    # input file; keeping the rest in one place stops the calls drifting.
    common_options = dict(
        variable=variable,
        channel=channel,
        met_type=met_type,
        centre_of_mass=centre_of_mass,
        ttbar_xsection=ttbar_xsection,
        luminosity=luminosity,
        load_fakes=load_fakes,
        visiblePS=visiblePS,
    )

    if category in ttbar_generator_systematics or category in files_for_systematics:
        # Systematics where you change the response matrix.
        # Single formatted string: prints the same text under Python 2 and 3.
        print('Doing category {0} by changing response matrix'.format(category))
        unfolding_input = files_for_systematics[category]
    elif category in pdf_uncertainties:
        unfolding_input = files_for_pdfs[category]
    else:
        # Systematics where you change input MC
        unfolding_input = file_for_unfolding

    h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
        inputfile=unfolding_input, **common_options)

    TTJet_fit_results_unfolded, TTJet_fit_results_withoutFakes = unfold_results(
        TTJet_fit_results,
        category,
        channel,
        tau_value,
        h_truth,
        h_measured,
        h_response,
        h_fakes,
        method,
        visiblePS,
    )
    normalisation_unfolded = {
        'TTJet_measured': TTJet_fit_results,
        'TTJet_measured_withoutFakes': TTJet_fit_results_withoutFakes,
        'TTJet_unfolded': TTJet_fit_results_unfolded
    }

    #
    # THESE ARE FOR GETTING THE HISTOGRAMS FOR COMPARING WITH UNFOLDED DATA
    #
    if category == 'central':
        # (result key, input file), listed in the order the files are read.
        comparison_inputs = [
            ('scaledown', file_for_scaledown),
            ('scaleup', file_for_scaleup),
            ('massdown', file_for_massdown),
            ('massup', file_for_massup),
            ('powhegPythia8', file_for_powhegPythia8),
            ('amcatnlo', file_for_amcatnlo),
            ('madgraphMLM', file_for_madgraphMLM),
            ('amcatnlo_HERWIG', file_for_amcatnlo_herwig),
        ]
        # Only the truth histogram (first tuple element) of each sample is kept.
        truth_histograms = {}
        for label, inputfile in comparison_inputs:
            truth_histograms[label], _, _, _ = get_unfold_histogram_tuple(
                inputfile=inputfile, **common_options)

        # Keys are inserted in the same order as before so that any
        # order-preserving serialisation of the result is unchanged.
        for label in ('powhegPythia8', 'amcatnlo', 'madgraphMLM',
                      'amcatnlo_HERWIG', 'scaledown', 'scaleup',
                      'massdown', 'massup'):
            normalisation_unfolded[label] = hist_to_value_error_tuplelist(
                truth_histograms[label])

    return normalisation_unfolded
                               },
             met_type='patType1CorrectedPFMet',
             b_tag_bin='2orMoreBtags',
                )
 write_fit_results_and_initial_values(fit_results_electron, fit_results_muon, initial_values_electron, initial_values_muon)
 
 #continue with only TTJet
 TTJet_fit_results_electron = fit_results_electron['TTJet']
 TTJet_fit_results_muon = fit_results_muon['TTJet']
 
 # get t values for systematics
 # for systematics we only need the TTJet results!
 # unfold all above
 
 h_truth, h_measured, h_response = get_unfold_histogram_tuple(file_for_unfolding, 'electron')
 MADGRAPH_results_electron = hist_to_value_error_tuplelist(h_truth)
 POWHEG_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_powheg, 'electron')[1])
 PYTHIA_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_pythia, 'electron')[1])
 MCATNLO_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_mcatnlo, 'electron')[1])
 
 matchingdown_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingdown, 'electron')[1])
 matchingup_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_matchingup, 'electron')[1])
 scaledown_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaledown, 'electron')[1])
 scaleup_results_electron = hist_to_value_error_tuplelist(get_unfold_histogram_tuple(file_for_scaleup, 'electron')[1])
 
 TTJet_fit_results_electron_unfolded = unfold_results(TTJet_fit_results_electron,
                                                      h_truth,
                                                      h_measured,
                                                      h_response,
                                                      'RooUnfoldSvd')
 
def calculate_xsection( nEventsHistogram, variable ):
    '''
        Turn an event-count histogram into a normalised cross section histogram.

        The histogram is converted to a list of (value, error) tuples, passed
        to calculate_normalised_xsection together with
        bin_widths_visiblePS[variable] and a literal False as third argument,
        and the result is converted back into a histogram with the bin edges
        bin_edges_vis[variable].
    '''
    normalised = calculate_normalised_xsection(
        hist_to_value_error_tuplelist( nEventsHistogram ),
        bin_widths_visiblePS[variable],
        False )
    visible_edges = bin_edges_vis[variable]
    return value_error_tuplelist_to_hist( normalised, visible_edges )
def get_unfolded_normalisation( TTJet_fit_results, category, channel, k_value ):
    '''
        Unfold the fitted TTJet normalisation for one category and channel and
        return it together with the truth distributions of the comparison
        samples.

        The unfolding inputs (truth, measured, response, fakes) are read with
        get_unfold_histogram_tuple from
          - files_for_systematics[category], if category is in
            ttbar_generator_systematics or ttbar_theory_systematics,
          - files_for_pdfs[category], if category is in pdf_uncertainties,
          - file_for_ptreweight, if use_ptreweight is set,
          - file_for_unfolding otherwise.

        TTJet_fit_results, category, channel and k_value are forwarded to
        unfold_results together with those histograms and the module-level
        `method`.

        Returns a dict with the keys 'TTJet_measured' (the input, unchanged),
        'TTJet_unfolded' (the return value of unfold_results), 'MADGRAPH'
        (truth of the selected unfolding input) and the truth distributions
        'MADGRAPH_ptreweight', 'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO',
        'matchingdown', 'matchingup', 'scaledown' and 'scaleup'.

        Raises KeyError if category is in one of the two systematics
        collections but is not a key of files_for_systematics.
    '''
    global variable, met_type, file_for_unfolding, files_for_pdfs
    global file_for_powheg_pythia, file_for_powheg_herwig, file_for_mcatnlo
    global file_for_ptreweight
    global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method
    global file_for_matchingdown, file_for_matchingup, file_for_scaledown, file_for_scaleup
    global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties
    global use_ptreweight

    files_for_systematics = {
                             ttbar_theory_systematic_prefix + 'matchingdown':file_for_matchingdown,
                             ttbar_theory_systematic_prefix + 'matchingup':file_for_matchingup,
                             ttbar_theory_systematic_prefix + 'scaledown':file_for_scaledown,
                             ttbar_theory_systematic_prefix + 'scaleup':file_for_scaleup,
                             ttbar_theory_systematic_prefix + 'powheg_pythia':file_for_powheg_pythia,
                             ttbar_theory_systematic_prefix + 'powheg_herwig':file_for_powheg_herwig,
                             ttbar_theory_systematic_prefix + 'ptreweight':file_for_ptreweight,
                             }

    # Every call to get_unfold_histogram_tuple below differs only in its
    # input file; keeping the rest in one place stops the calls drifting.
    common_options = dict( variable = variable,
                           channel = channel,
                           met_type = met_type,
                           centre_of_mass = centre_of_mass,
                           ttbar_xsection = ttbar_xsection,
                           luminosity = luminosity,
                           load_fakes = load_fakes
                           )

    if category in ttbar_generator_systematics or category in ttbar_theory_systematics:
        unfolding_input = files_for_systematics[category]
    elif category in pdf_uncertainties:
        unfolding_input = files_for_pdfs[category]
    elif use_ptreweight:
        unfolding_input = file_for_ptreweight
    else:
        unfolding_input = file_for_unfolding

    h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                                                inputfile = unfolding_input,
                                                **common_options )

    # (result key, input file), listed in the order the files are read.
    comparison_inputs = [
                         ( 'POWHEG_PYTHIA', file_for_powheg_pythia ),
                         ( 'POWHEG_HERWIG', file_for_powheg_herwig ),
                         ( 'MCATNLO', file_for_mcatnlo ),
                         ( 'matchingdown', file_for_matchingdown ),
                         ( 'matchingup', file_for_matchingup ),
                         ( 'scaledown', file_for_scaledown ),
                         ( 'scaleup', file_for_scaleup ),
                         ( 'MADGRAPH_ptreweight', file_for_ptreweight ),
                         ]
    # Only the truth histogram (first tuple element) of each sample is kept.
    comparison_truth = {}
    for label, inputfile in comparison_inputs:
        comparison_truth[label], _, _, _ = get_unfold_histogram_tuple(
                                                inputfile = inputfile,
                                                **common_options )

    # All truth histograms are converted before unfold_results is called,
    # exactly as in the original statement order.
    MADGRAPH_results = hist_to_value_error_tuplelist( h_truth )
    comparison_results = {}
    for label, _ in comparison_inputs:
        comparison_results[label] = hist_to_value_error_tuplelist( comparison_truth[label] )

    TTJet_fit_results_unfolded = unfold_results( TTJet_fit_results,
                                                category,
                                                channel,
                                                k_value,
                                                h_truth,
                                                h_measured,
                                                h_response,
                                                h_fakes,
                                                method
                                                )

    normalisation_unfolded = {
                              'TTJet_measured' : TTJet_fit_results,
                              'TTJet_unfolded' : TTJet_fit_results_unfolded,
                              'MADGRAPH': MADGRAPH_results,
                              }
    # Remaining keys are inserted in the same order as the original literal:
    # reweighted sample, other generators, then systematics.
    for label in ( 'MADGRAPH_ptreweight',
                   'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO',
                   'matchingdown', 'matchingup', 'scaledown', 'scaleup' ):
        normalisation_unfolded[label] = comparison_results[label]

    return normalisation_unfolded
 def background_subtraction(self, histograms):
     '''
         Store the background-subtracted TTJet normalisation.

         clean_control_region is called on `histograms` with 'QCD', 'V+Jets'
         and 'SingleTop' as the samples to subtract; the resulting histogram
         is converted to a list of (value, error) tuples and stored in
         self.normalisation['TTJet'].
     '''
     backgrounds = ['QCD', 'V+Jets', 'SingleTop']
     cleaned = clean_control_region(histograms, subtract=backgrounds)
     self.normalisation['TTJet'] = hist_to_value_error_tuplelist(cleaned)
def check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable,
        responseMatrix,
        n_toy_data, output_folder,
        tau_value=-1
        ):
    '''
        Unfolds n_toy_data pseudo-data histograms with one response matrix
        and compares each result to the MC truth.

        The pseudo data are read from '{channel}/{variable}/toy_<i>' in
        input_file for i = 1 .. n_toy_data, the truth from
        '{channel}/{variable}/Original'. For every toy the unfolded
        distribution, its difference to the truth, the per-bin relative bias
        (difference / truth), the pull and the toy index are collected and
        handed to save_pulls.

        Returns whatever save_pulls returns (stored as output_file_name).
        Raises ZeroDivisionError if a truth bin (or the truth integral) is
        zero, as the original did.
    '''
    get_folder = input_file.Get

    print('Reading toy MC')
    start1 = time()
    tpl = '{channel}/{variable}/toy_{nth}'
    histograms = []
    for nth_toy_data in range(n_toy_data):
        # toy folders are numbered from 1, the loop index from 0
        folder_mc = get_folder(tpl.format(channel=channel, variable=variable,
                                          nth=nth_toy_data + 1))
        histograms.append(get_measured_histogram(folder_mc))
    print('Done reading toy MC in', time() - start1, 's')

    # Get truth and measured histograms
    original_folder = '{channel}/{variable}/Original'.format(
        channel=channel, variable=variable)
    h_truth = get_truth_histogram(get_folder(original_folder))
    h_measured = get_measured_histogram(get_folder(original_folder))

    # Set response matrix
    h_response = responseMatrix

    # Make sure the pseudo data to be unfolded has the same integral as the response matrix
    # NOTE(review): measured_from_response is never read below; the call is
    # kept only in case the ROOT projection has side effects - confirm and drop.
    measured_from_response = asrootpy(h_response.ProjectionX('px', 1))
    truth_from_response = asrootpy(h_response.ProjectionY())
    truthScale = truth_from_response.Integral() / h_truth.Integral()
    h_truth.Scale(truthScale)
    # NOTE(review): h_measured is scaled here but not used afterwards.
    h_measured.Scale(truthScale)

    pulls = []
    for nth_toy_data, h_data in enumerate(histograms):
        if nth_toy_data % 100 == 0:
            print(
                'Doing data no', nth_toy_data)

        h_data.Scale(truthScale)

        # NOTE(review): h_data is passed twice (first and third positional
        # argument) while the scaled h_measured is left unused - confirm
        # against the Unfolding signature that this is intended.
        unfolding_obj = Unfolding(
            h_data,
            h_truth, h_data, h_response, method=method, k_value=-1,
            tau=tau_value)

        unfolding_obj.unfold()
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - h_truth
        diff_tuple = hist_to_value_error_tuplelist(diff)

        truth_tuple = hist_to_value_error_tuplelist(unfolding_obj.truth)

        # relative bias per bin: (unfolded - truth) / truth, values only
        bias = [d[0] / t[0] for d, t in zip(diff_tuple, truth_tuple)]

        unfolded_tuple = hist_to_value_error_tuplelist(
            unfolding_obj.unfolded_data)

        pulls.append({'unfolded': unfolded_tuple,
                      'difference': diff_tuple,
                      'truth': truth_tuple,
                      'bias': bias,
                      'pull': pull,
                      'nth_toy_data': nth_toy_data
                      })
        unfolding_obj.Reset()

    output_file_name = save_pulls(pulls, method,
                                channel, tau_value, output_folder)

    return output_file_name
# Compare the systematic templates stored in test_unfolded.root against the
# truth distributions of the dedicated matching/scale unfolding ntuples.
# NOTE(review): `channel`, unfolding_file1 and unfolding_file2 are used below
# but are defined outside this excerpt.

# Unfolding ntuples for the scale-up (file 3) and scale-down (file 4) samples.
unfolding_file3 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaleup_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaleup_nTuple_53X_mc_merged_001.root')
unfolding_file4 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root')

# File holding the histograms under test.
test_file = root_open('test_unfolded.root')

# Matching plus/minus and scale plus/minus MET templates for this channel.
test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
# presumably enables per-bin sum-of-squared-weights errors (ROOT TH1::Sumw2)
test1.Sumw2()
test2.Sumw2()
test3.Sumw2()
test4.Sumw2()

# Reference: the 'truth_AsymBins' histogram of each unfolding file, converted
# to a (value, error) tuple list and rebuilt on the bin_edges['MET'] binning.
folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
ref1 = hist_to_value_error_tuplelist(unfolding_file1.Get(folder + '/truth_AsymBins'))
ref2 = hist_to_value_error_tuplelist(unfolding_file2.Get(folder + '/truth_AsymBins'))
ref3 = hist_to_value_error_tuplelist(unfolding_file3.Get(folder + '/truth_AsymBins'))
ref4 = hist_to_value_error_tuplelist(unfolding_file4.Get(folder + '/truth_AsymBins'))
ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET'])
ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET'])
ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET'])
ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET'])

# All eight histograms go through normalise() before being drawn.
normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])

# One test-vs-reference plot per systematic variation.
draw_pair(test1, ref1, 'matching_up')
draw_pair(test2, ref2, 'matching_down')
draw_pair(test3, ref3, 'scale_up')
draw_pair(test4, ref4, 'scale_down')
 def background_subtraction(self, histograms):
     '''
         Subtract the QCD, V+Jets and SingleTop contributions from the
         control-region histograms via clean_control_region and keep the
         result, as a list of (value, error) tuples, in
         self.normalisation['TTJet'].
     '''
     samples_to_subtract = ['QCD', 'V+Jets', 'SingleTop']
     ttjet_only = clean_control_region(histograms,
                                       subtract=samples_to_subtract)
     ttjet_values = hist_to_value_error_tuplelist(ttjet_only)
     self.normalisation['TTJet'] = ttjet_values
def check_multiple_data_multiple_unfolding(input_file,
                                           method,
                                           channel,
                                           variable,
                                           n_toy_mc,
                                           n_toy_data,
                                           output_folder,
                                           offset_toy_mc,
                                           offset_toy_data,
                                           k_value,
                                           tau_value=-1,
                                           run_matrix=None):
    '''
        Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
        simulation, unfolds the pseudo data and compares it to the MC truth.

        For each (toy MC, toy data) pair with different toy numbers, the
        toy MC provides truth, measured and response histograms for the
        Unfolding object, and the measured histogram of the toy data is
        unfolded with it.  The unfolded result, its difference to the
        truth and the pull are collected and handed to save_pulls().

        input_file      -- object whose Get() returns the folder
                           '<channel>/<variable>/toy_<n>'
        method          -- unfolding method, forwarded to Unfolding
        channel         -- channel name used in the folder path
        variable        -- variable name used in the folder path
        n_toy_mc        -- number of toy MC sets to use
        n_toy_data      -- number of toy data sets to use
        output_folder   -- forwarded to save_pulls()
        offset_toy_mc   -- toy MC numbers run from offset_toy_mc + 1
        offset_toy_data -- toy data numbers run from offset_toy_data + 1
        k_value         -- used when tau_value is negative
        tau_value       -- if >= 0, unfold with this tau and k_value=-1
        run_matrix      -- optional iterable of (nth_toy_mc, nth_toy_data)
                           pairs; created with create_run_matrix() if
                           empty or None
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    histograms = []

    print('Reading toy MC')
    start1 = time()
    # Toy numbers are 1-based.  Build the set of requested toys once so the
    # membership test below is cheap on every iteration.
    wanted_toys = set(range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1))
    wanted_toys.update(
        range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1))
    tpl = '{channel}/{variable}/toy_{nth}'
    # Slots 1..10000 are always allocated so that histograms[n - 1] belongs
    # to toy n; only the requested toys are actually read, the rest get a
    # placeholder.
    for nth_toy in range(1, 10000 + 1):
        if nth_toy in wanted_toys:
            folder_mc = get_folder(tpl.format(channel=channel,
                                              variable=variable,
                                              nth=nth_toy))
            histograms.append(get_histograms(folder_mc))
        else:
            histograms.append((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    # The regularisation choice does not depend on the pair: decide once.
    if tau_value >= 0:
        unfolding_options = {'k_value': -1, 'tau': tau_value}
    else:
        unfolding_options = {'k_value': k_value}

    for nth_toy_mc, nth_toy_data in run_matrix:
        # Skip self-pairs before building the Unfolding object, so nothing
        # is constructed and then abandoned without a Reset().
        if nth_toy_data == nth_toy_mc:
            continue

        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method,
                                  **unfolding_options)

        print('Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        # Index 1 of the histogram tuple is the measured distribution.
        h_data = histograms[nth_toy_data - 1][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        diff_tuple = hist_to_value_error_tuplelist(diff)
        unfolded_tuple = hist_to_value_error_tuplelist(
            unfolding_obj.unfolded_data)

        pulls.append({
            'unfolded': unfolded_tuple,
            'difference': diff_tuple,
            'pull': pull,
            'nth_toy_mc': nth_toy_mc,
            'nth_toy_data': nth_toy_data
        })
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)
예제 #32
0
    '/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root'
)

# Script fragment: compare unfolded test histograms against the generator
# truth of the corresponding systematic samples.
# NOTE(review): unfolding_file1..4 and `channel` are defined before this
# excerpt -- confirm which sample each file holds.

# File holding the histograms under test.
test_file = root_open('test_unfolded.root')

# One test histogram per systematic variation (matching +/-, scale +/-).
test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
# Presumably ROOT histograms: Sumw2() is called on each before they are
# normalised below -- confirm the object type against test_unfolded.root.
test1.Sumw2()
test2.Sumw2()
test3.Sumw2()
test4.Sumw2()

# Reference truth distributions: read 'truth_AsymBins' from each sample,
# convert to (value, error) tuples, then rebuild histograms on the MET bin
# edges.
folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
ref1 = hist_to_value_error_tuplelist(
    unfolding_file1.Get(folder + '/truth_AsymBins'))
ref2 = hist_to_value_error_tuplelist(
    unfolding_file2.Get(folder + '/truth_AsymBins'))
ref3 = hist_to_value_error_tuplelist(
    unfolding_file3.Get(folder + '/truth_AsymBins'))
ref4 = hist_to_value_error_tuplelist(
    unfolding_file4.Get(folder + '/truth_AsymBins'))
ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET'])
ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET'])
ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET'])
ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET'])

# All eight histograms are passed to normalise() before being compared.
normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])

# One test-vs-reference plot per variation; the third argument labels it.
draw_pair(test1, ref1, 'matching_up')
draw_pair(test2, ref2, 'matching_down')
def get_unfolded_normalisation( TTJet_fit_results, category, channel, tau_value, visiblePS ):
    global variable, met_type, path_to_JSON, file_for_unfolding, file_for_powheg_pythia, file_for_amcatnlo_herwig, file_for_ptreweight, files_for_pdfs
    global centre_of_mass, luminosity, ttbar_xsection, load_fakes, method
    global file_for_powhegPythia8, file_for_madgraphMLM, file_for_amcatnlo
    # global file_for_matchingdown, file_for_matchingup
    global file_for_scaledown, file_for_scaleup
    global file_for_massdown, file_for_massup
    global ttbar_generator_systematics, ttbar_theory_systematics, pdf_uncertainties
    global use_ptreweight

    files_for_systematics = {
                             # ttbar_theory_systematic_prefix + 'matchingdown'    :  file_for_matchingdown,
                             # ttbar_theory_systematic_prefix + 'matchingup'      :  file_for_matchingup,
                             ttbar_theory_systematic_prefix + 'scaledown'       :  file_for_scaledown,
                             ttbar_theory_systematic_prefix + 'scaleup'         :  file_for_scaleup,
                             ttbar_theory_systematic_prefix + 'massdown'        :  file_for_massdown,
                             ttbar_theory_systematic_prefix + 'massup'          :  file_for_massup,

                             # 'JES_down'        :  file_for_jesdown,
                             # 'JES_up'        :  file_for_jesup,

                             # 'JER_down'        :  file_for_jerdown,
                             # 'JER_up'        :  file_for_jerup,

                             # 'BJet_up'        :  file_for_bjetdown,
                             # 'BJet_down'        :  file_for_bjetup,

                             ttbar_theory_systematic_prefix + 'hadronisation'   :  file_for_amcatnlo_herwig,
                             ttbar_theory_systematic_prefix + 'NLOgenerator'   :  file_for_amcatnlo,

                             # 'ElectronEnUp' : file_for_ElectronEnUp,
                             # 'ElectronEnDown' : file_for_ElectronEnDown,
                             # 'MuonEnUp' : file_for_MuonEnUp,
                             # 'MuonEnDown' : file_for_MuonEnDown,
                             # 'TauEnUp' : file_for_TauEnUp,
                             # 'TauEnDown' : file_for_TauEnDown,
                             # 'UnclusteredEnUp' : file_for_UnclusteredEnUp,
                             # 'UnclusteredEnDown' : file_for_UnclusteredEnDown,

                             # 'Muon_up' : file_for_LeptonUp,
                             # 'Muon_down' : file_for_LeptonDown,
                             # 'Electron_up' : file_for_LeptonUp,
                             # 'Electron_down' : file_for_LeptonDown,

                             # 'PileUpSystematic' : file_for_PUSystematic,
                             }

    h_truth, h_measured, h_response, h_fakes = None, None, None, None
    # Systematics where you change the response matrix
    if category in ttbar_generator_systematics or category in files_for_systematics :
        print 'Doing category',category,'by changing response matrix'
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_systematics[category],
                                                                              variable = variable,
                                                                              channel = channel,
                                                                              met_type = met_type,
                                                                              centre_of_mass = centre_of_mass,
                                                                              ttbar_xsection = ttbar_xsection,
                                                                              luminosity = luminosity,
                                                                              load_fakes = load_fakes,
                                                                              visiblePS = visiblePS,
                                                                              )
    elif category in pdf_uncertainties:
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = files_for_pdfs[category],
                                                                              variable = variable,
                                                                              channel = channel,
                                                                              met_type = met_type,
                                                                              centre_of_mass = centre_of_mass,
                                                                              ttbar_xsection = ttbar_xsection,
                                                                              luminosity = luminosity,
                                                                              load_fakes = load_fakes,
                                                                              visiblePS = visiblePS,
                                                                              )
    # Systematics where you change input MC
    else:
        h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple( inputfile = file_for_unfolding,
                                                                              variable = variable,
                                                                              channel = channel,
                                                                              met_type = met_type,
                                                                              centre_of_mass = centre_of_mass,
                                                                              ttbar_xsection = ttbar_xsection,
                                                                              luminosity = luminosity,
                                                                              load_fakes = load_fakes,
                                                                              visiblePS = visiblePS,
                                                                              )

#     central_results = hist_to_value_error_tuplelist( h_truth )
    TTJet_fit_results_unfolded, TTJet_fit_results_withoutFakes = unfold_results( TTJet_fit_results,
                                                category,
                                                channel,
                                                tau_value,
                                                h_truth,
                                                h_measured,
                                                h_response,
                                                h_fakes,
                                                method,
                                                visiblePS,
                                                )
    normalisation_unfolded = {
                      'TTJet_measured' : TTJet_fit_results,
                      'TTJet_measured_withoutFakes' : TTJet_fit_results_withoutFakes,
                      'TTJet_unfolded' : TTJet_fit_results_unfolded
                      }

    #
    # THESE ARE FOR GETTING THE HISTOGRAMS FOR COMPARING WITH UNFOLDED DATA
    #

    if category == 'central':
        # h_truth_matchingdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingdown,
        #                                             variable = variable,
        #                                             channel = channel,
        #                                             met_type = met_type,
        #                                             centre_of_mass = centre_of_mass,
        #                                             ttbar_xsection = ttbar_xsection,
        #                                             luminosity = luminosity,
        #                                             load_fakes = load_fakes
        #                                             )
        # h_truth_matchingup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_matchingup,
        #                                             variable = variable,
        #                                             channel = channel,
        #                                             met_type = met_type,
        #                                             centre_of_mass = centre_of_mass,
        #                                             ttbar_xsection = ttbar_xsection,
        #                                             luminosity = luminosity,
        #                                             load_fakes = load_fakes
        #                                             )
        h_truth_scaledown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_scaledown,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )
        h_truth_scaleup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_scaleup,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )

        h_truth_massdown, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massdown,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )
        h_truth_massup, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_massup,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )

        h_truth_powhegPythia8, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_powhegPythia8,
                                                variable = variable,
                                                channel = channel,
                                                met_type = met_type,
                                                centre_of_mass = centre_of_mass,
                                                ttbar_xsection = ttbar_xsection,
                                                luminosity = luminosity,
                                                load_fakes = load_fakes,
                                                visiblePS = visiblePS,
                                                )

        h_truth_amcatnlo, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo,
                                                variable = variable,
                                                channel = channel,
                                                met_type = met_type,
                                                centre_of_mass = centre_of_mass,
                                                ttbar_xsection = ttbar_xsection,
                                                luminosity = luminosity,
                                                load_fakes = load_fakes,
                                                visiblePS = visiblePS,
                                                )

        h_truth_madgraphMLM, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_madgraphMLM,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )
        h_truth_amcatnlo_HERWIG, _, _, _ = get_unfold_histogram_tuple( inputfile = file_for_amcatnlo_herwig,
                                                    variable = variable,
                                                    channel = channel,
                                                    met_type = met_type,
                                                    centre_of_mass = centre_of_mass,
                                                    ttbar_xsection = ttbar_xsection,
                                                    luminosity = luminosity,
                                                    load_fakes = load_fakes,
                                                    visiblePS = visiblePS,
                                                    )

    
        # MADGRAPH_ptreweight_results = hist_to_value_error_tuplelist( h_truth_ptreweight )
        # POWHEG_PYTHIA_results = hist_to_value_error_tuplelist( h_truth_POWHEG_PYTHIA )
        # MCATNLO_results = None
        powhegPythia8_results = hist_to_value_error_tuplelist( h_truth_powhegPythia8 )
        madgraphMLM_results = hist_to_value_error_tuplelist( h_truth_madgraphMLM )
        AMCATNLO_results = hist_to_value_error_tuplelist( h_truth_amcatnlo )
        amcatnlo_HERWIG_results = hist_to_value_error_tuplelist( h_truth_amcatnlo_HERWIG )

        # matchingdown_results = hist_to_value_error_tuplelist( h_truth_matchingdown )
        # matchingup_results = hist_to_value_error_tuplelist( h_truth_matchingup )
        scaledown_results = hist_to_value_error_tuplelist( h_truth_scaledown )
        scaleup_results = hist_to_value_error_tuplelist( h_truth_scaleup )
        massdown_results = hist_to_value_error_tuplelist( h_truth_massdown )
        massup_results = hist_to_value_error_tuplelist( h_truth_massup )

        normalisation_unfolded['powhegPythia8'] =  powhegPythia8_results
        normalisation_unfolded['amcatnlo'] =  AMCATNLO_results
        normalisation_unfolded['madgraphMLM'] = madgraphMLM_results
        normalisation_unfolded['amcatnlo_HERWIG'] =  amcatnlo_HERWIG_results
        normalisation_unfolded['scaledown'] =  scaledown_results
        normalisation_unfolded['scaleup'] =  scaleup_results
        normalisation_unfolded['massdown'] =  massdown_results
        normalisation_unfolded['massup'] =  massup_results


    return normalisation_unfolded
def check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable,
        n_toy_mc, n_toy_data, output_folder,
        offset_toy_mc, offset_toy_data,
        k_value, tau_value=-1,
        run_matrix=None):
    '''
        Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
        simulation, unfolds the pseudo data and compares it to the MC truth.

        For each (toy MC, toy data) pair with different toy numbers, the
        toy MC provides truth, measured and response histograms for the
        Unfolding object, and the measured histogram of the toy data is
        unfolded with it.  The unfolded result, its difference to the
        truth and the pull are collected and handed to save_pulls().

        input_file      -- object whose Get() returns the folder
                           '<channel>/<variable>/toy_<n>'
        method          -- unfolding method, forwarded to Unfolding
        channel         -- channel name used in the folder path
        variable        -- variable name used in the folder path
        n_toy_mc        -- number of toy MC sets to use
        n_toy_data      -- number of toy data sets to use
        output_folder   -- forwarded to save_pulls()
        offset_toy_mc   -- toy MC numbers run from offset_toy_mc + 1
        offset_toy_data -- toy data numbers run from offset_toy_data + 1
        k_value         -- used when tau_value is negative
        tau_value       -- if >= 0, unfold with this tau and k_value=-1
        run_matrix      -- optional iterable of (nth_toy_mc, nth_toy_data)
                           pairs; created with create_run_matrix() if
                           empty or None
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    histograms = []

    print('Reading toy MC')
    start1 = time()
    # Toy numbers are 1-based.  Build the set of requested toys once so the
    # membership test below is cheap on every iteration.
    wanted_toys = set(range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1))
    wanted_toys.update(
        range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1))
    tpl = '{channel}/{variable}/toy_{nth}'
    # Slots 1..10000 are always allocated so that histograms[n - 1] belongs
    # to toy n; only the requested toys are actually read, the rest get a
    # placeholder.
    for nth_toy in range(1, 10000 + 1):
        if nth_toy in wanted_toys:
            folder_mc = get_folder(
                tpl.format(channel=channel, variable=variable, nth=nth_toy))
            histograms.append(get_histograms(folder_mc))
        else:
            histograms.append((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    # The regularisation choice does not depend on the pair: decide once.
    if tau_value >= 0:
        unfolding_options = {'k_value': -1, 'tau': tau_value}
    else:
        unfolding_options = {'k_value': k_value}

    for nth_toy_mc, nth_toy_data in run_matrix:
        # Skip self-pairs before building the Unfolding object, so nothing
        # is constructed and then abandoned without a Reset().
        if nth_toy_data == nth_toy_mc:
            continue

        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        unfolding_obj = Unfolding(
            h_truth, h_measured, h_response, method=method,
            **unfolding_options)

        print(
            'Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        # Index 1 of the histogram tuple is the measured distribution.
        h_data = histograms[nth_toy_data - 1][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        diff_tuple = hist_to_value_error_tuplelist(diff)
        unfolded_tuple = hist_to_value_error_tuplelist(
            unfolding_obj.unfolded_data)

        pulls.append({'unfolded': unfolded_tuple,
                      'difference': diff_tuple,
                      'pull': pull,
                      'nth_toy_mc': nth_toy_mc,
                      'nth_toy_data': nth_toy_data
                      })
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method,
               channel, output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)