def calculate_normalisation(self):
    '''
        Fill self.normalisation from the measurement histograms (runs once).

        Returns immediately if self.have_normalisation is already set.
        Otherwise:
        1. every histogram in self.measurement.histograms is passed through
           fix_overflow and written back into that mapping
        2. the output of hist_to_value_error_tuplelist for each histogram is
           stored in self.initial_normalisation
        3. the procedure selected by self.method is run (background
           subtraction or simultaneous fit)
        4. all values and errors in self.normalisation are rounded to one
           decimal place and self.have_normalisation is set to True
    '''
    if self.have_normalisation:
        return

    histograms = self.measurement.histograms
    for sample, raw_hist in histograms.items():
        # TODO: this should be a list of bin-contents
        fixed_hist = fix_overflow(raw_hist)
        histograms[sample] = fixed_hist
        self.initial_normalisation[sample] = hist_to_value_error_tuplelist(
            fixed_hist)
        subtracting = self.method == self.BACKGROUND_SUBTRACTION
        if subtracting and sample != 'TTJet':
            # for background subtraction every sample except TTJet starts
            # from its initial values
            self.normalisation[sample] = self.initial_normalisation[sample]

    if self.method == self.BACKGROUND_SUBTRACTION:
        self.background_subtraction(histograms)
    if self.method == self.SIMULTANEOUS_FIT:
        self.simultaneous_fit(histograms)

    # these are event numbers, so one decimal place is kept for both the
    # value and its error
    for sample in list(self.normalisation):
        self.normalisation[sample] = [
            (round(value, 1), round(error, 1))
            for value, error in self.normalisation[sample]
        ]

    self.have_normalisation = True
def calculate_normalisation(self):
    '''
        Compute self.normalisation by background subtraction (runs once).

        Nothing happens if self.have_normalisation is already set.
        Otherwise each histogram of self.measurement.histograms is replaced
        by the result of fix_overflow, converted with
        hist_to_value_error_tuplelist and stored both as initial and as
        current normalisation. self.background_subtraction is then called
        with the histograms, and finally every value and error in
        self.normalisation is rounded to one decimal place.
    '''
    if self.have_normalisation:
        return

    def _round_pairs(pairs):
        # event numbers: one decimal place for value and error
        return [(round(value, 1), round(error, 1)) for value, error in pairs]

    histograms = self.measurement.histograms
    for sample, original in histograms.items():
        # TODO: this should be a list of bin-contents
        corrected = fix_overflow(original)
        histograms[sample] = corrected
        self.initial_normalisation[sample] = hist_to_value_error_tuplelist(
            corrected)
        self.normalisation[sample] = self.initial_normalisation[sample]

    self.background_subtraction(histograms)

    for sample, values in self.normalisation.items():
        self.normalisation[sample] = _round_pairs(values)

    self.have_normalisation = True
def calculate_normalisation(self):
    '''
        Run the background-subtraction normalisation exactly once.

        Guarded by self.have_normalisation: a second call returns without
        doing anything. On the first call the histograms found in
        self.measurement.histograms are overflow-fixed in place (via
        fix_overflow), their hist_to_value_error_tuplelist output seeds both
        self.initial_normalisation and self.normalisation, and
        self.background_subtraction is invoked. The resulting numbers are
        rounded to one decimal place.
    '''
    if self.have_normalisation:
        return

    histograms = self.measurement.histograms
    for name, histogram in histograms.items():
        # TODO: this should be a list of bin-contents
        histogram = fix_overflow(histogram)
        histograms[name] = histogram
        value_errors = hist_to_value_error_tuplelist(histogram)
        self.initial_normalisation[name] = value_errors
        self.normalisation[name] = self.initial_normalisation[name]

    self.background_subtraction(histograms)

    # the entries are event numbers, hence the coarse rounding
    rounded = dict(
        (name, [(round(v, 1), round(e, 1)) for v, e in pairs])
        for name, pairs in self.normalisation.items()
    )
    for name in rounded:
        self.normalisation[name] = rounded[name]

    self.have_normalisation = True
def test_overflow_2D(self):
    '''
        fix_overflow on the 2D histogram self.h2 must leave no overflow in
        x or y, and the last bins must hold their old content plus the old
        overflow.
    '''
    def expected_last_bins(overflows, contents):
        # old last-bin content plus the overflow that should be merged in
        return [overflow + content
                for overflow, content in zip(overflows, contents)]

    state_before = check_overflow_in_2DHist(self.h2)
    # the fixture has to contain overflow in both directions
    self.assertGreater(state_before['has_overflow_in_x'], 0,
                       '2D hist: No overflow in x present, wrong setup.')
    self.assertGreater(state_before['has_overflow_in_y'], 0,
                       '2D hist: No overflow in y present, wrong setup.')

    fixed_hist = fix_overflow(self.h2)
    state_after = check_overflow_in_2DHist(fixed_hist)

    # the overflow must have been reset
    self.assertEqual(state_after['has_overflow_in_x'], 0,
                     '2D hist: Overflow in x is not 0.')
    self.assertEqual(state_after['has_overflow_in_y'], 0,
                     '2D hist: Overflow in y is not 0.')

    # new last bin content has to be the old content plus the old overflow
    expected_x = expected_last_bins(state_before['overflow_x'],
                                    state_before['last_bin_content_x'])
    expected_y = expected_last_bins(state_before['overflow_y'],
                                    state_before['last_bin_content_y'])
    observed_x = state_after['last_bin_content_x']
    observed_y = state_after['last_bin_content_y']

    # The end of each list belongs to the overflow itself, where the sum
    # above is not meaningful, so the tail is excluded from the comparison.
    # NOTE(review): the slice drops the last two entries although only the
    # last one is described as overflow - confirm this is intended.
    self.assertTrue(check_equal_lists(expected_x[:-2], observed_x[:-2]),
                    '2D hist: last bins in x are not correct.')
    self.assertTrue(check_equal_lists(expected_y[:-2], observed_y[:-2]),
                    '2D hist: last bins in y are not correct.')
def setUp(self):
    '''
        Build the fixture: a randomly filled, overflow-fixed 2D histogram,
        bin edges chosen with pick_bins.get_next_end, and purities,
        stabilities and event counts both as returned by get_next_end
        (the *_Integral lists) and as computed from the rebinned histogram
        (the *_GetBinContent lists).
    '''
    # create histograms
    # self.h1 = Hist2D( 15, 0, 15, 15, 0, 15 )
    # x_1 = [1, 3, 7, 7, 8, 1, 12, 7, 8, 6]
    # y_1 = [1, 7, 3, 7, 8, 12, 7, 12, 13, 11]
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    n_points = 100000
    x_values = 60 + 10 * np.random.randn(n_points)
    # y follows x with an additional unit-width smearing
    y_values = x_values + np.random.randn(n_points)
    points = np.vstack((x_values, y_values)).T
    self.h1.fill_array(points)
    self.h1 = fix_overflow(self.h1)

    self.histogram_information = [
        {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'},
    ]
    self.histograms = [entry['hist'] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min, self.s_min, self.n_min = 0.5, 0.5, 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    reference = self.histograms[0]
    total_bins = reference.GetNbinsX()
    x_axis = reference.GetXaxis()

    bin_start = 0
    bin_end = 0
    while bin_end < total_bins:
        bin_end, purity, stability, n_selected = pick_bins.get_next_end(
            self.histograms, bin_start, bin_end,
            self.p_min, self.s_min, self.n_min, 0)

        if not self.bin_edges:
            # very first pass: store the lower edge of the first bin as well
            self.bin_edges.append(x_axis.GetBinLowEdge(bin_start + 1))
        # upper edge of the bin at which the current merged bin ends
        upper_edge = (x_axis.GetBinLowEdge(bin_end) +
                      x_axis.GetBinWidth(bin_end))
        self.bin_edges.append(upper_edge)

        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_selected)
        bin_start = bin_end

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    # diagonal content of the rebinned histogram, one entry per new bin
    n_new_bins = len(self.bin_edges)
    self.n_events_GetBinContent = [
        int(self.h1_rebinned.GetBinContent(index, index))
        for index in range(1, n_new_bins)
    ]
def __return_histogram(self, d_hist_info, ignoreUnderflow=True, useQCDControl=False, useQCDSystematicControl=False):
    '''
        Takes basic histogram info and returns histo.
        Maybe this can move to ROOT_utilities?

        Parameters:
            d_hist_info: dict providing the keys 'input_file', 'tree',
                'qcd_control_region', 'qcd_normalisation_region', 'branch',
                'bin_edges', 'lumi_scale', 'scale', 'weight_branches' and
                'selection'. The dict and its lists are not modified.
            ignoreUnderflow: if True, content and error of the underflow
                bin (bin 0) are set to 0.
            useQCDControl: if True, read the 'qcd_control_region' tree
                instead of 'tree' and drop all weight branches containing
                'Electron' or 'Muon'.
            useQCDSystematicControl: only used together with useQCDControl;
                if True, read the 'qcd_normalisation_region' tree instead.

        Returns:
            The histogram of 'branch', weighted by the product of the weight
            branches times the selection, scaled by scale * lumi_scale and
            passed through fix_overflow.
    '''
    from rootpy.io.file import File
    from rootpy.plotting import Hist
    from dps.utils.hist_utilities import fix_overflow

    f = d_hist_info['input_file']
    tree = d_hist_info['tree']
    qcd_tree = d_hist_info["qcd_control_region"]
    qcd_tree_for_normalisation = d_hist_info["qcd_normalisation_region"]
    var = d_hist_info['branch']
    bins = d_hist_info['bin_edges']
    lumi_scale = d_hist_info['lumi_scale']
    scale = d_hist_info['scale']
    # work on a copy so the caller's list is never modified
    weights = list(d_hist_info['weight_branches'])
    selection = d_hist_info['selection']

    if useQCDControl:
        # replace SR tree with CR tree
        if useQCDSystematicControl:
            tree = qcd_tree_for_normalisation
        else:
            tree = qcd_tree
        # Remove the Lepton reweighting for the datadriven qcd (SF not derived for unisolated leptons).
        # Filtering into a new list avoids removing elements from a list
        # while iterating over it, which skips the entry following each
        # removed one.
        weights = [
            weight for weight in weights
            if 'Electron' not in weight and 'Muon' not in weight
        ]

    # an empty product of weights is 1; an empty string would produce an
    # invalid selection expression
    weights = "*".join(weights) or '1'
    # Selection will return a weight 0 or 1 depending on whether event passes selection
    weights_and_selection = '( {0} ) * ( {1} )'.format(weights, selection)

    scale *= lumi_scale

    root_file = File(f)
    try:
        root_tree = root_file.Get(tree)

        root_histogram = Hist(bins)
        # Draw histogram of var for selection into root_histogram
        root_tree.Draw(var, selection=weights_and_selection, hist=root_histogram)
        root_histogram.Scale(scale)

        # When a tree is filled with a dummy variable, it will end up in the underflow, so ignore it
        if ignoreUnderflow:
            root_histogram.SetBinContent(0, 0)
            root_histogram.SetBinError(0, 0)

        # Fix overflow (Moves entries from overflow bin into last bin i.e. last bin not |..| but |--> )
        root_histogram = fix_overflow(root_histogram)
    finally:
        # close the input file even if reading or drawing failed
        root_file.Close()
    return root_histogram
def test_overflow_1D(self):
    '''
        fix_overflow on the 1D histogram self.h1 must empty the overflow bin
        and add its former content to the last regular bin.
    '''
    idx_last = self.h1.nbins()
    idx_overflow = idx_last + 1

    # remember the contents before the fix
    overflow_before = self.h1.GetBinContent(idx_overflow)
    last_before = self.h1.GetBinContent(idx_last)
    # the fixture has to contain overflow, otherwise nothing is tested
    self.assertGreater(overflow_before, 0,
                       '1D hist: No overflow present, wrong setup.')

    fixed = fix_overflow(self.h1)

    overflow_after = fixed.GetBinContent(idx_overflow)
    self.assertEqual(overflow_after, 0.,
                     '1D hist: Overflow bin is not 0.')
    last_after = fixed.GetBinContent(idx_last)
    self.assertEqual(last_after, last_before + overflow_before,
                     '1D hist: last bin is not correct.')
def test_overflow_1D(self):
    '''
        Check fix_overflow for a 1D histogram: afterwards the overflow bin
        is empty and the last bin contains its old content plus the old
        overflow.
    '''
    n_bins = self.h1.nbins()
    # the overflow bin sits directly behind the last regular bin
    bin_after_last = n_bins + 1

    old_overflow = self.h1.GetBinContent(bin_after_last)
    old_last = self.h1.GetBinContent(n_bins)
    expected_last = old_last + old_overflow

    # sanity check of the fixture
    self.assertGreater(old_overflow, 0,
                       '1D hist: No overflow present, wrong setup.')

    result = fix_overflow(self.h1)
    self.assertEqual(result.GetBinContent(bin_after_last), 0.,
                     '1D hist: Overflow bin is not 0.')
    self.assertEqual(result.GetBinContent(n_bins), expected_last,
                     '1D hist: last bin is not correct.')
def test_overflow_2D(self):
    '''
        Check fix_overflow for the 2D histogram self.h2: the overflow in x
        and y has to vanish and has to be added to the last bins.
    '''
    info_before = check_overflow_in_2DHist(self.h2)
    n_overflow_x = info_before['has_overflow_in_x']
    n_overflow_y = info_before['has_overflow_in_y']
    # without overflow in the fixture the test would be meaningless
    self.assertGreater(n_overflow_x, 0,
                       '2D hist: No overflow in x present, wrong setup.')
    self.assertGreater(n_overflow_y, 0,
                       '2D hist: No overflow in y present, wrong setup.')

    info_after = check_overflow_in_2DHist(fix_overflow(self.h2))
    # check if overflow has been reset
    self.assertEqual(info_after['has_overflow_in_x'], 0,
                     '2D hist: Overflow in x is not 0.')
    self.assertEqual(info_after['has_overflow_in_y'], 0,
                     '2D hist: Overflow in y is not 0.')

    # expected last-bin contents: old content plus old overflow
    pairs_x = zip(info_before['overflow_x'],
                  info_before['last_bin_content_x'])
    pairs_y = zip(info_before['overflow_y'],
                  info_before['last_bin_content_y'])
    wanted_x = [spill + content for spill, content in pairs_x]
    wanted_y = [spill + content for spill, content in pairs_y]
    found_x = info_after['last_bin_content_x']
    found_y = info_after['last_bin_content_y']

    # The tail of each list refers to the overflow itself, for which the
    # sum above is not correct, so it is left out of the comparison.
    # NOTE(review): two trailing entries are dropped although only the last
    # one is described as overflow - confirm this is intended.
    x_matches = check_equal_lists(wanted_x[:-2], found_x[:-2])
    self.assertTrue(x_matches, '2D hist: last bins in x are not correct.')
    y_matches = check_equal_lists(wanted_y[:-2], found_y[:-2])
    self.assertTrue(y_matches, '2D hist: last bins in y are not correct.')
def setUp(self):
    '''
        Prepare the test fixture.

        A 60x60 bin 2D histogram is filled with random points and passed
        through fix_overflow. pick_bins.get_next_end is then called
        repeatedly to collect bin edges together with the purity, stability
        and event count it reports (stored in the *_Integral lists). The
        histogram is finally rebinned with those edges and the same
        quantities are computed from the rebinned histogram (stored in the
        *_GetBinContent lists).
    '''
    # create histograms
    # self.h1 = Hist2D( 15, 0, 15, 15, 0, 15 )
    # x_1 = [1, 3, 7, 7, 8, 1, 12, 7, 8, 6]
    # y_1 = [1, 7, 3, 7, 8, 12, 7, 12, 13, 11]
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    sample_size = 100000
    xs = 60 + 10 * np.random.randn(sample_size)
    ys = xs + np.random.randn(sample_size)
    # one (x, y) pair per row
    xy = np.vstack((xs, ys)).T
    self.h1.fill_array(xy)
    self.h1 = fix_overflow(self.h1)

    hist_info = {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'}
    self.histogram_information = [hist_info]
    self.histograms = [item['hist'] for item in self.histogram_information]

    # requirements for new binning
    self.p_min, self.s_min, self.n_min = 0.5, 0.5, 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    template = self.histograms[0]
    n_bins = template.GetNbinsX()
    axis = template.GetXaxis()

    start = end = 0
    while end < n_bins:
        next_bin = pick_bins.get_next_end(
            self.histograms, start, end,
            self.p_min, self.s_min, self.n_min, 0)
        end, purity, stability, n_events = next_bin

        if not self.bin_edges:
            # nothing stored yet: begin with the lower edge of the first bin
            self.bin_edges.append(axis.GetBinLowEdge(start + 1))
        # upper edge = lower edge + width of the bin where this range ends
        self.bin_edges.append(
            axis.GetBinLowEdge(end) + axis.GetBinWidth(end))

        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_events)
        # the next range starts where this one ended
        start = end

    rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.h1_rebinned = rebinned
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    # diagonal entries (i, i) of the rebinned histogram
    self.n_events_GetBinContent = [
        int(self.h1_rebinned.GetBinContent(i, i))
        for i in range(1, len(self.bin_edges))
    ]
def __return_histogram(self, d_hist_info, ignoreUnderflow=True, useQCDControl=False, useQCDSystematicControl=False):
    '''
        Takes basic histogram info and returns histo.
        Maybe this can move to ROOT_utilities?

        Parameters:
            d_hist_info: dict providing the keys 'input_file', 'tree',
                'qcd_control_region', 'qcd_normalisation_region', 'branch',
                'bin_edges', 'lumi_scale', 'scale', 'weight_branches' and
                'selection'. The dict and its lists are not modified.
            ignoreUnderflow: if True, content and error of the underflow
                bin (bin 0) are set to 0.
            useQCDControl: if True, read the 'qcd_control_region' tree
                instead of 'tree' and drop all weight branches containing
                'Electron' or 'Muon'.
            useQCDSystematicControl: only used together with useQCDControl;
                if True, read the 'qcd_normalisation_region' tree instead.

        Returns:
            The histogram of 'branch', weighted by the product of the weight
            branches times the selection, scaled by scale * lumi_scale and
            passed through fix_overflow.
    '''
    from rootpy.io.file import File
    from rootpy.plotting import Hist
    from dps.utils.hist_utilities import fix_overflow

    f = d_hist_info['input_file']
    tree = d_hist_info['tree']
    qcd_tree = d_hist_info["qcd_control_region"]
    qcd_tree_for_normalisation = d_hist_info["qcd_normalisation_region"]
    var = d_hist_info['branch']
    bins = d_hist_info['bin_edges']
    lumi_scale = d_hist_info['lumi_scale']
    scale = d_hist_info['scale']
    # work on a copy so the caller's list is never modified
    weights = list(d_hist_info['weight_branches'])
    selection = d_hist_info['selection']

    if useQCDControl:
        # replace SR tree with CR tree
        if useQCDSystematicControl:
            tree = qcd_tree_for_normalisation
        else:
            tree = qcd_tree
        # Remove the Lepton reweighting for the datadriven qcd (SF not derived for unisolated leptons).
        # Filtering into a new list avoids removing elements from a list
        # while iterating over it, which skips the entry following each
        # removed one.
        weights = [
            weight for weight in weights
            if 'Electron' not in weight and 'Muon' not in weight
        ]

    # an empty product of weights is 1; an empty string would produce an
    # invalid selection expression
    weights = "*".join(weights) or '1'
    # Selection will return a weight 0 or 1 depending on whether event passes selection
    weights_and_selection = '( {0} ) * ( {1} )'.format(weights, selection)

    scale *= lumi_scale

    root_file = File(f)
    try:
        root_tree = root_file.Get(tree)

        root_histogram = Hist(bins)
        # Draw histogram of var for selection into root_histogram
        root_tree.Draw(var, selection=weights_and_selection, hist=root_histogram)
        root_histogram.Scale(scale)

        # When a tree is filled with a dummy variable, it will end up in the underflow, so ignore it
        if ignoreUnderflow:
            root_histogram.SetBinContent(0, 0)
            root_histogram.SetBinError(0, 0)

        # Fix overflow (Moves entries from overflow bin into last bin i.e. last bin not |..| but |--> )
        root_histogram = fix_overflow(root_histogram)
    finally:
        # close the input file even if reading or drawing failed
        root_file.Close()
    return root_histogram