Beispiel #1
0
	def GetSingleMuHistograms(self):
		"""Load the SingleMu_2012 dijet-mass histograms for the single-muon trigger analyses.

		For every analysis the fine-binned ``BHistograms/h_pfjet_mjj`` histogram is
		read and stored in ``self._mjj_histograms_fine``; a copy rebinned to
		``self._mass_bins`` is stored in ``self._mjj_histograms``. Analyses whose
		name does not contain "highmass" additionally get the rebinned
		``csvorder`` and ``vetothirdjet`` variants. Each analysis name is appended
		to ``self._analyses``.
		"""
		this_analyses = [
			"trigmu24i_lowmass_CSVTM", "trigmu24ibbl_lowmass_CSVTM",
			"trigmu24i_highmass_CSVTM", "trigmu24ibbh_highmass_CSVTM",
		]
		for analysis in this_analyses:
			filename = analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012")
			print("Opening " + filename)
			f = ROOT.TFile(filename, "READ")

			fine = f.Get("BHistograms/h_pfjet_mjj")
			fine.SetName("h_" + analysis + "_mjj_fine")
			fine.SetDirectory(0)
			self._mjj_histograms_fine[analysis] = fine

			rebinned = histogram_tools.rebin_histogram(fine, self._mass_bins, normalization_bin_width=1)
			rebinned.SetName("h_" + analysis + "_mjj")
			rebinned.SetDirectory(0)
			self._mjj_histograms[analysis] = rebinned

			if "highmass" not in analysis:
				variants = (
					("csvorder", self._mjj_histograms_csvorder),
					("vetothirdjet", self._mjj_histograms_vetothirdjet),
				)
				for variant, store in variants:
					variant_fine = f.Get("BHistograms/h_pfjet_mjj_" + variant)
					variant_fine.SetName("h_" + analysis + "_mjj_" + variant)
					variant_fine.SetDirectory(0)
					store[analysis] = histogram_tools.rebin_histogram(variant_fine, self._mass_bins, normalization_bin_width=1)
					# Detach the rebinned histogram as well, so that it cannot be
					# deleted together with the file when f.Close() runs below.
					store[analysis].SetDirectory(0)

			if "bbll" in analysis:
				# Prescale for singlemu + 60/53. The prescale was not computed for these analyses.
				fine.Scale(1.7)
				rebinned.Scale(1.7)
			# Coarsen the fine histogram only after the mass-binned copy was made from it.
			fine.Rebin(5)
			self._analyses.append(analysis)
			f.Close()
Beispiel #2
0
def RunBHistogramsBackground(analysis, sample, files_per_job=1, retar=False, data_source=None):
	"""Submit the BHistograms condor jobs for one background (simulation) sample.

	Creates and enters the per-(analysis, sample) submission directory, builds
	and runs a ``condor_cmsRun`` command line, writes a ``postprocessing.sh``
	script that merges the per-job outputs with ``hadd``, and finally returns
	to the directory the caller started in.

	Args:
		analysis: key into ``analysis_config.analysis_cfgs``; also used in file names.
		sample: key into ``analysis_config.files_QCDBEventTree``; also used in file names.
		files_per_job: value passed to ``--files-per-job``.
		retar: when true, ``--retar`` is added to the command line.
		data_source: accepted but not used by this function.
	"""
	start_directory = os.getcwd()
	working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
	os.system("mkdir -pv " + working_directory)
	os.chdir(working_directory)
	try:
		histogram_basename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample))
		# The backslashes are kept literally so the shell passes $(Cluster)/$(Process) through.
		output_filename = histogram_basename.replace(".root", r"_\$\(Cluster\)_\$\(Process\).root")

		command = "condor_cmsRun"
		if retar:
			command += " --retar "
		command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
		command += " --files-per-job=" + str(files_per_job)
		command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
		command += " --output-tag=BHistograms_" + sample + " "
		command += " --run "
		command += "  " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
		command += " dataSource=simulation "
		command += " dataType=background "
		command += " outputFile=" + output_filename
		print(command)
		os.system(command)
		# tmp.txt is not written by this function; the cleanup call is kept unchanged.
		os.system("rm -f tmp.txt")

		with open('postprocessing.sh', 'w') as postprocessing_file:
			postprocessing_file.write("#!/bin/bash\n")
			postprocessing_file.write("hadd " + working_directory + "/" + histogram_basename + " " + output_filename.replace(r"_\$\(Cluster\)_\$\(Process\)", "*") + "\n")
	finally:
		# Always cd back, even if a configuration lookup or file write failed.
		os.chdir(start_directory)
	def __init__(self, numerator_analysis, denominator_analysis, samples):
		"""Store the analysis names and open the per-sample input histogram files.

		Args:
			numerator_analysis: analysis name used for the numerator histograms.
			denominator_analysis: analysis name used for the denominator histograms.
			samples: iterable of sample names; one input file is opened per sample.

		For the trigjetht analyses no file is opened here; the histogram is
		cobbled together from several files later.
		"""
		# Log only the constructor's own arguments, so the message does not
		# depend on any variable defined outside this method.
		print("[OfflineBTagPlots::__init__] Initializing with numerator = " + numerator_analysis + ", denominator = " + denominator_analysis + ", samples = " + str(samples))
		self._samples = samples
		self._numerator_analysis = numerator_analysis
		self._denominator_analysis = denominator_analysis
		self._input_files = {"numerator":{}, "denominator":{}}

		# For trigjetht, have to cobble together the frankenhist later.
		open_numerator = numerator_analysis not in ("trigjetht_eta1p7_CSVTM", "trigjetht_eta2p2_CSVTM")
		open_denominator = denominator_analysis not in ("trigjetht_eta1p7", "trigjetht_eta2p2")
		for sample in self._samples:
			if open_numerator:
				numerator_filename = analysis_config.get_b_histogram_filename(self._numerator_analysis, sample)
				print("Numerator input file = " + numerator_filename)
				self._input_files["numerator"][sample] = TFile(numerator_filename)
			if open_denominator:
				denominator_filename = analysis_config.get_b_histogram_filename(self._denominator_analysis, sample)
				print("Denominator input file = " + denominator_filename)
				self._input_files["denominator"][sample] = TFile(denominator_filename)
Beispiel #4
0
def RunBHistogramsBackground(analysis,
                             sample,
                             files_per_job=1,
                             retar=False,
                             data_source=None):
    """Submit the BHistograms condor jobs for one background (simulation) sample.

    Creates and enters the per-(analysis, sample) submission directory, builds
    and runs a ``condor_cmsRun`` command line, writes a ``postprocessing.sh``
    script that merges the per-job outputs with ``hadd``, and finally returns
    to the directory the caller started in.

    Args:
        analysis: key into ``analysis_config.analysis_cfgs``; also used in file names.
        sample: key into ``analysis_config.files_QCDBEventTree``; also used in file names.
        files_per_job: value passed to ``--files-per-job``.
        retar: when true, ``--retar`` is added to the command line.
        data_source: accepted but not used by this function.
    """
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)
    try:
        histogram_basename = os.path.basename(
            analysis_config.get_b_histogram_filename(analysis, sample))
        # The backslashes are kept literally so the shell passes
        # $(Cluster)/$(Process) through.
        job_suffix = r"_\$\(Cluster\)_\$\(Process\)"
        output_filename = histogram_basename.replace(".root",
                                                     job_suffix + ".root")

        command_parts = ["condor_cmsRun"]
        if retar:
            command_parts.append(" --retar ")
        command_parts.extend([
            " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " ",
            " --files-per-job=" + str(files_per_job),
            " --submit-file=submit_" + analysis + "_" + sample + ".jdl ",
            " --output-tag=BHistograms_" + sample + " ",
            " --run ",
            "  " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis],
            " dataSource=simulation ",
            " dataType=background ",
            " outputFile=" + output_filename,
        ])
        command = "".join(command_parts)
        print(command)
        os.system(command)
        # tmp.txt is not written by this function; the cleanup call is kept
        # unchanged.
        os.system("rm -f tmp.txt")

        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write(
                "hadd " + working_directory + "/" + histogram_basename + " " +
                output_filename.replace(job_suffix, "*") + "\n")
    finally:
        # Always cd back, even if a configuration lookup or file write failed.
        os.chdir(start_directory)
Beispiel #5
0
def signal_acc_eff():
    """Print one table row per (analysis, model, mass) signal sample.

    For each sample the integral of ``BHistograms/h_pfjet_mjj`` inside the
    analysis' mjj window is divided by the integral of
    ``BHistograms/h_sample_nevents``; the ratio is reported as 0 when the
    denominator is not positive. Rows are tab/ampersand separated and end in
    a LaTeX line break.
    """
    # mjj window [low, high] used for each analysis.
    mjj_ranges = {
        "trigbbh_CSVTM": [526, 1455],
        "trigbbl_CSVTM": [296, 1246],
    }
    for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]:
        mjj_range = mjj_ranges[analysis]
        for model in ["Hbb", "RSG", "ZPrime"]:
            for mass in [350, 400, 500, 600, 750, 900, 1200]:
                if analysis == "trigbbh_CSVTM" and mass == 350:
                    continue
                signal_tag = analysis_config.simulation.get_signal_tag(
                    model, mass, "FULLSIM")
                f = TFile(
                    analysis_config.get_b_histogram_filename(
                        analysis, signal_tag), "READ")
                nevents = (f.Get("BHistograms/h_sample_nevents")).Integral()
                h_mjj = f.Get("BHistograms/h_pfjet_mjj")
                # Nudge the edges inward so a value sitting exactly on a bin
                # boundary selects the bin inside the window.
                low_bin = h_mjj.GetXaxis().FindBin(mjj_range[0] + 1.e-5)
                high_bin = h_mjj.GetXaxis().FindBin(mjj_range[1] - 1.e-5)
                nsignal = h_mjj.Integral(low_bin, high_bin)
                # Only plain numbers are needed from here on; release the file
                # instead of keeping one handle open per sample.
                f.Close()
                if nevents > 0:
                    acceff = 1. * nsignal / nevents
                else:
                    acceff = 0.
                print(analysis + "\t&\t" + model + "\t&\t" + str(mass) +
                      "\t&\t" + str(nsignal) + "\t&\t" + str(nevents) +
                      "\t&\t" + str(acceff) + "\t\\\\\n")
Beispiel #6
0
	def GetBJetPlusXHistograms(self):
		"""Load the BJetPlusX dijet-mass histograms for every (analysis, sample) pair.

		Results are keyed by ``analysis + "_" + sample``. The fine-binned
		``BHistograms/h_pfjet_mjj`` histogram goes to ``self._mjj_histograms_fine``
		and a copy rebinned to ``self._mass_bins`` to ``self._mjj_histograms``; the
		rebinned ``csvorder`` and ``vetothirdjet`` variants are stored as well.
		Each key is appended to ``self._analyses``.
		"""
		this_analyses = ["trigbbh_CSVTM", "trigbbl_CSVTM", "trigbbll_CSVTM", "trigbbh_trigbbl_CSVTM"]
		this_samples = ["BJetPlusX_2012", "BJetPlusX_2012BCD"]
		variants = (
			("csvorder", self._mjj_histograms_csvorder),
			("vetothirdjet", self._mjj_histograms_vetothirdjet),
		)
		for analysis in this_analyses:
			for sample in this_samples:
				name = analysis + "_" + sample
				f = ROOT.TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ")

				fine = f.Get("BHistograms/h_pfjet_mjj")
				fine.SetName("h_" + name + "_mjj_fine")
				fine.SetDirectory(0)
				self._mjj_histograms_fine[name] = fine

				rebinned = histogram_tools.rebin_histogram(fine, self._mass_bins, normalization_bin_width=1)
				rebinned.SetName("h_" + name + "_mjj")
				rebinned.SetDirectory(0)
				self._mjj_histograms[name] = rebinned

				for variant, store in variants:
					variant_fine = f.Get("BHistograms/h_pfjet_mjj_" + variant)
					variant_fine.SetName("h_" + name + "_mjj_" + variant)
					variant_fine.SetDirectory(0)
					store[name] = histogram_tools.rebin_histogram(variant_fine, self._mass_bins, normalization_bin_width=1)
					# Detach the rebinned histogram as well, so that it cannot be
					# deleted together with the file when f.Close() runs below.
					store[name].SetDirectory(0)

				# Coarsen the fine histogram only after the mass-binned copy was made from it.
				fine.Rebin(5)
				self._analyses.append(name)
				f.Close()

				if "bbll" in analysis:
					rebinned.Scale(1.7)
					fine.Scale(1.7)
Beispiel #7
0
	def GetJetHTHistogram(self):
		"""Build the "JetHT" mjj histograms from the per-HT-threshold trigger analyses.

		For each signal region the ``BHistograms/h_pfjet_mjj`` histogram of every
		HT slice (HT200 ... HT550 plus the unprescaled trigger) is read from the
		JetHT_2012BCD files and handed, with a mass range per slice, to
		``self.FrankenHist``. The result is stored under the key "JetHT" in
		``self._mjj_histograms_fine`` and, rebinned to ``self._mass_bins``, in
		``self._mjj_histograms``.
		"""
		sample = "JetHT_2012BCD"
		# NOTE(review): both signal regions write to the same "JetHT" key, so the
		# "lowmass" pass replaces the "highmass" result and "JetHT" is appended to
		# self._analyses twice -- confirm this is intended.
		for sr_name in ["highmass", "lowmass"]:
			# Only the lowmass analysis names carry the "_eta1p7" tag.
			eta_tag = "_eta1p7" if sr_name == "lowmass" else ""

			analyses = {}
			HT_slices = []
			for threshold in xrange(200, 600, 50):
				slice_name = "HT" + str(threshold)
				HT_slices.append(slice_name)
				analyses[slice_name] = "trigjetht" + str(threshold) + eta_tag + "_CSVTM"

			HT_slice_histograms = {}
			for slice_name in HT_slices:
				slice_filename = analysis_config.get_b_histogram_filename(analyses[slice_name], sample)
				f = TFile(slice_filename, "READ")
				slice_hist = f.Get("BHistograms/h_pfjet_mjj")
				HT_slice_histograms[slice_name] = slice_hist
				print("On file " + slice_filename)
				slice_hist.SetName(slice_hist.GetName() + "_" + analyses[slice_name])
				slice_hist.SetDirectory(0)
				f.Close()

			# The unprescaled trigger is added as the last slice.
			unprescaled_analysis_name = "trigjetht" + eta_tag + "_CSVTM"
			HT_slices.append("HTUnprescaled")
			analyses["HTUnprescaled"] = unprescaled_analysis_name
			f_unprescaled = TFile(analysis_config.get_b_histogram_filename(unprescaled_analysis_name, sample), "READ")
			unprescaled_hist = f_unprescaled.Get("BHistograms/h_pfjet_mjj")
			HT_slice_histograms["HTUnprescaled"] = unprescaled_hist
			unprescaled_hist.SetName(unprescaled_hist.GetName() + "_" + unprescaled_analysis_name)
			unprescaled_hist.SetDirectory(0)
			f_unprescaled.Close()

			# [low, high] mass range associated with each slice.
			ranges = {
				"HT200":[220, 386],
				"HT250":[386, 489],
				"HT300":[489, 526],
				"HT350":[526, 606],
				"HT400":[606, 649],
				"HT450":[649, 740],
				"HT500":[740, 788],
				"HT550":[788, 890],
				#"HT650":[800, 890],
				"HTUnprescaled":[890, 2000]
			}

			self._analyses.append("JetHT")
			combined_fine = self.FrankenHist(HT_slices, HT_slice_histograms, ranges)
			self._mjj_histograms_fine["JetHT"] = combined_fine
			self._mjj_histograms["JetHT"] = histogram_tools.rebin_histogram(combined_fine, self._mass_bins, normalization_bin_width=1)
			# Coarsen the fine histogram only after the mass-binned copy was made from it.
			combined_fine.Rebin(5)
 def __init__(self, numerator_analysis, denominator_analysis, samples):
     """Store the analysis names and open the per-sample input histogram files.

     Args:
         numerator_analysis: analysis name used for the numerator histograms.
         denominator_analysis: analysis name used for the denominator histograms.
         samples: iterable of sample names; one input file is opened per sample.

     For the trigjetht analyses no file is opened here; the histogram is
     cobbled together from several files later.
     """
     # Log only the constructor's own arguments, so the message does not
     # depend on any variable defined outside this method.
     print("[OfflineBTagPlots::__init__] Initializing with numerator = " +
           numerator_analysis + ", denominator = " + denominator_analysis +
           ", samples = " + str(samples))
     self._samples = samples
     self._numerator_analysis = numerator_analysis
     self._denominator_analysis = denominator_analysis
     self._input_files = {"numerator": {}, "denominator": {}}

     # For trigjetht, have to cobble together the frankenhist later.
     open_numerator = numerator_analysis not in (
         "trigjetht_eta1p7_CSVTM", "trigjetht_eta2p2_CSVTM")
     open_denominator = denominator_analysis not in (
         "trigjetht_eta1p7", "trigjetht_eta2p2")
     for sample in self._samples:
         if open_numerator:
             numerator_filename = analysis_config.get_b_histogram_filename(
                 self._numerator_analysis, sample)
             print("Numerator input file = " + numerator_filename)
             self._input_files["numerator"][sample] = TFile(
                 numerator_filename)
         if open_denominator:
             denominator_filename = analysis_config.get_b_histogram_filename(
                 self._denominator_analysis, sample)
             print("Denominator input file = " + denominator_filename)
             self._input_files["denominator"][sample] = TFile(
                 denominator_filename)
Beispiel #9
0
    "QCD_Pt-800to1000_TuneZ2star_8TeV_pythia6",
    "QCD_Pt-1000to1400_TuneZ2star_8TeV_pythia6",
    "QCD_Pt-1400to1800_TuneZ2star_8TeV_pythia6",
    "QCD_Pt-1800_TuneZ2star_8TeV_pythia6"
]
analyses = [
    "NoTrigger_eta2p2", "NoTrigger_eta2p2_CSVTM", "NoTrigger_eta1p7",
    "NoTrigger_eta1p7_CSVTM", "trigbbl_CSVTM", "trigbbh_CSVTM"
]

lumi = 19710.

for analysis in analyses:
    first = True
    output_file = TFile(
        analysis_config.get_b_histogram_filename(
            analysis, "QCD_TuneZ2star_8TeV_pythia6"), "RECREATE")
    output_directory = output_file.mkdir("BHistograms")
    histograms = {}
    for sample in qcd_samples:
        input_file = TFile(
            analysis_config.get_b_histogram_filename(analysis, sample), "READ")
        input_directory = input_file.Get("BHistograms")
        input_directory.cd()
        xsec = analysis_config.simulation.background_cross_sections[sample]
        normalization = xsec * lumi / input_file.Get(
            "BHistograms/h_sample_nevents").Integral()
        for key in gDirectory.GetListOfKeys():
            key.Print()
            if "TH1" in key.GetClassName() or "TH2" in key.GetClassName():
                hist = key.ReadObj()
                hist.Scale(normalization)
    args = parser.parse_args()
    print args

    if args.ht:
        analyses = {}
        names = []
        for mass in xrange(200, 700, 50):
            if mass == 600:
                continue
            names.append("HT" + str(mass))
            analyses["HT" + str(mass)] = "trigjetht" + str(mass)
        sample = "JetHT_2012BCD"
        histograms = {}
        for name in names:
            f = TFile(
                analysis_config.get_b_histogram_filename(
                    analyses[name], sample), "READ")
            #histograms[name] = mjj_common.apply_dijet_binning_normalized(f.Get("BHistograms/h_pfjet_mjj"))
            print "[debug] For name " + name + ", input events = " + str(
                f.Get("BHistograms/h_input_nevents").GetEntries())
            print "[debug] \tPrescale = " + str(
                f.Get("BHistograms/h_pass_nevents_weighted").GetBinContent(1) /
                f.Get("BHistograms/h_pass_nevents").GetBinContent(1))
            histograms[name] = f.Get("BHistograms/h_pfjet_mjj").Rebin(20)
            histograms[name].SetName(histograms[name].GetName() + "_" + name)
            histograms[name].SetDirectory(0)
            f.Close()
        ht_threshold_plot(names,
                          histograms,
                          save_tag="jetht_thresholds",
                          x_range=[0., 1200.],
                          logy=True)
	style_counter = 0
	for name in names:
		histograms[name].SetLineWidth(2)
		histograms[name].SetLineColor(seaborn.GetColorRoot("dark", style_counter))
		histograms[name].Draw("hist same")

		#histograms[name].Draw("hist same")
		l.AddEntry(histograms[name], name, "pl")
		style_counter += 1
	l.Draw()
	c.SaveAs(analysis_config.figure_directory + "/" + c.GetName() + ".pdf")

if __name__ == "__main__":
	for model in ["Hbb", "RSG"]:
		for mass_point in [600, 750, 900, 1200]:
			f1 = TFile(analysis_config.get_b_histogram_filename("trigbbh_CSVTM", analysis_config.simulation.get_signal_tag(model, mass_point, "FULLSIM")), "READ")
			f2 = TFile(analysis_config.get_b_histogram_filename("trigbbh_CSVTM_bfat", analysis_config.simulation.get_signal_tag(model, mass_point, "FULLSIM")))
			
			histograms = {}
			histograms["ak5"] = f1.Get("BHistograms/h_pfjet_mjj")
			histograms["ak5"].SetName("h_ak5_" + model + "_" + str(mass_point))
			histograms["ak5"].SetDirectory(0)

			histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"] = f1.Get("BHistograms/h_fatjet_mjj")
			histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetName("h_fat1p1_" + model + "_" + str(mass_point))
			histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetDirectory(0)
			
			histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"] = f2.Get("BHistograms/h_fatjet_mjj")
			histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"].SetName("h_fat0p8_" + model + "_" + str(mass_point))
			histograms["Fat, #DeltaR=0.8, p_{T}=15 GeV"].SetDirectory(0)
Beispiel #12
0
	data_samples = args.data_samples.split(",")
	signal_samples = []
	if args.signal_samples == "all":
		for signal_model in ["Hbb", "RSG"]:
			for mass in [600, 750, 900, 1200]:
				signal_samples.append(analysis_config.simulation.get_signal_tag(signal_model, mass, "FULLSIM"))
	elif args.signal_samples:
		signal_samples = args.signal_samples.split(",")
	

	f_data = {}
	f_signal = {}
	for analysis in analyses:
		f_data[analysis] = {}
		for data_sample in data_samples:
			f_data[analysis][data_sample] = TFile(analysis_config.get_b_histogram_filename(analysis, data_sample), "READ")

		f_signal[analysis] = {}
		for signal_sample in signal_samples:
			f_signal[analysis][signal_sample] = TFile(analysis_config.get_b_histogram_filename(analysis, signal_sample), "READ")

	if args.mjj:
		for analysis in analyses:
			for data_sample in data_samples:
				save_file = TFile("/uscms/home/dryu/Dijets/data/EightTeeEeVeeBee/Results/mjj_fits_" + analysis + "_" + data_sample + ".root", "RECREATE")
				if "trigbbl" in analysis:
					fit_minima = {"pfjet":419.1}
				elif "trigbbh" in analysis:
					fit_minima = {"pfjet":526.1}
				for jet_type in ["pfjet"]:
					data_hist = f_data[analysis][data_sample].Get("BHistograms/h_" + jet_type + "_mjj")
import CMSDIJET.QCDAnalysis.mjj_fits
from CMSDIJET.QCDAnalysis.mjj_fits import *
import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config
import CMSDIJET.QCDAnalysis.mjj_common as mjj_common
from CMSDIJET.QCDAnalysis.plots import AnalysisComparisonPlot

def f8(seq):  # Dave Kirby
    """Return the items of *seq* as a list with duplicates dropped.

    The first occurrence of every item is kept and the original order is
    preserved. Items must be hashable.
    """
    seen = set()
    unique = []
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique

if __name__ == "__main__":

	for analysis in ["lowmass", "highmass"]:
		if analysis == "highmass":
			f_bjetplusx = TFile(analysis_config.get_b_histogram_filename("trigbbh_CSVTM", "BJetPlusX_2012"), "READ")
			f_singlemu = TFile(analysis_config.get_b_histogram_filename("mu_highmass_CSVTM", "SingleMu_2012"), "READ")
		else:
			f_bjetplusx = TFile(analysis_config.get_b_histogram_filename("trigbbl_CSVTM", "BJetPlusX_2012BCD"), "READ")
			f_singlemu = TFile(analysis_config.get_b_histogram_filename("mu_lowmass_CSVTM", "SingleMu_2012"), "READ")
		print "[debug] For BJetsPlusX_2012, input events = " + str(f_bjetplusx.Get("BHistograms/h_input_nevents").GetEntries())
		print "[debug] For SingleMu_2012, input events = " + str(f_singlemu.Get("BHistograms/h_input_nevents").GetEntries())
		bjetplusx_histogram = f_bjetplusx.Get("BHistograms/h_pfjet_mjj").Rebin(25)
		bjetplusx_histogram.SetDirectory(0)
		f_bjetplusx.Close()
		singlemu_histogram = f_singlemu.Get("BHistograms/h_pfjet_mjj").Rebin(25)
		singlemu_histogram.SetDirectory(0)
		f_singlemu.Close()

		# Normalize the histograms above 450 GeV
		norm_low_bin = bjetplusx_histogram.GetXaxis().FindBin(450)
Beispiel #14
0
def RunBHistogramsSignal(analysis, sample, files_per_job=1, retar=False, data_source=None):
	"""Submit the BHistograms jobs for one signal (simulation) sample.

	Creates and enters the per-(analysis, sample) submission directory, submits
	the jobs, writes a ``postprocessing.sh`` script that merges the per-job
	outputs with ``hadd``, and finally returns to the directory the caller
	started in. Submission goes through ``csub`` with a generated bash script;
	the ``condor_cmsRun`` path is kept but is not reachable while ``method`` is
	hard-coded to "csub".

	Args:
		analysis: key into ``analysis_config.analysis_cfgs``; also used in file names.
		sample: key into ``analysis_config.files_QCDBEventTree`` and
			``analysis_config.simulation.signal_sample_masses``.
		files_per_job: value passed to ``--files-per-job`` (condor_cmsRun path only).
		retar: csub path: ``--no_retar`` is added when false; condor_cmsRun path:
			``--retar`` is added when true.
		data_source: accepted but not used by this function.
	"""
	start_directory = os.getcwd()
	working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
	os.system("mkdir -pv " + working_directory)
	os.chdir(working_directory)
	try:
		histogram_basename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample))

		method = "csub"
		if method == "csub":
			with open(analysis_config.files_QCDBEventTree[sample], 'r') as input_files_txt:
				input_files = [line.strip() for line in input_files_txt]

			bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
			# "$1" is the script's first argument; it tags the output file and
			# indexes the input_files array inside the generated script.
			output_filename = histogram_basename.replace(".root", "_$1.root")
			# NOTE(review): unlike the condor_cmsRun path below, this command does
			# not add bottomOnly=true for ZPrime samples -- confirm this is intended.
			with open(bash_script_path, 'w') as bash_script:
				bash_script.write("#!/bin/bash\n")
				bash_script.write("input_files=( " + " ".join([os.path.basename(x) for x in input_files]) + " )\n")
				bash_script.write("cmsRun "
					+ os.path.basename(analysis_config.analysis_cfgs[analysis])
					+ " dataSource=simulation"
					+ " dataType=signal"
					+ " signalMass=" + str(analysis_config.simulation.signal_sample_masses[sample])
					+ " outputFile=" + output_filename
					+ " inputFiles=file:${input_files[$1]}\n"
				)

			submit_command = "csub " + bash_script_path + " --cmssw "
			if not retar:
				submit_command += " --no_retar"
			submit_command += " -F " + ",".join(input_files) + "," + analysis_config.analysis_cfgs[analysis] + " -n " + str(len(input_files))
			os.system(submit_command)
		else:
			# The backslashes are kept literally so the shell passes $(Cluster)/$(Process) through.
			output_filename = histogram_basename.replace(".root", r"_\$\(Cluster\)_\$\(Process\).root")
			command = "condor_cmsRun"
			if retar:
				command += " --retar "
			command += " --file-list=" + analysis_config.files_QCDBEventTree[sample] + " "
			command += " --files-per-job=" + str(files_per_job)
			command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
			command += " --output-tag=BHistograms_" + sample + " "
			command += " --run "
			command += "  " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
			command += " dataSource=simulation "
			command += " dataType=signal "
			command += " signalMass=" + str(analysis_config.simulation.signal_sample_masses[sample]) + " "
			if "ZPrime" in sample:
				command += " bottomOnly=true "
			command += " outputFile=" + output_filename
			print(command)
			os.system(command)
			# tmp.txt is not written by this function; the cleanup call is kept unchanged.
			os.system("rm -f tmp.txt")

		# Replace whichever per-job tag was used with a glob for hadd.
		with open('postprocessing.sh', 'w') as postprocessing_file:
			postprocessing_file.write("#!/bin/bash\n")
			postprocessing_file.write("hadd " + working_directory + "/" + histogram_basename + " " + output_filename.replace(r"_\$\(Cluster\)_\$\(Process\)", "*").replace("$1", "*") + "\n")
	finally:
		# Always cd back, even if a configuration lookup or file operation failed.
		os.chdir(start_directory)
Beispiel #15
0
	print "\t\\label{table:X}\n",
	print "\\end{tabular}\n",
	print "\\end{table}\n",

if __name__ == "__main__":
	# Script entry point: write cutflow and cut-efficiency tables per analysis.
	import argparse
	parser = argparse.ArgumentParser(description = 'Make tables from cutflow histograms')
	#parser.add_argument('analysis', type=str, help='Analysis name')
	#parser.add_argument('sample', type=str, help='Sample name')
	args = parser.parse_args()

	analyses = ["trigbbl_CSVTM", "trigbbh_CSVTM"]

	# The data sample comes first, followed by every signal model/mass point.
	samples = ["BJetPlusX_2012"]
	for model in ["Hbb", "RSG"]:
		samples.extend(
			analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")
			for mass in [400, 500, 600, 750, 900, 1200]
		)

	for analysis in analyses:
		headers = []
		cutflow_histograms = {}
		for sample in samples:
			headers.append(sample)
			f = TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ")
			# Work on a detached clone so the file can be closed right away.
			counter = f.Get("BHistograms/CutFlowCounter_QCDEventSelector").Clone()
			counter.SetDirectory(0)
			cutflow_histograms[sample] = counter
			f.Close()

		# One table without normalization and one with it, per analysis.
		table_prefix = analysis_config.figure_directory + "/"
		histogram_to_table(headers, cutflow_histograms, normalize=False, txt_file=table_prefix + "cutflow_" + analysis + ".tex")
		histogram_to_table(headers, cutflow_histograms, normalize=True, txt_file=table_prefix + "cuteff_" + analysis + ".tex")


Beispiel #16
0
            seaborn.GetColorRoot("dark", style_counter))
        histograms[name].Draw("hist same")

        #histograms[name].Draw("hist same")
        l.AddEntry(histograms[name], name, "pl")
        style_counter += 1
    l.Draw()
    c.SaveAs(analysis_config.figure_directory + "/" + c.GetName() + ".pdf")


if __name__ == "__main__":
    for model in ["Hbb", "RSG"]:
        for mass_point in [600, 750, 900, 1200]:
            f1 = TFile(
                analysis_config.get_b_histogram_filename(
                    "trigbbh_CSVTM",
                    analysis_config.simulation.get_signal_tag(
                        model, mass_point, "FULLSIM")), "READ")
            f2 = TFile(
                analysis_config.get_b_histogram_filename(
                    "trigbbh_CSVTM_bfat",
                    analysis_config.simulation.get_signal_tag(
                        model, mass_point, "FULLSIM")))

            histograms = {}
            histograms["ak5"] = f1.Get("BHistograms/h_pfjet_mjj")
            histograms["ak5"].SetName("h_ak5_" + model + "_" + str(mass_point))
            histograms["ak5"].SetDirectory(0)

            histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"] = f1.Get(
                "BHistograms/h_fatjet_mjj")
            histograms["Fat, #DeltaR=1.1, p_{T}=30 GeV"].SetName(
Beispiel #17
0
        "trigbbl_CSVTM": [400, 500, 600, 750],
        "trigbbh_CSVTM": [600, 750, 900, 1200]
    }

    if args.mjj:
        for analysis in analyses:
            names = []
            histograms = {}
            colors = {}
            styles = {}
            for model in models:
                color_counter = 0
                for mass in masses[analysis]:
                    f = TFile(
                        analysis_config.get_b_histogram_filename(
                            analysis,
                            analysis_config.simulation.get_signal_tag(
                                model, mass, "FULLSIM")))
                    if model == "Hbb":
                        name = "H, m=" + str(mass) + " GeV"
                    elif model == "RSG":
                        name = "G, m=" + str(mass) + " GeV"
                    names.append(name)
                    histograms[name] = f.Get("BHistograms/h_pfjet_mjj")
                    histograms[name].SetDirectory(0)
                    histograms[name].Rebin(25)
                    histograms[name].Scale(1. / histograms[name].Integral())
                    f.Close()
                    if model == "Hbb":
                        styles[name] = 2
                    elif model == "RSG":
                        styles[name] = 3
			plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["SingleMu_2012"])
			plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_" + wp + "_" + sr + "_onbtag_singlemu", ratio_range=[0.,0.1], x_range=[0., 2000.], legend_position="topright", numerator_legend="With online b-tag", denominator_legend="Without online b-tag", ratio_title="Online b-tag efficiency (no offline CSV)")

	if args.do_jetht:
		for sr in ["lowmass", "highmass"]:
			if sr == "lowmass":
				numerator_analysis = "trigjetht_eta1p7_CSVTM"
				denominator_analysis = "trigjetht_eta1p7"
			else:
				numerator_analysis = "trigjetht_eta2p2_CSVTM"
				denominator_analysis = "trigjetht_eta2p2"
			plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"])
			plotter.FrankenEfficiencyPlot(logy=True, binning=dijet_binning, save_tag="_JetHT_CSVTM_" + sr, ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency)")

		#for ht_slice in [200, 250, 300, 350, 400, 450, 500, 550, 650]:
		#	if sr == "lowmass":
		#		numerator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7_CSVTM"
		#		denominator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7"
		#	else:
		#		numerator_analysis = "trigjetht" + str(ht_slice) + "_CSVTM"
		#		denominator_analysis = "trigjetht" + str(ht_slice)

		#	plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"])
		#	plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_CSVTM_" + sr + "_jetht" + str(ht_slice), ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency", prescaled=True)

	if args.do_singlejet:
		for analysis in ["trigbbl_CSVTM", "trigbbh_CSVTM"]:
			f = TFile(analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012"), "READ")
			h_mjj = histogram_tools.rebin_histogram(f.Get("BHistograms/h_pfjet_mjj"), dijet_binning)
			h_mjj_btagcorr = histogram_tools.rebin_histogram(f.Get("BHistograms/h_pfjet_mjj_btagcorr"), dijet_binning)
			EfficiencyPlot(h_mjj, h_mjj_btagcorr, name_num="No correction", name_den="b tag correction", logy=True, save_directory=analysis_config.figure_directory + "/OfflineBTag", save_tag="from_singlejet_"+analysis)
	def FrankenEfficiencyPlot(self, logy=True, binning=None, simple_rebin=None, save_tag="", x_range=None, ratio_range=None, legend_position="topright", numerator_legend=None, denominator_legend=None, ratio_title=None):
		"""Draw numerator/denominator m_jj spectra and their ratio, stitched from HT-sliced analyses.

		For every sample in self._samples and every HT slice, the numerator and
		denominator files are opened, their BHistograms/h_input_nevents integrals are
		compared, and BHistograms/h_pfjet_mjj is read from each. The per-slice histograms
		are combined with self.FrankenHist(ht_slices, histograms, ht_ranges) and summed
		over samples. The result is drawn on a two-pad canvas (spectra on top, ratio
		below) and saved as a PDF under analysis_config.figure_directory + "/OfflineBTag/".

		Arguments:
			logy               -- use a logarithmic y axis on the top pad
			binning            -- if set, passed to histogram_tools.rebin_histogram
			simple_rebin       -- if set (and binning is not), passed to TH1::Rebin
			save_tag           -- appended to histogram names and to the output file name
			x_range            -- optional [min, max] applied to both x axes
			ratio_range        -- optional [min, max] for the ratio y axis (default [0, 1])
			legend_position    -- "topright" or "bottomright"; anything else falls back to
			                      "topright" with a warning
			numerator_legend   -- legend label for the numerator (default "CSVT+CSVM")
			denominator_legend -- legend label for the denominator (default "No CSV")
			ratio_title        -- y-axis title of the ratio pad

		Calls sys.exit(1) if the configured numerator/denominator analysis is not one of
		the supported names, if the input event counts are inconsistent, or if there are
		no samples to process.
		"""
		ht_slices = ["HT200","HT250","HT300","HT350","HT400","HT450","HT500","HT550","HTUnprescaled"] # HT650

		def slice_analyses(suffix):
			# "HT200" -> "trigjetht200" + suffix ; "HTUnprescaled" -> "trigjetht" + suffix
			return dict((slice_name, "trigjetht" + slice_name[2:].replace("Unprescaled", "") + suffix) for slice_name in ht_slices)

		# Supported analysis names, mapped to the suffix carried by the per-slice analysis names
		numerator_suffixes = {"trigjetht_eta1p7_CSVTM":"_eta1p7_CSVTM", "trigjetht_eta2p2_CSVTM":"_CSVTM"}
		denominator_suffixes = {"trigjetht_eta1p7":"_eta1p7", "trigjetht_eta2p2":""}
		if self._numerator_analysis not in numerator_suffixes:
			print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : numerator analysis must be trigjetht_eta1p7_CSVTM or trigjetht_eta2p2_CSVTM")
			sys.exit(1)
		numerator_analyses = slice_analyses(numerator_suffixes[self._numerator_analysis])
		if self._denominator_analysis not in denominator_suffixes:
			print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : denominator analysis must be trigjetht_eta1p7 or trigjetht_eta2p2")
			sys.exit(1)
		denominator_analyses = slice_analyses(denominator_suffixes[self._denominator_analysis])

		# Range associated with each slice; handed to FrankenHist unchanged
		ht_ranges = {
			"HT200":[220, 386],
			"HT250":[386, 489],
			"HT300":[489, 526],
			"HT350":[526, 606],
			"HT400":[606, 649],
			"HT450":[649, 740],
			"HT500":[740, 788],
			"HT550":[788, 890],
			#"HT650":[890, 2000],
			"HTUnprescaled":[890, 2000]
		}
		numerator_histogram = None
		denominator_histogram = None
		for sample in self._samples:
			print("[EfficiencyPlot] DEBUG : Sample " + sample)
			numerator_slice_histograms = {}
			denominator_slice_histograms = {}
			for slice_name in ht_slices:
				print(slice_name)
				numerator_filename = analysis_config.get_b_histogram_filename(numerator_analyses[slice_name], sample)
				print(numerator_filename)
				numerator_file = TFile(numerator_filename, "READ")
				denominator_filename = analysis_config.get_b_histogram_filename(denominator_analyses[slice_name], sample)
				print(denominator_filename)
				denominator_file = TFile(denominator_filename, "READ")

				# Check input nevents
				num_nevents = numerator_file.Get("BHistograms/h_input_nevents").Integral()
				den_nevents = denominator_file.Get("BHistograms/h_input_nevents").Integral()
				numerator_normalization = 1.
				denominator_normalization = 1.
				if num_nevents != den_nevents:
					# Allow tiny differences...? A zero denominator count can never be "close",
					# so it goes to the abort branch instead of dividing by zero.
					if den_nevents != 0 and abs((num_nevents - den_nevents) / den_nevents) < 0.001:
						print("[EfficiencyPlot] ERROR : Small inconsistency between number of events between numerator and denominator. I'm going to rescale away the difference, but you may want to fix this.")
						numerator_normalization = den_nevents / num_nevents
					else:
						print("[EfficiencyPlot] ERROR : Inconsistent number of events between numerator and denominator. Results would be wrong, so I'm aborting.")
						print("[EfficiencyPlot] ERROR : \tNumerator = " + str(num_nevents))
						print("[EfficiencyPlot] ERROR : \tDenominator = " + str(den_nevents))
						sys.exit(1)

				# Detach the histograms from their files (SetDirectory(0)) so they survive Close()
				numerator_slice_histograms[slice_name] = numerator_file.Get("BHistograms/h_pfjet_mjj")
				numerator_slice_histograms[slice_name].SetName("h_pfjet_mjj_num_" + slice_name + "_" + sample)
				numerator_slice_histograms[slice_name].SetDirectory(0)
				numerator_slice_histograms[slice_name].Scale(numerator_normalization)
				denominator_slice_histograms[slice_name] = denominator_file.Get("BHistograms/h_pfjet_mjj")
				denominator_slice_histograms[slice_name].SetName("h_pfjet_mjj_den_" + slice_name + "_" + sample)
				denominator_slice_histograms[slice_name].SetDirectory(0)
				denominator_slice_histograms[slice_name].Scale(denominator_normalization)
				numerator_file.Close()
				denominator_file.Close()
			# Make frankenhist
			this_numerator_histogram = self.FrankenHist(ht_slices, numerator_slice_histograms, ht_ranges)
			this_denominator_histogram = self.FrankenHist(ht_slices, denominator_slice_histograms, ht_ranges)
			if numerator_histogram is None:
				# First sample: clone and give unique names (timestamp) to avoid ROOT name clashes
				numerator_histogram = this_numerator_histogram.Clone()
				numerator_histogram.SetName(numerator_histogram.GetName() + save_tag + "_num_" + str(time.time()))
				denominator_histogram = this_denominator_histogram.Clone()
				denominator_histogram.SetName(denominator_histogram.GetName() + save_tag + "_den_" + str(time.time()))
			else:
				numerator_histogram.Add(this_numerator_histogram)
				denominator_histogram.Add(this_denominator_histogram)

		if numerator_histogram is None:
			# self._samples was empty, so there is nothing to draw
			print("[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : no samples to process")
			sys.exit(1)

		# Rebin
		if binning:
			numerator_histogram = histogram_tools.rebin_histogram(numerator_histogram, binning)
			denominator_histogram = histogram_tools.rebin_histogram(denominator_histogram, binning)
		elif simple_rebin:
			numerator_histogram.Rebin(simple_rebin)
			denominator_histogram.Rebin(simple_rebin)

		cname = "c_offline_btag_eff_mjj"
		if logy:
			cname += "_log"
		c = TCanvas(cname, "Offline b-tag #epsilon", 800, 1000)

		# Top pad: numerator and denominator spectra
		top = TPad("top", "top", 0., 0.5, 1., 1.)
		top.SetBottomMargin(0.02)
		if logy:
			top.SetLogy()
		top.Draw()
		top.cd()

		# Empty clone used only to draw the axes
		frame_top = numerator_histogram.Clone()
		frame_top.Reset()
		if x_range:
			frame_top.GetXaxis().SetRangeUser(x_range[0], x_range[1])
		highest_bin = max(numerator_histogram.GetMaximum(), denominator_histogram.GetMaximum())
		if logy:
			y_min = 0.1
			y_max = highest_bin * 10.
		else:
			y_min = 0.
			y_max = highest_bin * 1.5
		frame_top.SetMinimum(y_min)
		frame_top.SetMaximum(y_max)
		frame_top.GetXaxis().SetLabelSize(0)
		frame_top.GetXaxis().SetTitleSize(0)
		# Same title with or without custom binning (an "Events / 1 GeV" variant was disabled)
		frame_top.GetYaxis().SetTitle("Events")
		frame_top.Draw("axis")
		print("numerator integral = " + str(numerator_histogram.Integral()))
		print("denominator integral = " + str(denominator_histogram.Integral()))
		numerator_histogram.SetMarkerStyle(20)
		numerator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 0))
		numerator_histogram.SetLineColor(seaborn.GetColorRoot("default", 0))
		numerator_histogram.Draw("same")
		denominator_histogram.SetMarkerStyle(24)
		denominator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 2))
		denominator_histogram.SetLineColor(seaborn.GetColorRoot("default", 2))
		denominator_histogram.Draw("same")
		if legend_position == "bottomright":
			l = TLegend(0.6, 0.2, 0.85, 0.4)
		else:
			# Unknown positions used to leave the legend undefined; fall back to the default corner
			if legend_position != "topright":
				print("[OfflineBTagPlots::FrankenEfficiencyPlot] WARNING : unknown legend_position " + str(legend_position) + ", using topright")
			l = TLegend(0.6, 0.6, 0.85, 0.8)

		l.SetFillColor(0)
		l.SetBorderSize(0)
		if numerator_legend:
			l.AddEntry(numerator_histogram, numerator_legend)
		else:
			l.AddEntry(numerator_histogram, "CSVT+CSVM")
		if denominator_legend:
			l.AddEntry(denominator_histogram, denominator_legend)
		else:
			l.AddEntry(denominator_histogram, "No CSV")
		l.Draw()

		# Bottom pad: ratio
		c.cd()
		bottom = TPad("bottom", "bottom", 0., 0., 1., 0.5)
		bottom.SetTopMargin(0.01)
		bottom.SetBottomMargin(0.2)
		bottom.Draw()
		bottom.cd()
		ratio_histogram = numerator_histogram.Clone()
		ratio_histogram.Reset()
		if x_range:
			ratio_histogram.GetXaxis().SetRangeUser(x_range[0], x_range[1])
		ratio_histogram.SetName(numerator_histogram.GetName() + "_ratio_" + save_tag + str(time.time()))
		ratio_histogram.SetDirectory(0)
		for bin_index in range(1, numerator_histogram.GetNbinsX() + 1):
			ratio = 0.
			ratio_err = 0.
			num_error = numerator_histogram.GetBinError(bin_index)
			den_error = denominator_histogram.GetBinError(bin_index)
			if num_error > 0 and den_error > 0:
				# Undo bin normalization: content^2 / error^2 is unchanged by a constant
				# scale factor applied to both content and error
				num_unnormalized = (numerator_histogram.GetBinContent(bin_index))**2 / num_error**2
				den_unnormalized = (denominator_histogram.GetBinContent(bin_index))**2 / den_error**2
				if den_unnormalized > 0:
					ratio = 1. * num_unnormalized / den_unnormalized
					# Clamp at zero so a ratio above 1 does not hit a sqrt domain error
					ratio_err = sqrt(max(ratio * (1. - ratio), 0.) / den_unnormalized)
			ratio_histogram.SetBinContent(bin_index, ratio)
			ratio_histogram.SetBinError(bin_index, ratio_err)
		ratio_histogram.SetMarkerSize(1)
		ratio_histogram.SetMarkerColor(kBlack)
		ratio_histogram.SetLineColor(kBlack)
		ratio_histogram.SetLineWidth(2)
		ratio_histogram.GetXaxis().SetTitle("m_{jj} [GeV]")
		if ratio_title:
			ratio_histogram.GetYaxis().SetTitle(ratio_title)
		else:
			ratio_histogram.GetYaxis().SetTitle("Offline 2#timesb-tag efficiency")
		if ratio_range:
			ratio_histogram.SetMinimum(ratio_range[0])
			ratio_histogram.SetMaximum(ratio_range[1])
		else:
			ratio_histogram.SetMinimum(0.)
			ratio_histogram.SetMaximum(1.)
		ratio_histogram.Draw()

		c.cd()
		c.SaveAs(analysis_config.figure_directory + "/OfflineBTag/" + c.GetName() + save_tag + ".pdf")
		# Hand the canvas and pads over to ROOT so Python does not delete them
		ROOT.SetOwnership(c, False)
		ROOT.SetOwnership(top, False)
		ROOT.SetOwnership(bottom, False)
                denominator_legend="Without offline b-tag",
                ratio_title="Offline b-tag efficiency)")

        #for ht_slice in [200, 250, 300, 350, 400, 450, 500, 550, 650]:
        #	if sr == "lowmass":
        #		numerator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7_CSVTM"
        #		denominator_analysis = "trigjetht" + str(ht_slice) + "_eta1p7"
        #	else:
        #		numerator_analysis = "trigjetht" + str(ht_slice) + "_CSVTM"
        #		denominator_analysis = "trigjetht" + str(ht_slice)

        #	plotter = OfflineBTagPlots(numerator_analysis, denominator_analysis, ["JetHT_2012BCD"])
        #	plotter.EfficiencyPlot(var="mjj", logy=True, binning=dijet_binning, save_tag="_CSVTM_" + sr + "_jetht" + str(ht_slice), ratio_range=[0.,0.01], x_range=[0., 2000.], legend_position="topright", numerator_legend="With offline b-tag", denominator_legend="Without offline b-tag", ratio_title="Offline b-tag efficiency", prescaled=True)

    if args.do_singlejet:
        for analysis in ["trigbbl_CSVTM", "trigbbh_CSVTM"]:
            f = TFile(
                analysis_config.get_b_histogram_filename(
                    analysis, "BJetPlusX_2012"), "READ")
            h_mjj = histogram_tools.rebin_histogram(
                f.Get("BHistograms/h_pfjet_mjj"), dijet_binning)
            h_mjj_btagcorr = histogram_tools.rebin_histogram(
                f.Get("BHistograms/h_pfjet_mjj_btagcorr"), dijet_binning)
            EfficiencyPlot(h_mjj,
                           h_mjj_btagcorr,
                           name_num="No correction",
                           name_den="b tag correction",
                           logy=True,
                           save_directory=analysis_config.figure_directory +
                           "/OfflineBTag",
                           save_tag="from_singlejet_" + analysis)
Beispiel #21
0
	ROOT.SetOwnership(c, False)
	ROOT.SetOwnership(top, False)
	ROOT.SetOwnership(bottom, False)


# Script entry point: for each analysis / signal model / mass point, load the signal
# and background 2D histograms and print a mass window around the fitted signal peak.
if __name__ == "__main__":
	for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]:
		for model in ["Hbb", "RSG"]:
			# Mass points differ between the two analyses
			if analysis == "trigbbh_CSVTM":
				masses = [600, 750, 900, 1200]
			elif analysis == "trigbbl_CSVTM":
				masses = [400, 500, 600]
			for mass in masses:
				print "On " + model + " / " + str(mass)

				signal_file = TFile(analysis_config.get_b_histogram_filename(analysis, analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")), "READ")
				signal_histogram = signal_file.Get("BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj")
				xsec = 1. # 1 pb placeholder
				# Scale by 19700 * xsec / (integral of h_input_nevents)
				# NOTE(review): 19700 is presumably the integrated luminosity in pb^-1 -- confirm
				ngenevt = signal_file.Get("BHistograms/h_input_nevents").Integral()
				signal_histogram.Scale(19700. * xsec / ngenevt)

				print "Background file " + analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012")
				background_file = TFile(analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012"), "READ")
				background_histogram = background_file.Get("BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj")

				# Project the 2D signal histogram onto its y axis and fit within +-150 of the nominal mass
				signal_histogram_mjj = signal_histogram.ProjectionY()
				signal_fit_results = DoSignalFit(signal_histogram_mjj, fit_range=[mass-150., mass+150.])
				# Fit parameters 2 and 3 are used as the peak position and width
				# NOTE(review): parameter meaning depends on DoSignalFit's fit function -- verify there
				signal_x0 = signal_fit_results["fit"].GetParameter(2)
				signal_sigma = signal_fit_results["fit"].GetParameter(3)
				print "\tWindow = [" + str(signal_x0 - signal_sigma) + ", " + str(signal_x0 + signal_sigma) + "]"
Beispiel #22
0
	analyses = ["trigbbl_CSVTM", "trigbbh_CSVTM"]
	masses = {
		"trigbbl_CSVTM":[400, 500, 600, 750],
		"trigbbh_CSVTM":[600, 750, 900, 1200]
	}

	if args.mjj:
		for analysis in analyses:
			names = []
			histograms = {}
			colors = {}
			styles = {}
			for model in models:
				color_counter = 0
				for mass in masses[analysis]:
					f = TFile(analysis_config.get_b_histogram_filename(analysis, analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")))
					if model == "Hbb":
						name = "H, m=" + str(mass) + " GeV"
					elif model == "RSG":
						name = "G, m=" + str(mass) + " GeV"
					names.append(name)
					histograms[name] = f.Get("BHistograms/h_pfjet_mjj")
					histograms[name].SetDirectory(0)
					histograms[name].Rebin(25)
					histograms[name].Scale(1. / histograms[name].Integral())
					f.Close()
					if model == "Hbb":
						styles[name] = 2
					elif model == "RSG":
						styles[name] = 3
					colors[name] = seaborn.GetColorRoot("dark", color_counter)
gROOT.SetBatch(True)
gStyle.SetOptStat(0)
gStyle.SetOptTitle(0)
gSystem.Load("~/Dijets/CMSSW_5_3_32_patch3/lib/slc6_amd64_gcc472/libMyToolsRootUtils.so")
import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config
sys.path.append("/uscms/home/dryu/Dijets/CMSSW_5_3_32_patch3/python/MyTools/RootUtils")
import histogram_tools
seaborn = Root.SeabornInterface()
seaborn.Initialize()


# Open the SingleMu_2012 histogram file of every analysis and keep the handles in `files`.
print "Loading histograms"
analyses = ["trigmu_highmass_CSVTM", "trigmu_lowmass_CSVTM", "trigmubbh_highmass_CSVTM", "trigmubbl_lowmass_CSVTM", "trigmubbll_lowmass_CSVTM"]
files = {}
for analysis in analyses:
    files[analysis] = ROOT.TFile(analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012"), "READ")

from array import array
# Array of doubles named mass_bins; presumably variable-width m_jj bin edges -- confirm against its users
mass_bins = array("d", [1, 3, 6, 10, 16, 23, 31, 40, 50, 61, 74, 88, 103, 119, 137, 156, 176, 197, 220, 244, 270, 296, 325, 354, 386, 419, 453, 489, 526, 565, 606, 649, 693, 740, 788, 838, 890, 944, 1000, 1058, 1118, 1181, 1246, 1313, 1383, 1455, 1530, 1607, 1687, 1770, 1856, 1945, 2037, 2132, 2231, 2332, 2438, 2546, 2659, 2775, 2895, 3019, 3147, 3279, 3416, 3558, 3704, 3854, 4010, 4171, 4337, 4509, 4686, 4869, 5058, 5253, 5455, 5663, 5877, 6099, 6328, 6564, 6808, 7060, 7320, 7589, 7866, 8000])

# For each analysis and variable, read BHistograms/h_pfjet_<variable>, rename it,
# and detach it from its file so it stays valid independently of the TFile.
variables = ["mjj", "pt1", "pt2", "pt_btag1", "pt_btag2"]
histograms = {}
for analysis in analyses:
    histograms[analysis] = {}
    for variable in variables:
        histograms[analysis][variable] = files[analysis].Get("BHistograms/h_pfjet_" + variable)
        histograms[analysis][variable].SetName("h_" + analysis + "_" + variable)
        histograms[analysis][variable].SetDirectory(0)
        # NOTE(review): every entry of `analyses` ends in "_CSVTM", so this comparison never
        # matches and the 1.7 scale is never applied. Probably meant "trigmubbll_lowmass_CSVTM" -- confirm.
        if analysis == "trigmubbll_lowmass":
            histograms[analysis][variable].Scale(1.7) # Prescale
        if variable == "mjj":
Beispiel #24
0
def RunBHistogramsSignal(analysis,
                         sample,
                         files_per_job=1,
                         retar=False,
                         data_source=None):
    """Submit BHistograms jobs for one signal sample and write a merge script.

    Creates the working directory
    <dijet_directory>/data/EightTeeEeVeeBee/BHistograms/condor/submit_<analysis>_<sample>,
    changes into it, submits the jobs, writes postprocessing.sh (an `hadd`
    command merging the per-job outputs) and finally changes back to the
    directory the caller was in, even if a step raises.

    Args:
        analysis: key into analysis_config.analysis_cfgs.
        sample: key into analysis_config.files_QCDBEventTree and
            analysis_config.simulation.signal_sample_masses.
        files_per_job: only used by the condor_cmsRun branch; the csub branch
            runs one job per input file.
        retar: csub gets " --no_retar" when this is False; condor_cmsRun gets
            " --retar " when it is True.
        data_source: accepted for signature compatibility; not used here
            (dataSource=simulation is always passed).
    """
    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)

    try:
        cfg_path = analysis_config.analysis_cfgs[analysis]
        histogram_basename = os.path.basename(
            analysis_config.get_b_histogram_filename(analysis, sample))
        signal_mass = str(
            analysis_config.simulation.signal_sample_masses[sample])

        method = "csub"
        if method == "csub":
            with open(analysis_config.files_QCDBEventTree[sample],
                      'r') as input_files_txt:
                input_files = [line.strip() for line in input_files_txt]

            # $1 is the argument the job script receives; it indexes the
            # input_files bash array and tags the output file name.
            output_filename = histogram_basename.replace(".root", "_$1.root")
            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            with open(bash_script_path, 'w') as bash_script:
                bash_script.write("#!/bin/bash\n")
                bash_script.write(
                    "input_files=( " +
                    " ".join([os.path.basename(x) for x in input_files]) +
                    " )\n")
                bash_script.write(
                    "cmsRun " + os.path.basename(cfg_path) +
                    " dataSource=simulation" + " dataType=signal" +
                    " signalMass=" + signal_mass +
                    " outputFile=" + output_filename +
                    " inputFiles=file:${input_files[$1]}\n")

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            # The input files and the cfg are passed to -F; one job per input file
            submit_command += " -F " + ",".join(
                input_files) + "," + cfg_path + " -n " + str(len(input_files))
            os.system(submit_command)
        else:
            command = "condor_cmsRun"
            if retar:
                command += " --retar "
            command += " --file-list=" + analysis_config.files_QCDBEventTree[
                sample] + " "
            command += " --files-per-job=" + str(files_per_job)
            command += " --submit-file=submit_" + analysis + "_" + sample + ".jdl "
            command += " --output-tag=BHistograms_" + sample + " "
            command += " --run "
            command += "  " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + cfg_path
            command += " dataSource=simulation "
            command += " dataType=signal "
            command += " signalMass=" + signal_mass + " "
            if "ZPrime" in sample:
                command += " bottomOnly=true "
            # Backslashes are written explicitly; the resulting string is the
            # shell-escaped $(Cluster)_$(Process) suffix, same as before.
            output_filename = histogram_basename.replace(
                ".root", "_\\$\\(Cluster\\)_\\$\\(Process\\).root")
            command += " outputFile=" + output_filename
            print(command)
            os.system(command)
            os.system("rm -f tmp.txt")

        # Merge script: replace the per-job part of the output name with a glob
        with open('postprocessing.sh', 'w') as postprocessing_file:
            postprocessing_file.write("#!/bin/bash\n")
            postprocessing_file.write(
                "hadd " + working_directory + "/" + histogram_basename + " " +
                output_filename.replace("_\\$\\(Cluster\\)_\\$\\(Process\\)",
                                        "*").replace("$1", "*") + "\n")
    finally:
        # cd back, even when submission failed part-way
        os.chdir(start_directory)
Beispiel #25
0
    ROOT.SetOwnership(bottom, False)


# Script entry point: for each analysis / signal model / mass point, load the
# signal and background 2D histograms.
if __name__ == "__main__":
    for analysis in ["trigbbh_CSVTM", "trigbbl_CSVTM"]:
        for model in ["Hbb", "RSG"]:
            # Mass points differ between the two analyses
            if analysis == "trigbbh_CSVTM":
                masses = [600, 750, 900, 1200]
            elif analysis == "trigbbl_CSVTM":
                masses = [400, 500, 600]
            for mass in masses:
                print "On " + model + " / " + str(mass)

                signal_file = TFile(
                    analysis_config.get_b_histogram_filename(
                        analysis,
                        analysis_config.simulation.get_signal_tag(
                            model, mass, "FULLSIM")), "READ")
                signal_histogram = signal_file.Get(
                    "BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj")
                xsec = 1.  # 1 pb placeholder
                # Scale by 19700 * xsec / (integral of h_input_nevents)
                # NOTE(review): 19700 is presumably the integrated luminosity in pb^-1 -- confirm
                ngenevt = signal_file.Get(
                    "BHistograms/h_input_nevents").Integral()
                signal_histogram.Scale(19700. * xsec / ngenevt)

                print "Background file " + analysis_config.get_b_histogram_filename(
                    analysis, "BJetPlusX_2012")
                background_file = TFile(
                    analysis_config.get_b_histogram_filename(
                        analysis, "BJetPlusX_2012"), "READ")
                background_histogram = background_file.Get(
                    "BHistograms/h_nminusone_PFDijetMaxDeltaEta_vs_PFMjj")
Beispiel #26
0
def RunBHistogramsEOS(analysis, sample, files_per_job=20, retar=False, data_source=None):
	"""Submit BHistograms jobs for a sample and write a merge script.

	Reads the input file list from analysis_config.files_QCDBEventTree[sample],
	splits it into jobs of (at most) files_per_job files, writes a bash job script
	into the working directory, submits it with csub, and writes postprocessing.py,
	which runs `hadd` on the job outputs only if no *stderr log in the working
	directory contains the string "FAILURE". The caller's working directory is
	restored on return, even if a step raises.

	Arguments:
		analysis      -- key into analysis_config.analysis_cfgs
		sample        -- key into analysis_config.files_QCDBEventTree
		files_per_job -- requested files per job; recomputed so that jobs are evenly sized
		retar         -- csub gets " --no_retar" when this is False
		data_source   -- required; passed to cmsRun as dataSource=<data_source>

	Calls sys.exit(1) if data_source is missing, files_per_job is smaller than 1,
	or the input file list is empty.
	"""
	if not data_source:
		print("[RunBHistogramsEOS] ERROR : Please specify data_source = collision_data or simulation")
		sys.exit(1)

	# Create working directory and cd
	start_directory = os.getcwd()
	working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
	os.system("mkdir -pv " + working_directory)
	os.chdir(working_directory)

	try:
		# Get input file list (read once; open() replaces the Python-2-only file() builtin)
		with open(analysis_config.files_QCDBEventTree[sample], 'r') as input_files_txt:
			input_files = [line.strip() for line in input_files_txt]

		# These cases used to end in a ZeroDivisionError (or an endless loop in the condor branch)
		if files_per_job < 1:
			print("[RunBHistogramsEOS] ERROR : files_per_job must be at least 1")
			sys.exit(1)
		if len(input_files) == 0:
			print("[RunBHistogramsEOS] ERROR : No input files listed in " + analysis_config.files_QCDBEventTree[sample])
			sys.exit(1)

		method = "csub"
		if method == "csub":
			# Recalculate files_per_job to split evenly
			n_jobs = int(math.ceil(1. * len(input_files) / files_per_job))
			files_per_job = int(math.ceil(1. * len(input_files) / n_jobs))

			# The job script receives the job index as $1 and picks its slice of input_files
			bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
			output_filename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample).replace(".root", "_$1.root"))
			with open(bash_script_path, 'w') as bash_script:
				bash_script.write("#!/bin/bash\n")
				bash_script.write("input_files=( " + " ".join(input_files) + " )\n")
				bash_script.write("files_per_job=" + str(files_per_job) + "\n")
				bash_script.write("first_file_index=$(($1*$files_per_job))\n")
				bash_script.write("max_file_index=$((${#input_files[@]}-1))\n")
				# Last job: shrink the slice so it does not run past the end of the list
				bash_script.write("if [ $(($first_file_index+$files_per_job-1)) -gt $max_file_index ]; then\n")
				bash_script.write("\tfiles_per_job=$(($max_file_index-$first_file_index+1))\n")
				bash_script.write("fi\n")
				bash_script.write("declare -a this_input_files=(${input_files[@]:$first_file_index:$files_per_job})\n")
				bash_script.write("function join { local IFS=\"$1\"; shift; echo \"$*\"; }\n")
				bash_script.write("this_input_files_string=\"$(join , ${this_input_files[@]})\"\n")
				bash_script.write("echo \"Input files:\"\n")
				bash_script.write("echo $this_input_files_string\n")
				bash_script.write("cmsRun "
					+ os.path.basename(analysis_config.analysis_cfgs[analysis])
					+ " dataSource=" + data_source
					+ " dataType=data"
					+ " outputFile=" + output_filename
					+ " inputFiles=$this_input_files_string\n"
				)

			submit_command = "csub " + bash_script_path + " --cmssw "
			if not retar:
				submit_command += " --no_retar"
			print("[debug] This job will have " + str(len(input_files)) + " / " + str(files_per_job) + " = " + str(n_jobs) + " jobs")
			# NOTE(review): the -F value starts with a comma (no input files are listed before the cfg);
			# kept as-is because csub's handling of the empty entry is not visible here -- confirm.
			submit_command += " -F " + "," + analysis_config.analysis_cfgs[analysis] + " -n " + str(n_jobs)
			os.system(submit_command)

		else:
			# One condor_cmsRun command per group of files_per_job inputs.
			# (This branch used to reference an undefined name, file_list.)
			file_index = 0
			subjob_index = 0
			subjob_output_filenames = []
			while file_index < len(input_files):
				this_job_files = []
				while file_index < len(input_files) and len(this_job_files) < files_per_job:
					this_job_files.append(input_files[file_index].rstrip())
					file_index += 1
				if len(this_job_files) < 1:
					continue
				# The command is wrapped in echo "...", so it is printed by the shell rather than run
				command = "echo \"condor_cmsRun"
				if retar:
					command += " --retar "
				command += " --submit-file=submit_" + analysis + "_" + sample + ".subjob" + str(subjob_index) + ".jdl "
				command += " --output-tag=BHistograms_" + sample + ".subjob" + str(subjob_index) + " "
				command += " --run " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_config.analysis_cfgs[analysis]
				command += " dataSource=" + data_source
				command += " dataType=data inputFiles="
				command += ",".join(this_job_files).rstrip(",")
				command += "\""
				subjob_output_filename = os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample) + ".subjob" + str(subjob_index))
				command += " outputFile=" + subjob_output_filename
				subjob_output_filenames.append(subjob_output_filename)

				#print command
				os.system(command)
				subjob_index += 1

		# Postprocessing script: merge the job outputs unless a stderr log reports a failure
		merge_command = "hadd " + analysis_config.get_b_histogram_filename(analysis, sample) + " " + working_directory + "/" + os.path.basename(analysis_config.get_b_histogram_filename(analysis, sample)).replace(".root", "_*.root")
		with open('postprocessing.py', 'w') as postprocessing_file:
			postprocessing_file.write("import os\n")
			postprocessing_file.write("import sys\n")
			postprocessing_file.write("import glob\n")
			postprocessing_file.write("log_files = glob.glob(\"" + working_directory + "/*stderr\")\n")
			postprocessing_file.write("failed_logs = []\n")
			postprocessing_file.write("for log_file in log_files:\n")
			postprocessing_file.write("\tlog_file_handle = open(log_file, 'r')\n")
			postprocessing_file.write("\tfor line in log_file_handle:\n")
			postprocessing_file.write("\t\tif \"FAILURE\" in line:\n")
			postprocessing_file.write("\t\t\tfailed_logs.append(log_file)\n")
			postprocessing_file.write("if len(failed_logs) == 0:\n")
			postprocessing_file.write("\tos.system(\"" + merge_command + "\")\n")
			postprocessing_file.write("else:\n")
			postprocessing_file.write("\tprint(\"Some jobs failed. You need to retry them.\")\n")
			postprocessing_file.write("\tfor failed_log in failed_logs:\n")
			# Parenthesised so the generated script uses a single print style throughout
			postprocessing_file.write("\t\tprint(failed_log)\n")
	finally:
		# cd back, even when submission failed part-way
		os.chdir(start_directory)
    def FrankenEfficiencyPlot(self,
                              logy=True,
                              binning=None,
                              simple_rebin=None,
                              save_tag="",
                              x_range=None,
                              ratio_range=None,
                              legend_position="topright",
                              numerator_legend=None,
                              denominator_legend=None,
                              ratio_title=None):
        ht_slices = [
            "HT200", "HT250", "HT300", "HT350", "HT400", "HT450", "HT500",
            "HT550", "HTUnprescaled"
        ]  # HT650
        if self._numerator_analysis == "trigjetht_eta1p7_CSVTM":
            numerator_analyses = {
                "HT200": "trigjetht200_eta1p7_CSVTM",
                "HT250": "trigjetht250_eta1p7_CSVTM",
                "HT300": "trigjetht300_eta1p7_CSVTM",
                "HT350": "trigjetht350_eta1p7_CSVTM",
                "HT400": "trigjetht400_eta1p7_CSVTM",
                "HT450": "trigjetht450_eta1p7_CSVTM",
                "HT500": "trigjetht500_eta1p7_CSVTM",
                "HT550": "trigjetht550_eta1p7_CSVTM",
                #"HT650":"trigjetht650_eta1p7_CSVTM",
                "HTUnprescaled": "trigjetht_eta1p7_CSVTM"
            }
        elif self._numerator_analysis == "trigjetht_eta2p2_CSVTM":
            numerator_analyses = {
                "HT200": "trigjetht200_CSVTM",
                "HT250": "trigjetht250_CSVTM",
                "HT300": "trigjetht300_CSVTM",
                "HT350": "trigjetht350_CSVTM",
                "HT400": "trigjetht400_CSVTM",
                "HT450": "trigjetht450_CSVTM",
                "HT500": "trigjetht500_CSVTM",
                "HT550": "trigjetht550_CSVTM",
                #"HT650":"trigjetht650_CSVTM",
                "HTUnprescaled": "trigjetht_CSVTM"
            }
        else:
            print "[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : numerator analysis must be trigjetht_eta1p7_CSVTM or trigjetht_eta2p2_CSVTM"
            sys.exit(1)
        if self._denominator_analysis == "trigjetht_eta1p7":
            denominator_analyses = {
                "HT200": "trigjetht200_eta1p7",
                "HT250": "trigjetht250_eta1p7",
                "HT300": "trigjetht300_eta1p7",
                "HT350": "trigjetht350_eta1p7",
                "HT400": "trigjetht400_eta1p7",
                "HT450": "trigjetht450_eta1p7",
                "HT500": "trigjetht500_eta1p7",
                "HT550": "trigjetht550_eta1p7",
                #"HT650":"trigjetht650_eta1p7",
                "HTUnprescaled": "trigjetht_eta1p7"
            }
        elif self._denominator_analysis == "trigjetht_eta2p2":
            denominator_analyses = {
                "HT200": "trigjetht200",
                "HT250": "trigjetht250",
                "HT300": "trigjetht300",
                "HT350": "trigjetht350",
                "HT400": "trigjetht400",
                "HT450": "trigjetht450",
                "HT500": "trigjetht500",
                "HT550": "trigjetht550",
                #"HT650":"trigjetht650",
                "HTUnprescaled": "trigjetht"
            }
        else:
            print "[OfflineBTagPlots::FrankenEfficiencyPlot] ERROR : denominator analysis must be trigjetht_eta1p7 or trigjetht_eta2p2"
            sys.exit(1)
        ht_ranges = {
            "HT200": [220, 386],
            "HT250": [386, 489],
            "HT300": [489, 526],
            "HT350": [526, 606],
            "HT400": [606, 649],
            "HT450": [649, 740],
            "HT500": [740, 788],
            "HT550": [788, 890],
            #"HT650":[890, 2000],
            "HTUnprescaled": [890, 2000]
        }
        numerator_histogram = None
        denominator_histogram = None
        for sample in self._samples:
            print "[EfficiencyPlot] DEBUG : Sample " + sample
            numerator_slice_histograms = {}
            denominator_slice_histograms = {}
            for slice_name in ht_slices:
                print slice_name
                print analysis_config.get_b_histogram_filename(
                    numerator_analyses[slice_name], sample)
                numerator_file = TFile(
                    analysis_config.get_b_histogram_filename(
                        numerator_analyses[slice_name], sample), "READ")
                print analysis_config.get_b_histogram_filename(
                    denominator_analyses[slice_name], sample)
                denominator_file = TFile(
                    analysis_config.get_b_histogram_filename(
                        denominator_analyses[slice_name], sample), "READ")

                # Check input nevents
                num_nevents = numerator_file.Get(
                    "BHistograms/h_input_nevents").Integral()
                den_nevents = denominator_file.Get(
                    "BHistograms/h_input_nevents").Integral()
                if num_nevents != den_nevents:
                    # Allow tiny differences...?
                    if abs((num_nevents - den_nevents) / den_nevents) < 0.001:
                        print "[EfficiencyPlot] ERROR : Small inconsistency between number of events between numerator and denominator. I'm going to rescale away the difference, but you may want to fix this."
                        numerator_normalization = den_nevents / num_nevents
                        denominator_normalization = 1.
                    else:
                        print "[EfficiencyPlot] ERROR : Inconsistent number of events between numerator and denominator. Results would be wrong, so I'm aborting."
                        print "[EfficiencyPlot] ERROR : \tNumerator = " + str(
                            numerator_file.Get(
                                "BHistograms/h_input_nevents").Integral())
                        print "[EfficiencyPlot] ERROR : \tDenominator = " + str(
                            denominator_file.Get(
                                "BHistograms/h_input_nevents").Integral())
                        sys.exit(1)
                else:
                    numerator_normalization = 1.
                    denominator_normalization = 1.
                numerator_slice_histograms[slice_name] = numerator_file.Get(
                    "BHistograms/h_pfjet_mjj")
                numerator_slice_histograms[slice_name].SetName(
                    "h_pfjet_mjj_num_" + slice_name + "_" + sample)
                numerator_slice_histograms[slice_name].SetDirectory(0)
                numerator_slice_histograms[slice_name].Scale(
                    numerator_normalization)
                denominator_slice_histograms[
                    slice_name] = denominator_file.Get(
                        "BHistograms/h_pfjet_mjj")
                denominator_slice_histograms[slice_name].SetName(
                    "h_pfjet_mjj_den_" + slice_name + "_" + sample)
                denominator_slice_histograms[slice_name].SetDirectory(0)
                denominator_slice_histograms[slice_name].Scale(
                    denominator_normalization)
                numerator_file.Close()
                denominator_file.Close()
            # Make frankenhist
            this_numerator_histogram = self.FrankenHist(
                ht_slices, numerator_slice_histograms, ht_ranges)
            this_denominator_histogram = self.FrankenHist(
                ht_slices, denominator_slice_histograms, ht_ranges)
            if not numerator_histogram:
                numerator_histogram = this_numerator_histogram.Clone()
                numerator_histogram.SetName(numerator_histogram.GetName() +
                                            save_tag + "_num_" +
                                            str(time.time()))
                denominator_histogram = this_denominator_histogram.Clone()
                denominator_histogram.SetName(denominator_histogram.GetName() +
                                              save_tag + "_num_" +
                                              str(time.time()))
            else:
                numerator_histogram.Add(this_numerator_histogram)
                denominator_histogram.Add(this_denominator_histogram)

        # Rebin
        if binning:
            numerator_histogram = histogram_tools.rebin_histogram(
                numerator_histogram, binning)
            denominator_histogram = histogram_tools.rebin_histogram(
                denominator_histogram, binning)
        elif simple_rebin:
            numerator_histogram.Rebin(simple_rebin)
            denominator_histogram.Rebin(simple_rebin)

        cname = "c_offline_btag_eff_mjj"
        if logy:
            cname += "_log"
        c = TCanvas(cname, "Offline b-tag #epsilon", 800, 1000)
        top = TPad("top", "top", 0., 0.5, 1., 1.)
        top.SetBottomMargin(0.02)
        if logy:
            top.SetLogy()
        top.Draw()
        top.cd()

        frame_top = numerator_histogram.Clone()
        frame_top.Reset()
        if x_range:
            frame_top.GetXaxis().SetRangeUser(x_range[0], x_range[1])
        if logy:
            y_min = 0.1
            y_max = max(numerator_histogram.GetMaximum(),
                        denominator_histogram.GetMaximum()) * 10.
        else:
            y_min = 0.
            y_max = max(numerator_histogram.GetMaximum(),
                        denominator_histogram.GetMaximum()) * 1.5
        frame_top.SetMinimum(y_min)
        frame_top.SetMaximum(y_max)
        frame_top.GetXaxis().SetLabelSize(0)
        frame_top.GetXaxis().SetTitleSize(0)
        if binning:
            #frame_top.GetYaxis().SetTitle("Events / 1 GeV")
            frame_top.GetYaxis().SetTitle("Events")
        else:
            frame_top.GetYaxis().SetTitle("Events")
        frame_top.Draw("axis")
        print "numerator integral = " + str(numerator_histogram.Integral())
        print "denominator integral = " + str(denominator_histogram.Integral())
        numerator_histogram.SetMarkerStyle(20)
        numerator_histogram.SetMarkerColor(seaborn.GetColorRoot("default", 0))
        numerator_histogram.SetLineColor(seaborn.GetColorRoot("default", 0))
        numerator_histogram.Draw("same")
        denominator_histogram.SetMarkerStyle(24)
        denominator_histogram.SetMarkerColor(seaborn.GetColorRoot(
            "default", 2))
        denominator_histogram.SetLineColor(seaborn.GetColorRoot("default", 2))
        denominator_histogram.Draw("same")
        if legend_position == "topright":
            l = TLegend(0.6, 0.6, 0.85, 0.8)
        elif legend_position == "bottomright":
            l = TLegend(0.6, 0.2, 0.85, 0.4)

        l.SetFillColor(0)
        l.SetBorderSize(0)
        if numerator_legend:
            l.AddEntry(numerator_histogram, numerator_legend)
        else:
            l.AddEntry(numerator_histogram, "CSVT+CSVM")
        if denominator_legend:
            l.AddEntry(denominator_histogram, denominator_legend)

        else:
            l.AddEntry(denominator_histogram, "No CSV")
        l.Draw()

        c.cd()
        bottom = TPad("bottom", "bottom", 0., 0., 1., 0.5)
        bottom.SetTopMargin(0.01)
        bottom.SetBottomMargin(0.2)
        bottom.Draw()
        bottom.cd()
        ratio_histogram = numerator_histogram.Clone()
        ratio_histogram.Reset()
        if x_range:
            ratio_histogram.GetXaxis().SetRangeUser(x_range[0], x_range[1])
        ratio_histogram.SetName(numerator_histogram.GetName() + "_ratio_" +
                                save_tag + str(time.time()))
        ratio_histogram.SetDirectory(0)
        for bin in xrange(1, numerator_histogram.GetNbinsX() + 1):
            # Undo bin normalization
            if numerator_histogram.GetBinError(
                    bin) > 0 and denominator_histogram.GetBinError(bin) > 0:
                num_unnormalized = (numerator_histogram.GetBinContent(
                    bin))**2 / (numerator_histogram.GetBinError(bin))**2
                den_unnormalized = (denominator_histogram.GetBinContent(
                    bin))**2 / (denominator_histogram.GetBinError(bin))**2
                num = numerator_histogram.GetBinContent(bin)
                den = denominator_histogram.GetBinContent(bin)
                ratio = 1. * num_unnormalized / den_unnormalized
                ratio_err = sqrt(ratio * (1. - ratio) / den_unnormalized)
                #ratio_err = max(sqrt(ratio * (1. - ratio) / den), 1./den)
            else:
                ratio = 0.
                ratio_err = 0.
            ratio_histogram.SetBinContent(bin, ratio)
            ratio_histogram.SetBinError(bin, ratio_err)
        ratio_histogram.SetMarkerSize(1)
        ratio_histogram.SetMarkerColor(kBlack)
        ratio_histogram.SetLineColor(kBlack)
        ratio_histogram.SetLineWidth(2)
        ratio_histogram.GetXaxis().SetTitle("m_{jj} [GeV]")
        if ratio_title:
            ratio_histogram.GetYaxis().SetTitle(ratio_title)
        else:
            ratio_histogram.GetYaxis().SetTitle(
                "Offline 2#timesb-tag efficiency")
        if ratio_range:
            ratio_histogram.SetMinimum(ratio_range[0])
            ratio_histogram.SetMaximum(ratio_range[1])
        else:
            ratio_histogram.SetMinimum(0.)
            ratio_histogram.SetMaximum(1.)
        ratio_histogram.Draw()

        c.cd()
        c.SaveAs(analysis_config.figure_directory + "/OfflineBTag/" +
                 c.GetName() + save_tag + ".pdf")
        ROOT.SetOwnership(c, False)
        ROOT.SetOwnership(top, False)
        ROOT.SetOwnership(bottom, False)
# Extend the module search path with the RootUtils python directory; the
# histogram_tools import below presumably resolves from there -- TODO confirm.
sys.path.append(
    "/uscms/home/dryu/Dijets/CMSSW_5_3_32_patch3/python/MyTools/RootUtils")
import histogram_tools
# Create and initialize the Seaborn interface object once at module load.
# NOTE(review): presumably a color-palette helper for plots -- confirm.
seaborn = Root.SeabornInterface()
seaborn.Initialize()

print "Loading histograms"
# Analysis names whose SingleMu_2012 histogram files are opened below.
analyses = [
    "trigmu_highmass_CSVTM", "trigmu_lowmass_CSVTM",
    "trigmubbh_highmass_CSVTM", "trigmubbl_lowmass_CSVTM",
    "trigmubbll_lowmass_CSVTM"
]
# One read-only ROOT file handle per analysis, keyed by analysis name. The
# handles are stored in this dict and are not closed within this section.
files = {}
for analysis in analyses:
    files[analysis] = ROOT.TFile(
        analysis_config.get_b_histogram_filename(analysis, "SingleMu_2012"),
        "READ")

from array import array
# Increasing values from 1 to 8000 stored as C doubles ("d" typecode).
# NOTE(review): presumably variable-width dijet mass bin edges in GeV --
# confirm against where mass_bins is consumed.
mass_bins = array("d", [
    1, 3, 6, 10, 16, 23, 31, 40, 50, 61, 74, 88, 103, 119, 137, 156, 176, 197,
    220, 244, 270, 296, 325, 354, 386, 419, 453, 489, 526, 565, 606, 649, 693,
    740, 788, 838, 890, 944, 1000, 1058, 1118, 1181, 1246, 1313, 1383, 1455,
    1530, 1607, 1687, 1770, 1856, 1945, 2037, 2132, 2231, 2332, 2438, 2546,
    2659, 2775, 2895, 3019, 3147, 3279, 3416, 3558, 3704, 3854, 4010, 4171,
    4337, 4509, 4686, 4869, 5058, 5253, 5455, 5663, 5877, 6099, 6328, 6564,
    6808, 7060, 7320, 7589, 7866, 8000
])

# Variable names and an (initially empty) histogram container.
# NOTE(review): presumably filled per variable by code following this
# section -- verify against the rest of the script.
variables = ["mjj", "pt1", "pt2", "pt_btag1", "pt_btag2"]
histograms = {}
Beispiel #29
0
if __name__ == "__main__":
	for model in ["Hbb", "RSG"]:
		#for analysis_base in ["trigbbh", "trigbbl"]:
		for analysis_base in ["trigbbl", "trigbbh"]:
			if analysis_base == "trigbbh":
				masses = [600, 750, 900, 1200]
			elif analysis_base == "trigbbl":
				masses = [400, 600, 750, 900]
			analyses = [analysis_base + "_" + x for x in ["CSVL", "CSVM", "CSVT", "CSVTL", "CSVTM", "CSVML"]]
			for mass in masses:
				signal_histograms = {}
				data_histograms = {}
				for analysis in analyses:
					signal_sample = analysis_config.simulation.get_signal_tag(model, mass, "FULLSIM")
					#print "Signal file: " + analysis_config.get_b_histogram_filename(analysis, signal_sample)
					signal_histogram_file = TFile(analysis_config.get_b_histogram_filename(analysis, signal_sample), "READ")
					#print "Data file: " + analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012")
					data_histogram_file = TFile(analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012"), "READ")
					signal_histograms[analysis] = signal_histogram_file.Get("BHistograms/h_pfjet_mjj")
					if not signal_histograms[analysis]:
						print "ERROR : Couldn't find signal histogram BHistograms/h_pfjet_mjj in file " + analysis_config.get_b_histogram_filename(analysis, signal_sample)
						continue
					signal_histograms[analysis].SetDirectory(0)
					ngenevt = signal_histogram_file.Get("BHistograms/h_input_nevents").Integral()
					xsec = 1. # 1 pb placeholder
					signal_histograms[analysis].Scale(19700. * xsec / ngenevt)
					print "[debug] Data file " + analysis_config.get_b_histogram_filename(analysis, "BJetPlusX_2012")
					data_histograms[analysis] = data_histogram_file.Get("BHistograms/h_pfjet_mjj")
					data_histograms[analysis].SetDirectory(0)
					if not data_histograms[analysis]:
						print "ERROR : Couldn't find data histogram"
Beispiel #30
0
def RunBHistogramsEOS(analysis,
                      sample,
                      files_per_job=20,
                      retar=False,
                      data_source=None):
    """Submit BHistograms cmsRun jobs for one (analysis, sample) pair.

    Creates a per-(analysis, sample) working directory, reads the list of
    input files for the sample, writes a bash wrapper that runs cmsRun on
    one slice of that list per job index, submits it through the `csub`
    command, and finally writes a `postprocessing.py` script into the
    working directory that merges the per-job outputs with `hadd` when no
    job log contains "FAILURE".

    Args:
        analysis: Analysis name; key into analysis_config.analysis_cfgs.
        sample: Sample name; key into analysis_config.files_QCDBEventTree.
        files_per_job: Upper bound on input files per job. In the csub path
            it is recomputed so that the files are split evenly.
        retar: If false, " --no_retar" is added to the csub command line.
        data_source: Value passed to cmsRun as dataSource=...; required.

    Exits the process with status 1 when data_source is missing, when
    files_per_job is smaller than 1, or when the input file list is empty.
    The caller's working directory is restored on every exit path.
    """
    if not data_source:
        print "[RunBHistogramsEOS] ERROR : Please specify data_source = collision_data or simulation"
        sys.exit(1)
    if files_per_job < 1:
        # A non-positive chunk size would divide by zero or never advance.
        print "[RunBHistogramsEOS] ERROR : files_per_job must be at least 1"
        sys.exit(1)

    # Create working directory and cd
    start_directory = os.getcwd()
    working_directory = dijet_directory + "/data/EightTeeEeVeeBee/BHistograms/condor/submit_" + analysis + "_" + sample
    os.system("mkdir -pv " + working_directory)
    os.chdir(working_directory)

    try:
        # Get input file list (read once; blank lines are not input files and
        # would otherwise make the job count disagree with the bash array).
        input_list_path = analysis_config.files_QCDBEventTree[sample]
        with open(input_list_path, 'r') as input_files_txt:
            input_files = [
                line.strip() for line in input_files_txt if line.strip()
            ]
        if not input_files:
            print "[RunBHistogramsEOS] ERROR : No input files listed in " + input_list_path
            sys.exit(1)

        histogram_filename = analysis_config.get_b_histogram_filename(
            analysis, sample)
        analysis_cfg = analysis_config.analysis_cfgs[analysis]

        method = "csub"
        if method == "csub":
            # Recalculate files_per_job to split evenly
            n_jobs = int(math.ceil(1. * len(input_files) / files_per_job))
            files_per_job = int(math.ceil(1. * len(input_files) / n_jobs))

            # "$1" is the job index handed to the wrapper script.
            output_filename = os.path.basename(
                histogram_filename.replace(".root", "_$1.root"))
            bash_lines = [
                "#!/bin/bash\n",
                "input_files=( " + " ".join(input_files) + " )\n",
                "files_per_job=" + str(files_per_job) + "\n",
                "first_file_index=$(($1*$files_per_job))\n",
                "max_file_index=$((${#input_files[@]}-1))\n",
                "if [ $(($first_file_index+$files_per_job-1)) -gt $max_file_index ]; then\n",
                "\tfiles_per_job=$(($max_file_index-$first_file_index+1))\n",
                "fi\n",
                "declare -a this_input_files=(${input_files[@]:$first_file_index:$files_per_job})\n",
                "function join { local IFS=\"$1\"; shift; echo \"$*\"; }\n",
                "this_input_files_string=\"$(join , ${this_input_files[@]})\"\n",
                "echo \"Input files:\"\n",
                "echo $this_input_files_string\n",
                "cmsRun " + os.path.basename(analysis_cfg) + " dataSource=" +
                data_source + " dataType=data" + " outputFile=" +
                output_filename + " inputFiles=$this_input_files_string\n",
            ]
            bash_script_path = working_directory + "/run_" + analysis + "_" + sample + ".sh"
            with open(bash_script_path, 'w') as bash_script:
                bash_script.writelines(bash_lines)

            submit_command = "csub " + bash_script_path + " --cmssw "
            if not retar:
                submit_command += " --no_retar"
            print "[debug] This job will have " + str(
                len(input_files)) + " / " + str(files_per_job) + " = " + str(
                    n_jobs) + " jobs"
            # NOTE(review): the file list passed to -F starts with a comma;
            # kept as in the original -- confirm csub expects this.
            submit_command += " -F " + "," + analysis_cfg + " -n " + str(n_jobs)
            os.system(submit_command)

        else:
            # Legacy condor_cmsRun path: one command per chunk of input files.
            # NOTE(review): the command only echoes the condor_cmsRun line and
            # appends outputFile=... after the closing quote; kept as in the
            # original -- confirm whether this was meant to be executed.
            chunk_starts = xrange(0, len(input_files), files_per_job)
            for subjob_index, first_index in enumerate(chunk_starts):
                this_job_files = input_files[first_index:first_index +
                                             files_per_job]
                command = "echo \"condor_cmsRun"
                if retar:
                    command += " --retar "
                command += " --submit-file=submit_" + analysis + "_" + sample + ".subjob" + str(
                    subjob_index) + ".jdl "
                command += " --output-tag=BHistograms_" + sample + ".subjob" + str(
                    subjob_index) + " "
                command += " --run " + dijet_directory + "/CMSSW_5_3_32_patch3/src/MyTools/RootUtils/scripts/cmsRun_wrapper.sh " + analysis_cfg
                command += " dataSource=" + data_source
                command += " dataType=data inputFiles="
                command += ",".join(this_job_files)
                command += "\""
                subjob_output_filename = os.path.basename(
                    histogram_filename + ".subjob" + str(subjob_index))
                command += " outputFile=" + subjob_output_filename

                #print command
                os.system(command)

        # Postprocessing script: merges the per-job outputs unless a job's
        # stderr log contains "FAILURE". Written into the working directory,
        # which is the current directory at this point.
        merge_command = "hadd " + histogram_filename + " " + working_directory + "/" + os.path.basename(
            histogram_filename).replace(".root", "_*.root")
        postprocessing_lines = [
            "import os\n",
            "import sys\n",
            "import glob\n",
            "log_files = glob.glob(\"" + working_directory + "/*stderr\")\n",
            "failed_logs = []\n",
            "for log_file in log_files:\n",
            "\tlog_file_handle = open(log_file, 'r')\n",
            "\tfor line in log_file_handle:\n",
            "\t\tif \"FAILURE\" in line:\n",
            "\t\t\tfailed_logs.append(log_file)\n",
            "if len(failed_logs) == 0:\n",
            "\tos.system(\"" + merge_command + "\")\n",
            "else:\n",
            "\tprint(\"Some jobs failed. You need to retry them.\")\n",
            "\tfor failed_log in failed_logs:\n",
            "\t\tprint failed_log\n",
        ]
        with open('postprocessing.py', 'w') as postprocessing_file:
            postprocessing_file.writelines(postprocessing_lines)
    finally:
        # cd back, even if something above raised or called sys.exit
        os.chdir(start_directory)
	parser.add_argument('--btag_mc_notrig', action='store_true', help='Make online B tag efficiency plot from MC')
	args = parser.parse_args()
	print args

	if args.ht:
		analyses = {}
		names = []
		for mass in xrange(200, 700, 50):
			if mass == 600:
				continue
			names.append("HT" + str(mass))
			analyses["HT" + str(mass)] = "trigjetht" + str(mass)
		sample = "JetHT_2012BCD"
		histograms = {}
		for name in names:
			f = TFile(analysis_config.get_b_histogram_filename(analyses[name], sample), "READ")
			#histograms[name] = mjj_common.apply_dijet_binning_normalized(f.Get("BHistograms/h_pfjet_mjj"))
			print "[debug] For name " + name + ", input events = " + str(f.Get("BHistograms/h_input_nevents").GetEntries())
			print "[debug] \tPrescale = " + str(f.Get("BHistograms/h_pass_nevents_weighted").GetBinContent(1) / f.Get("BHistograms/h_pass_nevents").GetBinContent(1))
			histograms[name] = f.Get("BHistograms/h_pfjet_mjj").Rebin(20)
			histograms[name].SetName(histograms[name].GetName() + "_" + name)
			histograms[name].SetDirectory(0)
			f.Close()
		ht_threshold_plot(names, histograms, save_tag="jetht_thresholds", x_range=[0., 1200.], logy=True)

	if args.btag:
		f_jetht_save = TFile(analysis_config.dijet_directory + "/data/EightTeeEeVeeBee/TriggerEfficiency/trigeff_jetht_data.root", "RECREATE")
		for sr in ["lowmass", "highmass"]:
			ht_analyses = {}
			names = []
			for mass in xrange(200, 700, 50):
Beispiel #32
0
from ROOT import *
gSystem.Load("~/Dijets/CMSSW_5_3_32_patch3/lib/slc6_amd64_gcc472/libMyToolsRootUtils.so")
import CMSDIJET.QCDAnalysis.analysis_configuration_8TeV as analysis_config

# Combine the QCD MC samples with appropriate weights
# Each entry is one QCD sample name, used both to locate its histogram file
# and to look up its cross section below.
qcd_samples = ["QCD_Pt-80to120_TuneZ2star_8TeV_pythia6","QCD_Pt-120to170_TuneZ2star_8TeV_pythia6","QCD_Pt-170to300_TuneZ2star_8TeV_pythia6","QCD_Pt-300to470_TuneZ2star_8TeV_pythia6","QCD_Pt-470to600_TuneZ2star_8TeV_pythia6","QCD_Pt-600to800_TuneZ2star_8TeV_pythia6","QCD_Pt-800to1000_TuneZ2star_8TeV_pythia6","QCD_Pt-1000to1400_TuneZ2star_8TeV_pythia6","QCD_Pt-1400to1800_TuneZ2star_8TeV_pythia6","QCD_Pt-1800_TuneZ2star_8TeV_pythia6"]
# One combined output file is produced per analysis.
analyses = ["NoTrigger_eta2p2", "NoTrigger_eta2p2_CSVTM", "NoTrigger_eta1p7", "NoTrigger_eta1p7_CSVTM", "trigbbl_CSVTM", "trigbbh_CSVTM"]

# NOTE(review): presumably the integrated luminosity in pb^-1 -- confirm the
# unit matches background_cross_sections.
lumi = 19710.

for analysis in analyses:
	# NOTE(review): `first` is not cleared anywhere in the visible portion of
	# this loop; confirm it is set to False after the first sample, otherwise
	# every sample takes the "store" branch and the Add branch never runs.
	first = True
	# Combined output for the merged "QCD_TuneZ2star_8TeV_pythia6" sample.
	output_file = TFile(analysis_config.get_b_histogram_filename(analysis, "QCD_TuneZ2star_8TeV_pythia6"), "RECREATE")
	output_directory = output_file.mkdir("BHistograms")
	# Accumulated histograms keyed by histogram name.
	histograms = {}
	for sample in qcd_samples:
		input_file = TFile(analysis_config.get_b_histogram_filename(analysis, sample), "READ")
		input_directory = input_file.Get("BHistograms")
		# Make the input directory current so gDirectory below lists its keys.
		input_directory.cd()
		xsec = analysis_config.simulation.background_cross_sections[sample]
		# Per-sample weight: cross section times lumi, divided by the integral
		# of the sample's h_sample_nevents histogram.
		normalization = xsec * lumi / input_file.Get("BHistograms/h_sample_nevents").Integral()
		for key in gDirectory.GetListOfKeys():
			key.Print()
			# Only objects whose class name contains TH1 or TH2 are combined.
			if "TH1" in key.GetClassName() or "TH2" in key.GetClassName():
				hist = key.ReadObj()
				hist.Scale(normalization)
				if first:
					# Store the scaled histogram and attach it to the output
					# directory.
					histograms[hist.GetName()] = hist
					histograms[hist.GetName()].SetDirectory(output_directory)
				else:
					# Add the scaled histogram to the one already stored under
					# the same name.
					histograms[hist.GetName()].Add(hist)