def __init__(self, outputDir, executable_analyze, samples, lepton_selections, central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, histograms_to_fit = None, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configuration for the charge-flip measurement workflow.

    Delegates generic job bookkeeping to analyzeConfig (channel "charge_flip"),
    records which processes/signals go into the datacard, and lays out one
    directory per (sample, lepton selection) for configs, histograms, logs,
    datacards and run:lumi:event lists.

    Fix: `histograms_to_fit` previously used a mutable default argument
    (`= []`), which is shared across all instantiations; replaced with the
    `None` sentinel idiom (backward-compatible — omitting the argument still
    yields an empty list).
    """
    if histograms_to_fit is None:
        histograms_to_fit = []
    analyzeConfig.__init__(self, outputDir, executable_analyze, "charge_flip", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
      histograms_to_fit)
    self.samples = samples
    # processes copied verbatim into the datacard; "DY" doubles as the signal
    self.prep_dcard_processesToCopy = ["data_obs", "DY", "DY_fake", "WJets", "TTbar", "Singletop", "Diboson"]
    self.prep_dcard_signals = [ "DY" ]
    self.lepton_selections = lepton_selections
    #self.hadTau_selection = hadTau_selection
    for sample_name, sample_info in self.samples.items():
        # skip disabled samples and categories handled elsewhere
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            key_dir = getKey(sample_name, lepton_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_RLES ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                  "_".join([ lepton_selection ]), process_name)
    ##print "self.dirs = ", self.dirs
    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_charge_flip_cfg.py")
    self.cfgFile_prep_dcard_original = os.path.join(self.workingDir, "prepareDatacards_cfg.py")
    #self.histogramDir_prep_dcard = "charge_flip_SS_Tight"
    self.select_rle_output = select_rle_output
def __init__(self, treeName, outputDir, executable_addMEM, samples, era, debug, running_method, max_files_per_job, mem_integrations_per_job, max_mem_integrations, num_parallel_jobs):
    """Configuration for running the matrix-element-method (addMEM) jobs for the 2lss_1tau channel.

    Side effects: creates the output directory, opens per-channel stdout/stderr
    log files, and builds the per-sample directory layout under `self.dirs`.
    """
    self.treeName = treeName
    self.outputDir = outputDir
    self.executable_addMEM = executable_addMEM
    # channel is hard-coded for this config class
    self.channel = "2lss_1tau"
    self.mem_integrations_per_job = mem_integrations_per_job
    self.max_files_per_job = max_files_per_job
    self.max_mem_integrations = max_mem_integrations
    self.samples = samples
    self.era = era
    self.debug = debug
    # only "sbatch" (batch system) and "makefile" (local) are supported
    assert(running_method.lower() in [ "sbatch", "makefile"]), "Invalid running method: %s" % running_method
    self.running_method = running_method
    self.is_sbatch = False
    self.is_makefile = False
    if self.running_method.lower() == "sbatch":
        self.is_sbatch = True
    else:
        self.is_makefile = True
    self.makefile = os.path.join(
      self.outputDir, "Makefile_%s" % self.channel)
    self.num_parallel_jobs = num_parallel_jobs
    # config files are resolved relative to the invocation directory
    self.workingDir = os.getcwd()
    print "Working directory is: " + self.workingDir
    create_if_not_exists(self.outputDir)
    # UTF-8 log sinks for the job-submission machinery
    self.stdout_file = codecs.open(os.path.join(
      self.outputDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
    self.stderr_file = codecs.open(os.path.join(
      self.outputDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
    self.dirs = {}
    # NOTE(review): duplicate of the `self.samples = samples` assignment above — harmless but redundant
    self.samples = samples
    self.cfgFile_addMEM_original = os.path.join(self.workingDir, "addMEM_2lss_1tau_cfg.py")
    self.cfgFiles_addMEM_modified = {}
    self.logFiles_addMEM = {}
    self.sbatchFile_addMEM = os.path.join(
      self.outputDir, "sbatch_addMEM_%s.py" % self.channel)
    self.inputFiles = {}
    self.outputFiles = {}
    self.hadd_records = {}
    self.filesToClean = []
    # one directory per usable sample and bookkeeping type, all under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [ DKEY_CFGS, DKEY_NTUPLES, DKEY_FINAL_NTUPLES, DKEY_LOGS, DKEY_HADD ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)
    # collects CVMFS-related job failures, keyed by error signature
    self.cvmfs_error_log = {}
def __init__(self, configDir, outputDir, executable_analyze, samples, lepton_selections, central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, histograms_to_fit = None, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configuration for the charge-flip measurement workflow (configDir/outputDir split variant).

    Config files and logs go under `configDir`; histograms, datacards and
    run:lumi:event lists go under `outputDir`.

    Fixes:
    - `histograms_to_fit` previously used a mutable default argument (`= []`),
      shared across all instantiations; replaced with the `None` sentinel idiom
      (backward-compatible).
    - the hand-rolled membership loop over `sample_process_run_s` (flag
      variable + inner for) is replaced by an equivalent `not in` test.
    """
    if histograms_to_fit is None:
        histograms_to_fit = []
    analyzeConfig.__init__(self, configDir, outputDir, executable_analyze, "charge_flip", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
      histograms_to_fit)
    self.samples = samples
    # processes copied verbatim into the datacard; "DY" doubles as the signal
    self.prep_dcard_processesToCopy = ["data_obs", "DY", "DY_fake", "WJets", "TTbar", "Singletop", "Diboson"]
    self.prep_dcard_signals = [ "DY" ]
    self.lepton_selections = lepton_selections
    #self.hadTau_selection = hadTau_selection
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        # Edit Siddh ~~~~~~~~~~~~~
        # Only process samples whitelisted in the module-level list
        # `sample_process_run_s` (defined elsewhere in this file — TODO confirm).
        if process_name not in sample_process_run_s:
            continue
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~
        for lepton_selection in self.lepton_selections:
            key_dir = getKey(sample_name, lepton_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_RLES ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                # configs/logs live under configDir; analysis outputs under outputDir
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection ]), process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection ]), process_name)
    ##print "self.dirs = ", self.dirs
    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_charge_flip_cfg.py")
    self.cfgFile_prep_dcard_original = os.path.join(self.workingDir, "prepareDatacards_cfg.py")
    #self.histogramDir_prep_dcard = "charge_flip_SS_Tight"
    self.select_rle_output = select_rle_output
def __init__(self, outputDir, executable_analyze, samples, charge_selections, jet_minPt, jet_maxPt, jet_minAbsEta, jet_maxAbsEta, hadTau_selections, absEtaBins, ptBins, central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, executable_comp_jetToTauFakeRate):
    """Configuration for the jet->tau fake-rate measurement workflow.

    Delegates generic job bookkeeping to analyzeConfig (channel
    "jetToTauFakeRate"), stores the jet kinematic window, tau selections and
    measurement binning, and lays out one directory per
    (sample, charge selection) for configs, histograms, logs and datacards.
    """
    analyzeConfig.__init__(self, outputDir, executable_analyze, "jetToTauFakeRate", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, [])
    self.samples = samples
    self.charge_selections = charge_selections
    # jet kinematic window used for the measurement
    self.jet_minPt = jet_minPt
    self.jet_maxPt = jet_maxPt
    self.jet_minAbsEta = jet_minAbsEta
    self.jet_maxAbsEta = jet_maxAbsEta
    self.hadTau_selections = hadTau_selections
    # fake-rate measurement binning
    self.absEtaBins = absEtaBins
    self.ptBins = ptBins
    self.executable_comp_jetToTauFakeRate = executable_comp_jetToTauFakeRate
    # directory layout: one entry per (sample, charge selection) and dir type
    excluded_categories = [ "additional_signal_overlap", "background_data_estimate" ]
    for cur_sample_name, cur_sample_info in self.samples.items():
        skip_sample = (not cur_sample_info["use_it"]) or (cur_sample_info["sample_category"] in excluded_categories)
        if skip_sample:
            continue
        cur_process_name = cur_sample_info["process_name_specific"]
        for cur_charge_selection in self.charge_selections:
            dir_key = getKey(cur_sample_name, cur_charge_selection)
            for cur_dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD ]:
                initDict(self.dirs, [ dir_key, cur_dir_type ])
                self.dirs[dir_key][cur_dir_type] = os.path.join(
                  self.outputDir, cur_dir_type, self.channel,
                  "_".join([ cur_charge_selection ]), cur_process_name)
    ##print "self.dirs = ", self.dirs
    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_jetToTauFakeRate_cfg.py")
    self.histogramFile_comp_jetToTauFakeRate = {}
    self.cfgFile_comp_jetToTauFakeRate_original = os.path.join(self.workingDir, "comp_jetToTauFakeRate_cfg.py")
    self.cfgFile_comp_jetToTauFakeRate_modified = {}
    self.histogramDir_numerator = {}
    self.histogramDir_denominator = {}
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Stages (in order): build the directory layout, collect input file lists,
    write per-job analysis configs, then configs for addBackgrounds /
    addBackgroundLeptonFakes / prepareDatacards / addSystFakeRates / makePlots,
    and finally the sbatch scripts and the Makefile.  Returns the number of jobs.
    """
    # --- directory layout: per (process, selection, systematic) job directories ---
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            for lepton_frWeight in self.lepton_frWeights:
                # fake-rate weights only make sense for "Fakeable" selections
                if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                    continue
                lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
                # "" stands for the central value; "hadd"/"addBackgrounds" get their own dirs
                central_or_shifts_extended = [ "" ]
                central_or_shifts_extended.extend(self.central_or_shifts)
                central_or_shifts_extended.extend([ "hadd", "addBackgrounds" ])
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    process_name_extended = [ process_name, "hadd" ]
                    for process_name_or_dummy in process_name_extended:
                        key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
                        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                            initDict(self.dirs, [ key_dir, dir_type ])
                            # configs/logs under configDir; analysis outputs under outputDir
                            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                                  "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
                            else:
                                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                                  "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
    # directories for the post-processing executables
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    # channel-level directories (no per-process subdivision)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    # create everything on disk, with a coarse progress printout
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        while 100*numDirectories_created >= frac*numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # --- split each sample's ntuples into per-job input file lists ---
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # --- per-job analysis configs ---
    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        # tau veto combines "Tight" with the channel-specific second part
        hadTauVeto_selection = "Tight"
        hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])
        # mcClosure selections loosen exactly one lepton flavour
        if lepton_selection == "Fakeable_mcClosure_e":
            electron_selection = "Fakeable"
            muon_selection = "Tight"
        elif lepton_selection == "Fakeable_mcClosure_m":
            electron_selection = "Tight"
            muon_selection = "Fakeable"
        for lepton_frWeight in self.lepton_frWeights:
            if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                continue
            if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
            for sample_name, sample_info in self.samples.items():
                if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                    continue
                process_name = sample_info["process_name_specific"]
                logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
                sample_category = sample_info["sample_category"]
                is_mc = (sample_info["type"] == "mc")
                is_signal = (sample_category == "signal")
                for central_or_shift in self.central_or_shifts:
                    inputFileList = inputFileLists[sample_name]
                    for jobId in inputFileList.keys():
                        # systematic shifts only apply to selected (selection, sample) combinations
                        if central_or_shift != "central":
                            isFR_shape_shift = (central_or_shift in systematics.FR_all)
                            if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                                continue
                            if not is_mc and not isFR_shape_shift:
                                continue
                            # LHE scale/PDF shifts are process-specific
                            if central_or_shift in systematics.LHE().ttH and sample_category != "signal":
                                continue
                            if central_or_shift in systematics.LHE().ttW and sample_category != "TTW":
                                continue
                            if central_or_shift in systematics.LHE().ttZ and sample_category != "TTZ":
                                continue
                            if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                                continue
                        logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

                        # build config files for executing analysis code
                        key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
                        analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                            continue
                        # sync-ntuple bookkeeping: only SR / Fake / mcClosure selections produce sync output
                        syncOutput = ''
                        syncTree = ''
                        syncRequireGenMatching = True
                        if self.do_sync:
                            mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                            if lepton_selection_and_frWeight == 'Tight':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_SR' % self.channel
                                syncRequireGenMatching = True
                            elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                                syncTree = 'syncTree_%s_Fake' % self.channel
                            elif mcClosure_match:
                                mcClosure_type = mcClosure_match.group('type')
                                syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                                syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                            else:
                                continue
                        if syncTree and central_or_shift != "central":
                            syncTree = os.path.join(central_or_shift, syncTree)
                        syncRLE = ''
                        if self.do_sync and self.rle_select:
                            syncRLE = self.rle_select % syncTree
                            if not os.path.isfile(syncRLE):
                                logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                                continue
                        if syncOutput:
                            self.inputFiles_sync['sync'].append(syncOutput)
                        cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                          if self.select_rle_output else ""
                        histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles' : ntupleFiles,
                            'cfgFile_modified' : cfgFile_modified_path,
                            'histogramFile' : histogramFile_path,
                            'logFile' : logFile_path,
                            'selEventsFileName_output' : rleOutputFile_path,
                            'electronSelection' : electron_selection,
                            'muonSelection' : muon_selection,
                            'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                            'hadTauSelection_veto' : hadTauVeto_selection,
                            # fake-rate weights only applied outside the Tight (signal) region
                            'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                            'central_or_shift' : central_or_shift,
                            'syncOutput' : syncOutput,
                            'syncTree' : syncTree,
                            'syncRLE' : syncRLE,
                            'syncRequireGenMatching' : syncRequireGenMatching,
                            'useNonNominal' : self.use_nonnominal,
                            'apply_hlt_filter' : self.hlt_filter,
                        }
                        self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
                        hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                          "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

                # sync production stops after the per-job analysis configs
                if self.do_sync: continue

                if is_mc:
                    logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)
                    sample_categories = [ sample_category ]
                    if is_signal:
                        sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg" ]
                    for sample_category in sample_categories:
                        # sum non-fake and fake contributions for each MC sample separately
                        genMatch_categories = [ "nonfake", "conversions", "fake" ]
                        for genMatch_category in genMatch_categories:
                            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                            key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                            addBackgrounds_job_tuple = None
                            processes_input = None
                            process_output = None
                            if genMatch_category == "nonfake":
                                # sum non-fake contributions for each MC sample separately
                                # input processes: TT3l0g0j,...
                                # output processes: TT; ...
                                if sample_category in [ "signal" ]:
                                    lepton_genMatches = []
                                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_genMatches ]
                                elif sample_category in [ "ttH" ]:
                                    lepton_genMatches = []
                                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                                process_output = sample_category
                                addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight)
                            elif genMatch_category == "conversions":
                                # sum fake contributions for each MC sample separately
                                # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                                # output processes: TT_conversion; ...
                                if sample_category in [ "signal" ]:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                                elif sample_category in [ "ttH" ]:
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                                process_output = "%s_conversion" % sample_category
                                addBackgrounds_job_tuple = (process_name, "%s_conversion" % sample_category, lepton_selection_and_frWeight)
                            elif genMatch_category == "fake":
                                # sum fake contributions for each MC sample separately
                                # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                                # output processes: TT_fake; ...
                                if sample_category in [ "signal" ]:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                                elif sample_category in [ "ttH" ]:
                                    processes_input = []
                                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                                else:
                                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                                process_output = "%s_fake" % sample_category
                                addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight)
                            if processes_input:
                                logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                                key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                                cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                                outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                                    'cfgFile_modified' : cfgFile_modified,
                                    'outputFile' : outputFile,
                                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                                    'processes_input' : processes_input,
                                    'process_output' : process_output
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)
                                key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                                if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                  "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

                # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
                if not is_mc:
                    key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                    key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

            if self.do_sync: continue

            # sum fake background contributions for the total of all MC sample
            # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
            # output process: fakes_mc
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            key_addBackgrounds_dir = getKey("addBackgrounds")
            addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight)
            key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            sample_categories.extend([ "signal" ])
            processes_input = []
            for sample_category in sample_categories:
                processes_input.append("%s_fake" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_fakes_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_fakes_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                'processes_input' : processes_input,
                'process_output' : "fakes_mc"
            }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

            # sum conversion background contributions for the total of all MC sample
            # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
            # output process: conversions
            addBackgrounds_job_conversions_tuple = ("conversions", lepton_selection_and_frWeight)
            key_addBackgrounds_job_conversions = getKey(*addBackgrounds_job_conversions_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            sample_categories.extend([ "signal" ])
            processes_input = []
            for sample_category in sample_categories:
                processes_input.append("%s_conversion" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_conversions_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_conversions_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_conversions_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                'processes_input' : processes_input,
                'process_output' : "conversions"
            }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions])

            # initialize input and output file names for hadd_stage2
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)
            key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
            if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            # fakes_mc/conversions sums only enter the Tight (signal-region) hadd
            if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
            self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
              "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    # --- sync-ntuple mode: only analysis jobs + a hadd of the sync output ---
    if self.do_sync:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.targets.append(outputFile_sync_path)
        self.addToMakefile_hadd_sync(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done.")
        return self.num_jobs

    # --- data-driven fake background (addBackgroundLeptonFakes) ---
    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
        'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
        'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
        'category_signal' : "ttZctrl_Tight",
        'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    # --- datacards + fake-rate closure systematics, one per fitted histogram ---
    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, histogramToFit)
        key_prep_dcard_job = getKey(histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        key_prep_dcard_job = getKey(histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, histogramToFit)
        key_add_syst_fakerate_job = getKey(histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
            'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
            'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
            'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
            'category' : self.channel,
            'histogramToFit' : histogramToFit,
            'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = self.histogramDir_prep_dcard
        # per-flavour MC-closure inputs, only if the closure selection was run
        for lepton_type in [ 'e', 'm' ]:
            lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
            if lepton_mcClosure not in self.lepton_selections:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
            key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
            histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
                'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
                'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
                'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
                'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
            })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    # --- control plots ---
    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : "t#bar{t}Z control region",
        'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    # --- batch-submission scripts and Makefile ---
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
def __init__(self, configDir, outputDir, executable_prodNtuple, cfgFile_prodNtuple, samples, max_files_per_job, era, preselection_cuts, leptonSelection, hadTauSelection, debug, running_method, version, num_parallel_jobs, pool_id=''): self.configDir = configDir self.outputDir = outputDir self.executable_prodNtuple = executable_prodNtuple self.max_num_jobs = 200000 self.samples = samples self.max_files_per_job = max_files_per_job self.era = era self.preselection_cuts = preselection_cuts self.leptonSelection = leptonSelection self.hadTauSelection = hadTauSelection self.debug = debug assert (running_method.lower() in ["sbatch", "makefile"]), "Invalid running method: %s" % running_method self.running_method = running_method self.is_sbatch = False self.is_makefile = False if self.running_method.lower() == "sbatch": self.is_sbatch = True else: self.is_makefile = True self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple") self.num_parallel_jobs = num_parallel_jobs self.pool_id = pool_id if pool_id else uuid.uuid4() self.workingDir = os.getcwd() print "Working directory is: " + self.workingDir self.version = version create_if_not_exists(self.configDir) create_if_not_exists(self.outputDir) self.stdout_file = codecs.open( os.path.join(self.configDir, "stdout_prodNtuple.log"), 'w', 'utf-8') self.stderr_file = codecs.open( os.path.join(self.configDir, "stderr_prodNtuple.log"), 'w', 'utf-8') self.dirs = {} self.samples = samples self.cfgFile_prodNtuple_original = os.path.join( self.workingDir, cfgFile_prodNtuple) self.cfgFiles_prodNtuple_modified = {} self.logFiles_prodNtuple = {} self.sbatchFile_prodNtuple = os.path.join(self.configDir, "sbatch_prodNtuple.py") self.inputFiles = {} self.outputFiles = {} self.filesToClean = [] for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] key_dir = getKey(sample_name) for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]: 
initDict(self.dirs, [key_dir, dir_type]) if dir_type in [DKEY_CFGS, DKEY_LOGS]: self.dirs[key_dir][dir_type] = os.path.join( self.configDir, dir_type, process_name) else: self.dirs[key_dir][dir_type] = os.path.join( self.outputDir, dir_type, process_name) for dir_type in [DKEY_CFGS, DKEY_LOGS]: initDict(self.dirs, [dir_type]) if dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type) ##print "self.dirs = ", self.dirs self.cvmfs_error_log = {}
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Returns the total number of jobs (self.num_jobs).
    """
    # build per-sample directory layout: configs/logs under configDir,
    # histograms and run-lumi-event lists under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    # create the directories on disk, reporting progress in percent
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")
    # split each sample's input files into per-job chunks
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_info, self.max_files_per_job)
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info(
            "Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        inputFileList = inputFileLists[sample_name]
        for jobId in inputFileList.keys():
            ##print "processing sample %s: jobId = %i" % (process_name, jobId)
            # build config files for executing analysis code
            key_analyze_dir = getKey(process_name)
            analyze_job_tuple = (process_name, jobId)
            key_analyze_job = getKey(*analyze_job_tuple)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
                logging.warning(
                    "No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue
            cfgFile_modified_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%i_cfg.py" % analyze_job_tuple)
            logFile_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%i.log" % analyze_job_tuple)
            histogramFile_path = os.path.join(
                self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%i.root" % analyze_job_tuple)
            self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles': ntupleFiles,
                'cfgFile_modified': cfgFile_modified_path,
                'histogramFile': histogramFile_path,
                'histogramDir': 'analyze_hadTopTagger',
                'logFile': logFile_path,
                'hadTauSelection': self.hadTau_selection,
                'lumiScale': 1.,
                'selectBDT': True,
            }
            self.createCfg_analyze(
                self.jobOptions_analyze[key_analyze_job], sample_info)
            # initialize input and output file names for hadd_stage1
            # NOTE(fix): the dirs dict is keyed by getKey(process_name) only;
            # the previous code referenced an undefined 'lepton_selection_and_frWeight'
            key_hadd_stage1_dir = getKey(process_name)
            key_hadd_stage1_job = getKey(process_name)
            if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(
                self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s.root" % process_name)
            self.targets.append(
                self.outputFile_hadd_stage1[key_hadd_stage1_job])
    self.sbatchFile_analyze = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Stages: per-sample analysis jobs -> hadd_stage1 -> hadd_stage1_5 ->
    addBackgrounds / addFakes -> hadd_stage2 -> prepareDatacards /
    addSystFakeRates / makePlots.  Returns the total number of jobs.
    """
    # ------------------------------------------------------------------
    # build the directory dictionaries for every (sample, selection,
    # charge selection, systematic shift) combination
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")
        process_name = sample_info["process_name_specific"]
        logging.info("Building dictionaries for sample %s..." % process_name)
        for lepton_selection in self.lepton_selections:
            for lepton_frWeight in self.lepton_frWeights:
                # fake-rate weights only make sense for Fakeable selections
                if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                    continue
                if lepton_frWeight == "disabled" and not lepton_selection in ["Tight", "forBDTtraining"]:
                    continue
                lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
                for leptonChargeSelection in self.leptonChargeSelections:
                    # "" / "hadd" / "addBackgrounds" are bookkeeping pseudo-shifts
                    central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
                    central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
                    central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
                    for central_or_shift_or_dummy in central_or_shifts_extended:
                        process_name_extended = [ process_name, "hadd" ]
                        for process_name_or_dummy in process_name_extended:
                            if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                                continue
                            if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                                central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                            ):
                                continue
                            key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                                if dir_type == DKEY_SYNC and not self.do_sync:
                                    continue
                                initDict(self.dirs, [ key_dir, dir_type ])
                                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                                    self.dirs[key_dir][dir_type] = os.path.join(
                                        self.configDir, dir_type, self.channel,
                                        "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]),
                                        process_name_or_dummy, central_or_shift_or_dummy)
                                else:
                                    self.dirs[key_dir][dir_type] = os.path.join(
                                        self.outputDir, dir_type, self.channel,
                                        "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]),
                                        process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
        if dir_type == DKEY_SYNC and not self.do_sync:
            continue
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    # ------------------------------------------------------------------
    # create the directories on disk, reporting progress in percent
    # ------------------------------------------------------------------
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        while 100*numDirectories_created >= frac*numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")
    # ------------------------------------------------------------------
    # split each sample's input files into per-job chunks
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)
    # ------------------------------------------------------------------
    # per-selection analysis jobs and downstream hadd / addBackgrounds
    # ------------------------------------------------------------------
    for lepton_selection in self.lepton_selections:
        hadTau_selection = "Tight"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])
        # map the lepton selection onto the electron/muon selections passed
        # to the analysis executable.
        # NOTE(fix): this mapping block was duplicated in the original
        # (once with '==', once with 'in'); the net effect of the two
        # consecutive blocks equals the substring-based block alone, which
        # is kept here.
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        if "forBDTtraining" in lepton_selection:
            electron_selection = "Loose"
            muon_selection = "Loose"
        elif lepton_selection == "Fakeable_mcClosure_e":
            electron_selection = "Fakeable"
            muon_selection = "Tight"
        elif lepton_selection == "Fakeable_mcClosure_m":
            electron_selection = "Tight"
            muon_selection = "Fakeable"
        for lepton_frWeight in self.lepton_frWeights:
            if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
                continue
            if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
                continue
            lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
            for leptonChargeSelection in self.leptonChargeSelections:
                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
                    inputFileList = inputFileLists[sample_name]
                    sample_category = sample_info["sample_category"]
                    is_mc = (sample_info["type"] == "mc")
                    use_th_weights = self.runTHweights(sample_info)
                    central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
                    for central_or_shift in central_or_shift_dedicated:
                        if not self.accept_systematics(
                            central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
                        ):
                            continue
                        # internal shifts are evaluated inside the 'central' job
                        central_or_shifts_local = []
                        if central_or_shift == "central" and not use_th_weights:
                            for central_or_shift_local in self.central_or_shifts_internal:
                                if self.accept_systematics(
                                    central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                                ):
                                    central_or_shifts_local.append(central_or_shift_local)
                        logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))
                        # build config files for executing analysis code
                        key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)
                        for jobId in inputFileList.keys():
                            analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                            key_analyze_job = getKey(*analyze_job_tuple)
                            ntupleFiles = inputFileList[jobId]
                            if len(ntupleFiles) == 0:
                                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                                continue
                            cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                            logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                            rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                if self.select_rle_output else ""
                            histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                            # fake-rate weights are disabled for the Tight region
                            # (unless running BDT training)
                            applyFakeRateWeights = self.applyFakeRateWeights \
                                if self.isBDTtraining or lepton_selection.find("Tight") == -1 \
                                else "disabled"
                            # NOTE(fix): the original dict literal listed
                            # 'selectBDT' twice with the same value
                            self.jobOptions_analyze[key_analyze_job] = {
                                'ntupleFiles' : ntupleFiles,
                                'cfgFile_modified' : cfgFile_modified_path,
                                'histogramFile' : histogramFile_path,
                                'logFile' : logFile_path,
                                'selEventsFileName_output' : rleOutputFile_path,
                                'electronSelection' : electron_selection,
                                'muonSelection' : muon_selection,
                                'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                                'leptonChargeSelection' : leptonChargeSelection,
                                'applyFakeRateWeights' : applyFakeRateWeights,
                                'hadTauSelection' : hadTau_selection,
                                'central_or_shift' : central_or_shift,
                                'central_or_shifts_local' : central_or_shifts_local,
                                'fillGenEvtHistograms' : True,
                                'apply_hlt_filter' : self.hlt_filter,
                                'selectBDT' : self.isBDTtraining,
                            }
                            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)
                            # initialize input and output file names for hadd_stage1
                            key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                            hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                            key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                            if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                            self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                            self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)
                    if self.isBDTtraining:
                        continue
                    # add output files of hadd_stage1 to list of input files for hadd_stage1_5
                    key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                    key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
                    hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
                    key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
                    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
                    self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)
                if self.isBDTtraining:
                    continue
                # sum fake background contributions for the total of all MC sample
                # input processes: TT_fake, TTW_fake, TTWW_fake, ...
                # output process: fakes_mc
                key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
                key_addBackgrounds_dir = getKey("addBackgrounds")
                addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
                key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
                sample_categories = []
                sample_categories.extend(self.nonfake_backgrounds)
                processes_input = []
                for sample_category in sample_categories:
                    processes_input.append("%s_fake" % sample_category)
                self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                    'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
                    'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                    'processes_input' : processes_input,
                    'process_output' : "fakes_mc"
                }
                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])
                # sum conversion background contributions for the total of all MC sample
                # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
                # output process: Convs
                # NOTE(fix): the original rebuilt 'sample_categories' here but
                # never used it (the loop iterates self.convs_backgrounds);
                # that dead code was removed
                addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
                key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
                processes_input = []
                for sample_category in self.convs_backgrounds:
                    processes_input.append("%s_Convs" % sample_category)
                self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                    'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
                    'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                    'processes_input' : processes_input,
                    'process_output' : "Convs"
                }
                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])
                # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
                # separately for "nonfake" and "fake" contributions
                genMatch_categories = [ "nonfake", "fake" ]
                for genMatch_category in genMatch_categories:
                    for signal_base, signal_input in self.signal_io.items():
                        addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
                        key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
                        if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                            continue
                        processes_input = signal_input
                        process_output = signal_base
                        if genMatch_category == "fake":
                            processes_input = [ process_input + "_fake" for process_input in processes_input ]
                            process_output += "_fake"
                        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                            'processes_input' : processes_input,
                            'process_output' : process_output
                        }
                        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
                        key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
                        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                        if lepton_selection == "Tight":
                            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])
                # initialize input and output file names for hadd_stage2
                key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
                if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                if lepton_selection == "Tight":
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
                self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                    "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)
    # ------------------------------------------------------------------
    # BDT-training mode stops after hadd_stage1
    # ------------------------------------------------------------------
    if self.isBDTtraining:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.targets.extend(self.phoniesToAdd)
        self.addToMakefile_validate(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done.")
        return self.num_jobs
    # ------------------------------------------------------------------
    # data-driven fake background
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for leptonChargeSelection in self.leptonChargeSelections:
        key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
        key_addFakes_dir = getKey("addBackgroundLeptonFakes")
        key_addFakes_job = getKey("data_fakes", leptonChargeSelection)
        category_sideband = "hh_4l_%s_Fakeable_wFakeRateWeights" % leptonChargeSelection
        self.jobOptions_addFakes[key_addFakes_job] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % leptonChargeSelection),
            'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % leptonChargeSelection),
            'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % leptonChargeSelection),
            'category_signal' : "hh_4l_%s_Tight" % leptonChargeSelection,
            'category_sideband' : category_sideband
        }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
    # ------------------------------------------------------------------
    # datacards, fake-rate closure systematics
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_dir = getKey("prepareDatacards")
        if "OS" in self.leptonChargeSelections:
            key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
            prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
            key_prep_dcard_job = getKey("OS", histogramToFit)
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
                'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
                'histogramDir' : self.histogramDir_prep_dcard,
                'histogramToFit' : histogramToFit,
                'label' : '4l',
            }
            self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        if "SS" in self.leptonChargeSelections:
            key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
            prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
            key_prep_dcard_job = getKey("SS", histogramToFit)
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
                'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
                'histogramDir' : self.histogramDir_prep_dcard_SS,
                'histogramToFit' : histogramToFit,
                'label' : '4l SS',
            }
            self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        for leptonChargeSelection in self.leptonChargeSelections:
            key_prep_dcard_job = getKey(leptonChargeSelection, histogramToFit)
            key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
            key_add_syst_fakerate_dir = getKey("addSystFakeRates")
            add_syst_fakerate_job_tuple = (self.channel, leptonChargeSelection, histogramToFit)
            key_add_syst_fakerate_job = getKey(leptonChargeSelection, histogramToFit)
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
                'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
                'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
                'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
                'category' : self.channel,
                'histogramToFit' : histogramToFit,
                'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
            }
            histogramDir_nominal = None
            if leptonChargeSelection == "OS":
                histogramDir_nominal = self.histogramDir_prep_dcard
            elif leptonChargeSelection == "SS":
                histogramDir_nominal = self.histogramDir_prep_dcard_SS
            else:
                raise ValueError("Invalid parameter 'leptonChargeSelection' = %s !!" % leptonChargeSelection)
            for lepton_type in [ 'e', 'm' ]:
                lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
                if lepton_mcClosure not in self.lepton_selections:
                    continue
                lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
                key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
                histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, leptonChargeSelection)]
                self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
                    'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
                    'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                    'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
                    'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
                    'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
                })
            self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])
    # ------------------------------------------------------------------
    # plotting jobs
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        key_makePlots_job = getKey("OS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
            'histogramDir' : self.histogramDir_prep_dcard,
            'label' : '4l',
            'make_plots_backgrounds' : self.make_plots_backgrounds,
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        key_makePlots_job = getKey("SS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
            'histogramDir' : self.histogramDir_prep_dcard_SS,
            'label' : "4l SS",
            'make_plots_backgrounds' : self.make_plots_backgrounds,
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        key_makePlots_job = getKey("Fakeable_mcClosure", "OS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots_mcClosure,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
        }
        self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])
    # ------------------------------------------------------------------
    # batch-submission scripts and Makefile
    # ------------------------------------------------------------------
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, hadTau_selection, hadTau_charge_selections, applyFakeRateWeights, central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, executable_addBackgrounds, executable_addBackgroundJetToTauFakes, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
  """Configure the '1l_2tau' channel analysis.

  Delegates the common bookkeeping (output directory, executables, systematic
  shifts, era/lumi, batch settings) to analyzeConfig.__init__ with the channel
  fixed to "1l_2tau", then sets up everything specific to this channel:
  the lepton+tau selection stages, generator-matching categories, the
  per-sample output-directory layout, and the file names of the cfg templates
  used by the downstream addBackgrounds / addFakes / makePlots steps.

  applyFakeRateWeights must be "3L" or "2tau"; any other value raises
  ValueError.  select_rle_output toggles writing of run:lumi:event lists;
  executable_prep_dcard is accepted for interface compatibility.
  """
  analyzeConfig.__init__(self, outputDir, executable_analyze, "1l_2tau", central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, histograms_to_fit)

  self.samples = samples

  # Selection stages considered for the lepton+tau pair, and whether the
  # fake-rate weight is applied at each stage.
  self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
  self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
  self.hadTau_selection_part2 = hadTau_selection
  self.hadTau_charge_selections = hadTau_charge_selections
  self.applyFakeRateWeights = applyFakeRateWeights

  # Generator-level matching categories: leptons ("1l0j" = genuine lepton,
  # "0l1j" = jet faking a lepton) and taus (counts of gen taus / electrons /
  # muons / jets matched to the reconstructed tau pair).
  self.lepton_genMatches = [ "1l0j", "0l1j" ]
  self.hadTau_genMatches = [ "2t0e0m0j", "1t1e0m0j", "1t0e1m0j", "1t0e0m1j", "0t2e0m0j", "0t1e1m0j", "0t1e0m1j", "0t0e2m0j", "0t0e1m1j", "0t0e0m2j" ]

  self.apply_leptonGenMatching = None
  self.apply_hadTauGenMatching = None
  self.lepton_and_hadTau_genMatches_nonfakes = []
  self.lepton_and_hadTau_genMatches_fakes = []
  if self.applyFakeRateWeights == "3L":
    # Fake-rate weights for the full lepton+tau system: classify every
    # combined lepton&tau gen-match category.  Only combinations with no
    # jet fake on either leg ("...0j" on both) count as non-fake.
    self.apply_leptonGenMatching = True
    self.apply_hadTauGenMatching = True
    for lep_match in self.lepton_genMatches:
      for tau_match in self.hadTau_genMatches:
        combined_match = "&".join([ lep_match, tau_match ])
        if lep_match.endswith("0j") and tau_match.endswith("0j"):
          self.lepton_and_hadTau_genMatches_nonfakes.append(combined_match)
        else:
          self.lepton_and_hadTau_genMatches_fakes.append(combined_match)
  elif applyFakeRateWeights == "2tau":
    # Fake-rate weights for the taus only: classify tau gen-match
    # categories; a category with no jet fake ("...0j") is non-fake.
    self.apply_leptonGenMatching = False
    self.apply_hadTauGenMatching = True
    for tau_match in self.hadTau_genMatches:
      bucket = self.lepton_and_hadTau_genMatches_nonfakes if tau_match.endswith("0j") else self.lepton_and_hadTau_genMatches_fakes
      bucket.append(tau_match)
  else:
    raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)

  self.executable_addBackgrounds = executable_addBackgrounds
  self.executable_addFakes = executable_addBackgroundJetToTauFakes

  # Build the per-sample output-directory map: one entry per
  # (sample, selection, fr-weight, tau-charge) combination and directory type.
  for sample_name, sample_info in self.samples.items():
    unusable = not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]
    if unusable:
      continue
    process_name = sample_info["process_name_specific"]
    for selection in self.lepton_and_hadTau_selections:
      for frWeight in self.lepton_and_hadTau_frWeights:
        # Fake-rate weights only make sense for the Fakeable selections.
        if frWeight == "enabled" and not selection.startswith("Fakeable"):
          continue
        selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(selection, frWeight)
        for charge_selection in self.hadTau_charge_selections:
          key_dir = getKey(sample_name, selection, frWeight, charge_selection)
          for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ selection_and_frWeight, charge_selection ]), process_name)
  # Channel-level directories (datacards and plots) are not per-sample.
  for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
    initDict(self.dirs, [ dir_type ])
    self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
  ##print "self.dirs = ", self.dirs

  self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]

  # cfg-file templates (resolved relative to the working directory) and the
  # bookkeeping dictionaries filled later by create().
  self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
  self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
  self.cfgFile_addBackgrounds_modified = {}
  self.histogramFile_addBackgrounds = {}
  self.histogramDir_addBackgrounds = {}
  self.process_output_addBackgrounds = {}
  self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
  self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundJetToTauFakes_cfg.py")
  self.cfgFile_addFakes_modified = {}
  self.histogramFile_addFakes = {}

  # Processes copied verbatim into the datacard, plus the histogram
  # directories used when preparing datacards and control plots.
  self.prep_dcard_processesToCopy = [ "data_obs" ] + self.nonfake_backgrounds + [ "fakes_data", "fakes_mc" ]
  self.histogramDir_prep_dcard = "1l_2tau_OS_Tight"
  self.histogramDir_prep_dcard_SS = "1l_2tau_SS_Tight"
  self.make_plots_backgrounds = self.nonfake_backgrounds + [ "fakes_data" ]
  self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
  self.cfgFiles_make_plots_mcClosure_modified = []
  self.select_rle_output = select_rle_output
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = 
os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= 
frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) for lepton_selection in self.lepton_selections: electron_selection = lepton_selection muon_selection = lepton_selection hadTauVeto_selection = "Tight" hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ]) if lepton_selection == "forBDTtraining": electron_selection = "Loose" muon_selection = "Loose" elif lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for leptonChargeSelection in self.leptonChargeSelections: if 'mcClosure' in lepton_selection and leptonChargeSelection != 'SS': continue for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in 
central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) applyFakeRateWeights = self.applyFakeRateWeights \ if lepton_selection.find("Tight") == -1 \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTauVeto_selection, 'leptonChargeSelection' : leptonChargeSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'selectBDT' : self.isBDTtraining, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'gen_mHH' : self.gen_mHH, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] 
self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining or self.do_sync: continue # add output files of hadd_stage1 to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining or self.do_sync: continue # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = self.get_sample_categories() processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum fake background contributions for the total of all MC sample # input processes: TT_flip, TTW_flip, TTWW_flip, ... 
# output process: flips_mc addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_flip" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "flips_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"), # separately for "nonfake" and "fake" contributions genMatch_categories = [ "nonfake", "fake" ] for genMatch_category in genMatch_categories: for signal_base, signal_input in self.signal_io.items(): addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category) key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple) if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys(): continue processes_input = signal_input process_output = signal_base if genMatch_category == "fake": processes_input = [ process_input + "_fake" for process_input in processes_input ] process_output += "_fake" 
self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple), 'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]) key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile']) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining or self.do_sync: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) if self.isBDTtraining: self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) elif self.do_sync: self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] if self.isBDTtraining: self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) elif self.do_sync: self.addToMakefile_syncNtuple(lines_makefile) outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel) self.outputFile_sync['sync'] = outputFile_sync_path self.targets.append(outputFile_sync_path) self.addToMakefile_hadd_sync(lines_makefile) else: raise ValueError("Internal logic error") self.targets.extend(self.phoniesToAdd) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for category in self.categories: for leptonChargeSelection in self.leptonChargeSelections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), 
leptonChargeSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") addFakes_job_tuple = (category, leptonChargeSelection) key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple), 'category_signal' : getHistogramDir(category, "Tight", "disabled", leptonChargeSelection), 'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", leptonChargeSelection) } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) #-------------------------------------------------------------------------- # add histograms in OS and SS regions, # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6: self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = [] for category in self.categories: key_addFakes_job = getKey("data_fakes", category, leptonChargeSelection) 
self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root") #-------------------------------------------------------------------------- logging.info("Creating configuration files to run 'addBackgroundFlips'") for category in self.categories: key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_addFlips_dir = getKey("addBackgroundLeptonFlips") key_addFlips_job = getKey("data_flips", category) self.jobOptions_addFlips[key_addFlips_job] = { 'inputFile' : self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_%s_cfg.py" % category), 'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips_%s.root" % category), 'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips_%s.log" % category), 'category_signal' : getHistogramDir(category, "Tight", "disabled", "SS" ), 'category_sideband' : getHistogramDir(category, "Tight", "disabled", "OS" ) } self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'...") for category in self.categories: for histogramToFit in self.histograms_to_fit: logging.info(" ... 
for category %s, histogram %s" % (category, histogramToFit)) prep_dcard_HH = set() for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] masses_to_exclude = ["3000", "2500", "2000", "1750", "1500", "1250"] if sample_category.startswith("signal"): sample_category = sample_info["sample_category_hh"] doAdd = False if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category: doAdd = True if "spin0" in histogramToFit and "spin0" in sample_category and histogramToFit[9:13] in sample_category: doAdd = True if "spin2" in histogramToFit and "spin2" in sample_category and histogramToFit[9:13] in sample_category: doAdd = True for mass in masses_to_exclude: if mass in sample_category: doAdd = False else: doAdd = True if doAdd: if "wwww" in sample_category: prep_dcard_HH.add(sample_category.replace("wwww", "zzzz")) prep_dcard_HH.add(sample_category.replace("wwww", "wwww")) prep_dcard_HH.add(sample_category.replace("wwww", "zzww")) elif "wwtt" in sample_category: prep_dcard_HH.add(sample_category.replace("wwtt", "ttzz")) prep_dcard_HH.add(sample_category.replace("wwtt", "ttww")) elif "tttt" in sample_category: prep_dcard_HH.add(sample_category) else: raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" 
% sample_category) prep_dcard_HH = list(prep_dcard_HH) prep_dcard_H = [] prep_dcard_other_nonfake_backgrounds = [] for process in self.nonfake_backgrounds: if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]: prep_dcard_H.append("%s_hww" % process) prep_dcard_H.append("%s_hzz" % process) prep_dcard_H.append("%s_htt" % process) prep_dcard_H.append("%s_hbb" % process) else: prep_dcard_other_nonfake_backgrounds.append(process) self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "data_flips", "fakes_mc", "flips_mc" ] key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_prep_dcard_dir = getKey("prepareDatacards") prep_dcard_job_tuple = (self.channel, category, "SS", histogramToFit) key_prep_dcard_job = getKey(category, "SS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : getHistogramDir(category, "Tight", "disabled", "SS"), 'histogramToFit' : histogramToFit, 'label' : "2lSS" } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, category, "OS", histogramToFit) key_prep_dcard_job = getKey(category, "OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % 
prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : getHistogramDir(category, "Tight", "disabled", "OS"), 'histogramToFit' : histogramToFit, 'label' : "2lOS", } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, category, "SS", histogramToFit) key_add_syst_fakerate_job = getKey(category, "SS", histogramToFit) key_prep_dcard_job = getKey(category, "SS", histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : category, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = "%s/sel/evt/fakes_mc" % getHistogramDir(category, "Tight", "disabled", "SS") for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, "SS") histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % 
self.mcClosure_dir['%s_%s' % (lepton_mcClosure, "SS")] if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard") histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard") self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_dir = getKey("makePlots") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : "2lSS", 'make_plots_backgrounds' : self.make_plots_backgrounds, } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "OS" in self.leptonChargeSelections: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : 
self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_OS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_OS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_OS, 'label' : "2lOS", 'make_plots_backgrounds' : self.make_plots_backgrounds_OS, } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("Fakeable_mcClosure", "SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel), } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % 
self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips) self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") process_name = sample_info["process_name_specific"] logging.info("Building dictionaries for sample %s..." % process_name) for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) for chargeSumSelection in self.chargeSumSelections: central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]: initDict(self.dirs, [ key_dir, dir_type ]) if 
dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= 
frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e|t)_wFakeRateWeights') for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections: lepton_selection = lepton_and_hadTau_selection hadTau_selection = lepton_and_hadTau_selection electron_selection = lepton_selection muon_selection = lepton_selection if self.applyFakeRateWeights == "3tau": lepton_selection = "Tight" hadTau_selection = "|".join([ hadTau_selection, self.hadTau_selection_part2 ]) if lepton_and_hadTau_selection == "forBDTtraining": lepton_selection = "Loose" electron_selection = lepton_selection muon_selection = lepton_selection hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed elif lepton_and_hadTau_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" hadTau_selection = "Tight" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" hadTau_selection = "Tight" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t": electron_selection = "Tight" muon_selection = "Tight" hadTau_selection = "Fakeable" hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2]) for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights: if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"): continue if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ 
"Tight", "forBDTtraining" ]: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight) for chargeSumSelection in self.chargeSumSelections: for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) applyFakeRateWeights = self.applyFakeRateWeights \ if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) \ else "disabled" self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'hadTauSelection' : hadTau_selection, 'apply_hadTauGenMatching' : self.apply_hadTauGenMatching, 'chargeSumSelection' : chargeSumSelection, 'applyFakeRateWeights' : applyFakeRateWeights, 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'apply_hlt_filter' : self.hlt_filter, 'useNonNominal' : self.use_nonnominal, 'fillGenEvtHistograms' : True, 'selectBDT' : self.isBDTtraining, 'gen_mHH' : self.gen_mHH, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage1_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: 
self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.isBDTtraining: continue # add output files of hadd_stage1 to list of input files for hadd_stage1_5 key_hadd_stage1_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage1_5_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) if self.isBDTtraining: continue # sum fake background contributions for the total of all MC sample # input processes: TT_fake, TTW_fake, TTWW_fake, ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = self.get_sample_categories() processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum conversion background contributions for the total of all MC sample # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) if self.isBDTtraining: continue # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) hadd_stage2_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_and_hadTau_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.isBDTtraining: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.targets.extend(self.phoniesToAdd) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for chargeSumSelection in self.chargeSumSelections: key_hadd_stage1_5_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") key_addFakes_job = getKey("data_fakes", chargeSumSelection) category_sideband = None if self.applyFakeRateWeights == "4L": category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection elif self.applyFakeRateWeights == "3tau": category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection else: raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" 
% self.applyFakeRateWeights) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection), 'category_signal' : "hh_1l_3tau_%s_Tight" % chargeSumSelection, 'category_sideband' : category_sideband } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'...") for histogramToFit in self.histograms_to_fit: logging.info(" ... 
for histogram %s" % histogramToFit) prep_dcard_HH = set() for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue sample_category = sample_info["sample_category"] if sample_category.startswith("signal"): sample_category = sample_info["sample_category_hh"] doAdd = False if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category: doAdd = True if ("spin0" in histogramToFit and "spin0" in sample_category) or ("spin2" in histogramToFit and "spin2" in sample_category): startpos = None for pattern in [ "MVAOutput", "BDTOutput" ]: if pattern in histogramToFit: startpos = histogramToFit.find(pattern) + len(pattern) + 1 # CV: increment startpos by 1 to account for trailing "_" if not startpos: raise ValueError("Failed to parse histogram name = '%s' !!" % histogramToFit) endpos = histogramToFit.find("_", startpos) masspoint = histogramToFit[startpos:endpos] if ("_%s_" % masspoint) in sample_category: doAdd = True else: doAdd = True if doAdd: if "_wwww" in sample_category: prep_dcard_HH.add(sample_category.replace("_wwww", "_zzzz")) prep_dcard_HH.add(sample_category.replace("_wwww", "_wwww")) prep_dcard_HH.add(sample_category.replace("_wwww", "_zzww")) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_wwww", "")) elif "_wwtt" in sample_category: prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttzz")) prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttww")) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_wwtt", "")) elif "_tttt" in sample_category: prep_dcard_HH.add(sample_category) if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit): prep_dcard_HH.add(sample_category.replace("_tttt", "")) else: raise ValueError("Failed to identify 
relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category) prep_dcard_HH = list(prep_dcard_HH) prep_dcard_H = [] prep_dcard_other_nonfake_backgrounds = [] for process in self.nonfake_backgrounds: if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]: prep_dcard_H.append("%s_hww" % process) prep_dcard_H.append("%s_hzz" % process) prep_dcard_H.append("%s_htt" % process) prep_dcard_H.append("%s_hbb" % process) else: prep_dcard_other_nonfake_backgrounds.append(process) self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "fakes_mc" ] key_prep_dcard_dir = getKey("prepareDatacards") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") prep_dcard_job_tuple = (self.channel, "OS", histogramToFit) key_prep_dcard_job = getKey("OS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : None } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS") prep_dcard_job_tuple = (self.channel, "SS", histogramToFit) key_prep_dcard_job = getKey("SS", histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], 
"prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'histogramToFit' : histogramToFit, 'label' : 'SS' } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' # - 'CMS_ttHl_Clos_norm_t' # - 'CMS_ttHl_Clos_shape_t' for chargeSumSelection in self.chargeSumSelections: key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit) key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection) key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit) key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = None if chargeSumSelection == "OS": histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard elif chargeSumSelection == "SS": histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard_SS else: raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" 
% chargeSumSelection) for lepton_and_hadTau_type in [ 'e', 'm', 't' ]: lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections: continue lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection) histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_and_hadTau_mcClosure, chargeSumSelection)] if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit: histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard") histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard") self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections, 'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit) }) self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_makePlots_dir = getKey("makePlots") if "OS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : 
self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : '1l+3#tau_{h}', 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "SS" in self.chargeSumSelections: key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard_SS, 'label' : "1l+3#tau_{h} SS", 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS") key_makePlots_job = getKey("Fakeable_mcClosure", "OS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots_mcClosure, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel) } self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job]) if self.is_sbatch: logging.info("Creating script for 
submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self):
  """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

  The method proceeds in stages:
    1) build the directory-name dictionary (self.dirs) for every
       (process, charge-sum selection, lepton+hadTau selection, systematic) combination;
    2) create the directories on disk;
    3) build per-sample input file lists and write the analysis job configs;
    4) book the hadd stage1 / stage1_5 / stage2 merge jobs and the
       copyHistograms / addBackgrounds / addFakes post-processing jobs;
    5) write prepareDatacards, addSystFakeRates and makePlots configs;
    6) emit the sbatch submission scripts (if requested) and the Makefile.

  Returns the number of jobs (self.num_jobs) booked by the helper methods.
  """
  # --- stage 1: build self.dirs for every job combination -------------------
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"]:
      continue
    sample_category = sample_info["sample_category"]
    is_mc = (sample_info["type"] == "mc")
    process_name = sample_info["process_name_specific"]
    logging.info("Building dictionaries for sample %s..." % process_name)
    for chargeSumSelection in self.chargeSumSelections:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          # fake-rate weights only make sense for Fakeable selections;
          # "disabled" only for the Tight / BDT-training selections
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          # "" / "hadd" / "copyHistograms" / "addBackgrounds" act as pseudo-systematics
          # so that the merge/post-processing jobs get their own directories
          central_or_shift_extensions = ["", "hadd", "copyHistograms", "addBackgrounds"]
          central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
          central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
          for central_or_shift_or_dummy in central_or_shifts_extended:
            # "hadd" acts as a pseudo-process for the cross-sample merge directories
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
              if process_name_or_dummy in [ "hadd" ] and central_or_shift_or_dummy != "":
                continue
              # NOTE(review): evtcategories_extended is built but not used in this
              # loop body -- presumably a leftover; confirm before removing
              evtcategories_extended = [""]
              evtcategories_extended.extend(self.evtCategories)
              if central_or_shift_or_dummy in [ "hadd", "copyHistograms", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                continue
              # real systematics must be accepted for this sample/selection combination
              if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
              ):
                continue
              key_dir = getKey(process_name_or_dummy, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift_or_dummy)
              for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                # configs and logs live under configDir, outputs under outputDir
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                  self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                else:
                  self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
  # directories for the post-processing executables
  for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
    key_dir = getKey(subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ key_dir, dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
      else:
        self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
  # channel-level directories (scripts, datacards, plots, hadd runtime files, ...)
  for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
    initDict(self.dirs, [ dir_type ])
    if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
    else:
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
  # --- stage 2: create the directories, logging progress in 1% steps --------
  numDirectories = 0
  for key in self.dirs.keys():
    if type(self.dirs[key]) == dict:
      numDirectories += len(self.dirs[key])
    else:
      numDirectories += 1
  logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
  numDirectories_created = 0;
  frac = 1
  for key in self.dirs.keys():
    if type(self.dirs[key]) == dict:
      for dir_type in self.dirs[key].keys():
        create_if_not_exists(self.dirs[key][dir_type])
      numDirectories_created += len(self.dirs[key])
    else:
      create_if_not_exists(self.dirs[key])
      numDirectories_created = numDirectories_created + 1
    # emit one log line per percent of directories created so far
    while 100*numDirectories_created >= frac*numDirectories:
      logging.info(" %i%% completed" % frac)
      frac = frac + 1
  logging.info("Done.")
  # --- stage 3: split each sample's ntuples into per-job input file lists ---
  inputFileLists = {}
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"]:
      continue
    logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
    inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)
  for chargeSumSelection in self.chargeSumSelections:
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      lepton_selection = lepton_and_hadTau_selection
      ##hadTau_selection = None
      ##if lepton_and_hadTau_selection == "Tight":
      ##  hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
      ##else:
      ##  hadTau_selection = lepton_and_hadTau_selection
      # hadTau selection string carries the MVA working point after the '|'
      hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
      electron_selection = lepton_selection
      muon_selection = lepton_selection
      # MC-closure selections loosen exactly one object type at a time
      if lepton_and_hadTau_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
        hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"
        hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t":
        electron_selection = "Tight"
        muon_selection = "Tight"
        hadTau_selection = "Fakeable"
      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"]:
            continue
          process_name = sample_info["process_name_specific"]
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
          inputFileList = inputFileLists[sample_name]
          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          use_th_weights = self.runTHweights(sample_info)
          # tH samples evaluate all shifts internally via theory weights;
          # other samples only run the external shifts as separate jobs
          central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
          for central_or_shift in central_or_shift_dedicated:
            if not self.accept_systematics(
              central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
            ):
              continue
            # internal shifts are evaluated inside the "central" job itself
            central_or_shifts_local = []
            if central_or_shift == "central" and not use_th_weights:
              for central_or_shift_local in self.central_or_shifts_internal:
                if self.accept_systematics(
                  central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
                ):
                  central_or_shifts_local.append(central_or_shift_local)
            logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift))
            # build config files for executing analysis code
            key_analyze_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift)
            for jobId in inputFileList.keys():
              analyze_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift, jobId)
              key_analyze_job = getKey(*analyze_job_tuple)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue
              cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
              histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
              logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
              # run/lumi/event dump only when explicitly requested
              rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                if self.select_rle_output else ""
              # fake-rate weights are disabled in the Tight (signal) region,
              # except when producing BDT-training ntuples
              applyFakeRateWeights = self.applyFakeRateWeights \
                if self.isBDTtraining or not lepton_and_hadTau_selection == "Tight" \
                else "disabled"
              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : cfgFile_modified_path,
                'histogramFile' : histogramFile_path,
                'logFile' : logFile_path,
                'selEventsFileName_output' : rleOutputFile_path,
                'electronSelection' : electron_selection,
                'muonSelection' : muon_selection,
                'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                'hadTauSelection' : hadTau_selection,
                'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                'chargeSumSelection' : chargeSumSelection,
                'applyFakeRateWeights' : applyFakeRateWeights,
                'central_or_shift' : central_or_shift,
                'central_or_shifts_local' : central_or_shifts_local,
                'selectBDT' : self.isBDTtraining,
                'apply_hlt_filter' : self.hlt_filter,
                'useNonNominal' : self.use_nonnominal,
                'fillGenEvtHistograms' : True,
                'useObjectMultiplicity' : True,
              }
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection)
              # initialize input and output file names for hadd_stage1
              key_hadd_stage1_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "hadd")
              hadd_stage1_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
              if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)
              if self.isBDTtraining:
                self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
          # BDT-training runs stop after hadd_stage1; no post-processing
          if self.isBDTtraining:
            continue
          #----------------------------------------------------------------------------
          # split hadd_stage1 files into separate files, one for each event category
          for category in self.evtCategories:
            key_hadd_stage1_job = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_copyHistograms_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "copyHistograms")
            copyHistograms_job_tuple = (category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_copyHistograms_job = getKey(*copyHistograms_job_tuple)
            cfgFile_modified = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_CFGS], "copyHistograms_%s_%s_%s_%s_cfg.py" % copyHistograms_job_tuple)
            outputFile = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_HIST], "copyHistograms_%s_%s_%s_%s.root" % copyHistograms_job_tuple)
            self.jobOptions_copyHistograms[key_copyHistograms_job] = {
              'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
              'cfgFile_modified' : cfgFile_modified,
              'outputFile' : outputFile,
              'logFile' : os.path.join(self.dirs[key_copyHistograms_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
              'categories' : [ category ],
            }
            self.createCfg_copyHistograms(self.jobOptions_copyHistograms[key_copyHistograms_job])
          #----------------------------------------------------------------------------
          # add output files of copyHistograms jobs to list of input files for hadd_stage1_5
          for category in self.evtCategories:
            key_copyHistograms_job = getKey(category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_hadd_stage1_5_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            hadd_stage1_5_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_copyHistograms[key_copyHistograms_job]['outputFile'])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
              "hadd_stage1_5_%s_%s_%s.root" % hadd_stage1_5_job_tuple)
        if self.isBDTtraining:
          continue
        for category in self.evtCategories:
          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])
          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])
          # sum signal contributions from gluon fusion and VBF HH production,
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              key_hadd_stage2_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              # only the Tight (signal-region) selection feeds hadd_stage2
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])
          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          key_hadd_stage2_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          hadd_stage2_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
            "hadd_stage2_%s_%s_%s.root" % hadd_stage2_job_tuple)
  # --- BDT-training early exit: only analysis + hadd_stage1 jobs ------------
  if self.isBDTtraining:
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
  # --- stage 4: data-driven fake background (Fakeable sideband -> Tight) ----
  logging.info("Creating configuration files to run 'addBackgroundFakes'")
  for chargeSumSelection in self.chargeSumSelections:
    for category in self.evtCategories:
      key_hadd_stage1_5_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"))
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      addFakes_job_tuple = (category, chargeSumSelection)
      key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple),
        'category_signal' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
        'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", chargeSumSelection)
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
  # --- stage 5: datacards, fake-rate closure systematics, plots -------------
  logging.info("Creating configuration files to run 'prepareDatacards'")
  for chargeSumSelection in self.chargeSumSelections:
    for category in self.evtCategories:
      for histogramToFit in self.histograms_to_fit:
        key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
        key_prep_dcard_job = getKey(category, chargeSumSelection, histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
          'histogramToFit' : histogramToFit
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(category, chargeSumSelection, histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : category,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = getHistogramDir(category, "Tight", "disabled", chargeSumSelection)
        # wire in the MC-closure inputs for each object type that was run
        for lepton_and_hadTau_type in [ 'e', 'm', 't' ]:
          lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type
          if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
          histogramDir_mcClosure = self.mcClosure_dir[lepton_and_hadTau_mcClosure+'_%s' %chargeSumSelection]
          # mcClosure_dir is stored for the inclusive category; remap to this category
          histogramDir_mcClosure = histogramDir_mcClosure.replace(self.evtCategory_inclusive, category)
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections,
            'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])
  logging.info("Creating configuration files to run 'makePlots'")
  for chargeSumSelection in self.chargeSumSelections:
    key_hadd_stage2_job = getKey(self.evtCategory_inclusive, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")
    key_makePlots_job = getKey(chargeSumSelection)
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, chargeSumSelection)),
      'histogramDir' : getHistogramDir(self.evtCategory_inclusive, "Tight", "disabled", chargeSumSelection),
      'label' : '1l1tau',
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO
      # NOTE(review): this reuses key_makePlots_job and so overwrites the
      # plot job booked just above for this chargeSumSelection -- confirm intended
      key_makePlots_job = getKey(chargeSumSelection)
      key_hadd_stage2 = getKey(chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s_%s.png" % (self.channel, chargeSumSelection))
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])
  # --- stage 6: batch submission scripts and the driving Makefile -----------
  if self.is_sbatch:
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_copyHistograms)
    self.sbatchFile_copyHistograms = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_copyHistograms_%s.py" % self.channel)
    self.createScript_sbatch_copyHistograms(self.executable_copyHistograms, self.sbatchFile_copyHistograms, self.jobOptions_copyHistograms)
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
    self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
    self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
    self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
    self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
    self.createScript_sbatch_addFakes(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
  logging.info("Creating Makefile")
  lines_makefile = []
  self.addToMakefile_analyze(lines_makefile)
  self.addToMakefile_hadd_stage1(lines_makefile)
  self.addToMakefile_copyHistograms(lines_makefile, make_target = "phony_copyHistograms", make_dependency = "phony_hadd_stage1")
  self.addToMakefile_backgrounds_from_data(lines_makefile, make_dependency = "phony_copyHistograms")
  #----------------------------------------------------------------------------
  self.addToMakefile_hadd_stage2(lines_makefile)
  self.addToMakefile_prep_dcard(lines_makefile)
  self.addToMakefile_add_syst_fakerate(lines_makefile)
  self.addToMakefile_make_plots(lines_makefile)
  self.createMakefile(lines_makefile)
  logging.info("Done")
  return self.num_jobs
def create(self):
    """Create all config files and the Makefile for the complete charge-flip
    analysis workflow, run either locally or on the batch system.

    Steps: build the per-sample/per-lepton-selection directory tree, write one
    analysis config per (sample, lepton selection, systematic shift, job),
    collect the hadd stage1/stage2 input/output file names, write the
    prepareDatacards configs, and finally emit the sbatch script (if requested)
    and the Makefile driving the whole chain.
    """
    # --- directory structure --------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_selection in self.lepton_selections:
            key_dir = getKey(process_name, lepton_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                # configs and logs live under configDir; histograms and RLE files under outputDir
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection ]), process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection ]), process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # --- check and split the input files of every sample ----------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    # --- analysis jobs --------------------------------------------------------
    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue
            process_name = sample_info["process_name_specific"]
            # only run processes explicitly whitelisted in the module-level
            # sample_process_run_s list (local edit; see "Edit Siddh" in history)
            run_process = False
            for sprocess_run in sample_process_run_s:
                if sprocess_run == process_name:
                    run_process = True
            if not run_process:
                continue
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            is_signal = (sample_category == "signal")
            inputFileList = inputFileLists[sample_name]
            for central_or_shift in self.central_or_shifts:
                for jobId in inputFileList.keys():
                    # systematic shifts are only defined for MC
                    if central_or_shift != "central" and not is_mc:
                        continue
                    # build config files for executing analysis code
                    key_dir = getKey(process_name, lepton_selection)
                    key_analyze_job = getKey(process_name, lepton_selection, central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # FIX: original referenced undefined 'key_job' here (NameError on
                        # every empty job); also use logging instead of a py2 print statement
                        logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles' : ntupleFiles,
                        'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                          (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                        'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                          (process_name, lepton_selection, central_or_shift, jobId)),
                        'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                          (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                        'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % \
                          (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                        'sample_category' : sample_category,
                        'triggers' : sample_info["triggers"],
                        'lepton_selection' : lepton_selection,
                        'applyFakeRateWeights' : "disabled",
                        'use_HIP_mitigation_mediumMuonId' : True,
                        'is_mc' : is_mc,
                        'central_or_shift' : central_or_shift,
                        # data keeps lumi_scale = 1; MC is scaled to xsec * lumi / N_events
                        'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                        'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                        'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name, lepton_selection)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys():
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                      (self.channel, process_name, lepton_selection))
                    # was a bare py2 print; demoted to debug-level logging
                    logging.debug("%s %s %s %s %s" % (key_hadd_stage1, self.channel, process_name,
                      lepton_selection, self.outputFile_hadd_stage1[key_hadd_stage1]))
            # initialize input and output file names for hadd_stage2
            key_hadd_stage2 = getKey(lepton_selection)
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
                self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
              (self.channel, lepton_selection))

    # --- prepareDatacards jobs ------------------------------------------------
    logging.info("Creating configuration files to run 'prepareDatacards'")
    # rebuild the lists as fresh copies so later mutations do not alias the originals
    # (replaces the original element-by-element copy loops)
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_job = getKey(histogramToFit)
        # NOTE(review): relies on 'lepton_selection' leaking out of the loop above,
        # so only the hadd_stage2 output of the LAST lepton selection feeds the
        # datacards — confirm this is intended
        key_hadd_stage2 = getKey(lepton_selection)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
            'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    # --- batch scripts & Makefile ---------------------------------------------
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
def create(self):
    """Create all config files and the Makefile for the complete lepton
    fake-rate (LeptonFakeRate) measurement workflow, run either locally or on
    the batch system.

    Steps: build the directory tree, write per-(sample, shift, job) analysis
    configs, wire hadd stage1/1.5/2, set up the addBackgrounds jobs (fakes_mc,
    data_fakes, conversions), generate the eta/pt-binned datacard categories,
    render the jinja2 setupDatacards / postFit / Makefile_postFit templates,
    and emit the sbatch scripts and master Makefile.

    Returns:
        int: self.num_jobs, the total number of jobs scheduled.
    """
    # --- directory structure --------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")
        logging.info("Building dictionaries for sample %s..." % process_name)
        # dummy entries create the shared "hadd"/"addBackgrounds" directories too
        central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
        central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
        for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
                # NOTE(review): nesting of these guards is ambiguous in the
                # original flattened source — confirm against upstream
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                    if not is_mc:
                        continue
                    if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info):
                        continue
                key_dir = getKey(process_name_or_dummy, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                    initDict(self.dirs, [ key_dir, dir_type ])
                    if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                        self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                          process_name_or_dummy, central_or_shift_or_dummy)
                    else:
                        self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                          process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "prepareDatacards" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT, DKEY_PLOT, DKEY_COMBINE_OUTPUT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # --- create directories, with coarse progress reporting -------------------
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # --- check and split the input files of every sample ----------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # --- analysis jobs --------------------------------------------------------
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        inputFileList = inputFileLists[sample_name]
        is_mc = (sample_info["type"] == "mc")
        sample_category = sample_info["sample_category"]
        for central_or_shift in self.central_or_shifts:
            # systematic shifts are only defined for MC
            if central_or_shift != "central" and not is_mc:
                continue
            if not self.accept_central_or_shift(central_or_shift, sample_info):
                continue
            key_analyze_dir = getKey(process_name, central_or_shift)
            for jobId in inputFileList.keys():
                analyze_job_tuple = (process_name, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                    continue
                # FIX: the original template ended in "_{jobId}_%s_%s.txt" — the
                # printf-style %s placeholders are NOT substituted by str.format,
                # so every RLE file name contained a literal "%s_%s"
                rleOutputFile = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_RLES],
                    "rle_{channel}_{process_name}_{central_or_shift}_{jobId}.txt".format(
                        channel = self.channel,
                        process_name = process_name,
                        central_or_shift = central_or_shift,
                        jobId = jobId,
                    )) if self.select_rle_output else ""
                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % analyze_job_tuple)
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%i.root" % analyze_job_tuple)
                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles' : ntupleFiles,
                    'cfgFile_modified' : cfgFile_modified_path,
                    'histogramFile' : histogramFile_path,
                    'selEventsFileName_output' : rleOutputFile,
                    'logFile' : logFile_path,
                    'absEtaBins_e' : self.absEtaBins_e,
                    'ptBins_e' : self.ptBins_e,
                    'absEtaBins_mu' : self.absEtaBins_mu,
                    'ptBins_mu' : self.ptBins_mu,
                    'central_or_shift' : central_or_shift,
                    'fillGenEvtHistograms' : self.fillGenEvtHistograms,
                    'triggers_mu_cfg' : "leptonFR_triggers['{}']['{}']".format(self.era, 'mu'),
                    'triggers_e_cfg' : "leptonFR_triggers['{}']['{}']".format(self.era, 'e'),
                    'lep_mva_cut_e' : float(self.lep_mva_cut_e),
                    'lep_mva_cut_mu' : float(self.lep_mva_cut_mu),
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)
                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name)
                key_hadd_stage1_job = getKey(process_name)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                  "hadd_stage1_%s.root" % process_name)

    # --- hadd stage1_5: merge all per-process stage1 outputs ------------------
    key_hadd_stage1_5_dir = getKey("hadd")
    key_hadd_stage1_5_job = getKey('')
    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
    for key_hadd_stage1_job in self.outputFile_hadd_stage1.keys():
        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
    self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5.root")

    # --- sum fake contributions over all MC samples ---------------------------
    # input processes: TTj, ... ; output process: fakes_mc
    key_hadd_stage1_5_job = getKey('')
    key_addBackgrounds_dir = getKey("addBackgrounds")
    key_addBackgrounds_job_sum = getKey("fakes_mc")
    sample_categories = []
    sample_categories.extend(self.nonfake_backgrounds)
    sample_categories.extend(self.ttHProcs)
    processes_input = []
    for sample_category in sample_categories:
        processes_input.append("%sj" % sample_category)
    self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_cfg.py" % "fakes_mc"),
        'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s.root" % "fakes_mc"),
        'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s.log" % "fakes_mc"),
        'categories' : [
            "LeptonFakeRate/numerator/electrons_tight",
            "LeptonFakeRate/denominator/electrons_fakeable",
            "LeptonFakeRate/numerator/muons_tight",
            "LeptonFakeRate/denominator/muons_fakeable"
        ],
        'processes_input' : processes_input,
        'process_output' : "fakes_mc",
        'histogramsToCopy' : list(self.histograms_to_fit.keys()),
        'sysShifts' : []
    }
    self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum])

    # create configuration files to run 'addBackgrounds_LeptonFakeRate'
    key_addBackgrounds_job_leptonFR = getKey('')
    self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS],
          os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate)),
        'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_LeptonFakeRate.root"),
        'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS],
          os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate.replace("_cfg.py", ".log"))),
    }
    self.createCfg_addBackgrounds_LeptonFakeRate(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR])

    # create configuration files to run 'addBackgrounds_Convs_LeptonFakeRate'
    key_addBackgrounds_job_conv = getKey('')
    self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS],
          os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate)),
        'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_Convs_LeptonFakeRate.root"),
        'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS],
          os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate.replace("_cfg.py", ".log"))),
    }
    self.createCfg_addBackgrounds_Convs_LeptonFakeRate(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv])

    # --- hadd stage2 ----------------------------------------------------------
    key_hadd_stage2_dir = getKey("hadd")
    key_hadd_stage2_job = getKey('')
    if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
        self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
    # CV: hadd_stage_1_5 output file does not need to be added as input for hadd_stage_2,
    #     as addBackgrounds_LeptonFakeRate output file contains all histograms except fakes_mc
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum]['outputFile'])
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR]['outputFile'])
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv]['outputFile'])
    self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2.root")

    # --- generate the eta and pt bins for electrons and muons -----------------
    lepton_bins = {}
    categories = []
    for lepton in ['electron', 'muon']:
        if lepton not in lepton_bins:
            lepton_bins[lepton] = {}
        absEtaBins = None
        ptBins = None
        lepton_short = None
        if lepton == 'electron':
            absEtaBins = self.absEtaBins_e
            ptBins = self.ptBins_e
            lepton_short = 'e'
        elif lepton == 'muon':
            absEtaBins = self.absEtaBins_mu
            ptBins = self.ptBins_mu
            lepton_short = 'mu'
        else:
            raise ValueError('Invalid lepton type: %s' % lepton)
        for selection in ['tight', 'fakeable']:
            if selection not in lepton_bins[lepton]:
                lepton_bins[lepton][selection] = []
            num_or_den = None
            if selection == 'tight':
                num_or_den = 'numerator'
            elif selection == 'fakeable':
                num_or_den = 'denominator'
            else:
                raise ValueError('Invalid lepton selection: %s' % selection)
            for absEtaBin_idx in range(0, len(absEtaBins) - 1):
                absEtaBinLowerEdge = absEtaBins[absEtaBin_idx]
                absEtaBinUpperEdge = absEtaBins[absEtaBin_idx + 1]
                absEtaBinString = getEtaBin(absEtaBinLowerEdge, absEtaBinUpperEdge)
                for ptBin_idx in range(0, len(ptBins) - 1):
                    ptBinsLowerEdge = ptBins[ptBin_idx]
                    ptBinsUpperEdge = ptBins[ptBin_idx + 1]
                    ptBinString = getPtBin(ptBinsLowerEdge, ptBinsUpperEdge)
                    absEta_and_ptBinString = '%s_%s' % (absEtaBinString, ptBinString)
                    lepton_bins[lepton][selection].append(
                        construct_lepton_params(
                            lepton, lepton_short, selection, absEta_and_ptBinString,
                            error_msg = "No fit parameter range specified for abs(eta) range = (%.3f, %.3f) and "
                                        "pT range = (%.3f, %.3f) for lepton type '%s' !!" % \
                            (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, lepton)
                        ) + (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, 0)
                    )
                    categories.append(
                        (
                            "LeptonFakeRate/%s/%ss_%s/%s/%s" % (num_or_den, lepton, selection, absEtaBinString, ptBinString),
                            "%ss_%s_%s_shapes" % (lepton, selection, absEta_and_ptBinString),
                        )
                    )
            # Let's also add inclusive category
            lepton_bins[lepton][selection].append(
                construct_lepton_params(
                    lepton, lepton_short, selection, 'incl',
                    error_msg = "No fit parameter range specified for lepton type %s" % lepton
                ) + (-1., -1., -1., -1., 1)
            )
            categories.append(
                (
                    "LeptonFakeRate/%s/%ss_%s/incl" % (num_or_den, lepton, selection),
                    "%ss_%s_incl_shapes" % (lepton, selection),
                )
            )
    lepton_bins_merged = []
    for lepton_type in lepton_bins:
        for lepton_selection in lepton_bins[lepton_type]:
            lepton_bins_merged.extend(lepton_bins[lepton_type][lepton_selection])

    if self.prep_dcard:
        logging.info("Creating configuration files to run 'prepareDatacards_LeptonFakeRate'")
        datacards = []
        for histogramToFit in self.histograms_to_fit:
            key_prep_dcard_dir = getKey("prepareDatacards")
            key_prep_dcard_job = getKey(histogramToFit)
            datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % (histogramToFit))
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_LeptonFakeRate_%s_cfg.py" % histogramToFit),
                'datacardFile' : datacard,
                'histogramDir' : (self.histogramDir_prep_dcard),
                'histogramToFit' : histogramToFit,
                'label' : None,
                'categories' : categories,
            }
            datacards.append(datacard)
            self.createCfg_prep_dcard_LeptonFakeRate(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # Create setupDatacards_LeptonFakeRate.py script from the template
        systematics_leptonFR = []
        for systematic in self.central_or_shifts:
            if systematic == 'central':
                continue
            systematic_name = systematic.replace('Up', '').replace('Down', '')
            if systematic_name not in systematics_leptonFR:
                systematics_leptonFR.append(systematic_name)
        setup_dcards_template_file = os.path.join(jinja_template_dir, 'setupDatacards_LeptonFakeRate.py.template')
        with open(setup_dcards_template_file, 'r') as setup_dcards_template_file_ptr:
            setup_dcards_template = setup_dcards_template_file_ptr.read()
        setup_dcards_script = jinja2.Template(setup_dcards_template).render(
            leptons = lepton_bins_merged,
            central_or_shifts = systematics_leptonFR,
            signal_process = "QCD" if self.use_QCD_fromMC else "data_fakes",
        )
        setup_dcards_script_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'setupDatacards_LeptonFakeRate.py')
        logging.debug("writing setupDatacards_LeptonFakeRate script file = '%s'" % setup_dcards_script_path)
        with codecs.open(setup_dcards_script_path, "w", "utf-8") as setup_dcards_script_file:
            setup_dcards_script_file.write(setup_dcards_script)
            setup_dcards_script_file.flush()
            os.fsync(setup_dcards_script_file.fileno())
        add_chmodX(setup_dcards_script_path)

        # pick the post-fit plotting / yield-table scripts matching the fake estimate
        if self.use_QCD_fromMC:
            postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_mc.py')
            yieldtable_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_mc.py')
        else:
            postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_data.py')
            yieldtable_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_data.py')

        # Create run_postFit.sh script from the template, one per lepton bin
        combine_output_dir = os.path.join(self.dirs[DKEY_COMBINE_OUTPUT], 'output')
        postfit_template_file = os.path.join(jinja_template_dir, 'run_postFit.sh.template')
        with open(postfit_template_file, 'r') as postfit_template_file_ptr:
            postfit_template = postfit_template_file_ptr.read()
        for lepton in ['electron', 'muon']:
            for selection in ['fakeable', 'tight']:
                is_num = selection == 'tight'
                for params in lepton_bins[lepton][selection]:
                    l_array, l_range, l_sub_dir, l_eta_low, l_eta_high, l_pt_low, l_pt_high, l_is_inclusive = params
                    postfit_script = jinja2.Template(postfit_template).render(
                        new_cmssw_base = self.cmssw_base_dir_combine,
                        setup_dcards_script = setup_dcards_script_path,
                        postfit_plot_script = postfit_plot_script_path,
                        int_lumi_data = self.lumi,
                        yieldtable_script = yieldtable_script_path,
                        output_dir = combine_output_dir,
                        numerator_plotLabel = self.numerator_plotLabel,
                        denominator_plotLabel = self.denominator_plotLabel,
                        l_array = l_array,
                        l_range = l_range,
                        l_sub_dir = l_sub_dir,
                        l_eta_low = l_eta_low,
                        l_eta_high = l_eta_high,
                        l_pt_low = l_pt_low,
                        l_pt_high = l_pt_high,
                        l_is_inclusive = l_is_inclusive,
                        is_num = is_num,
                        numerator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram),
                        denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram),
                        selection = selection,
                        lepton_letter = 'e' if lepton == 'electron' else 'mu',
                        grep_value = "QCD" if self.use_QCD_fromMC else "data_fakes",
                    )
                    postfit_script_path = os.path.join(
                        self.dirs[DKEY_SCRIPTS],
                        'mlfit_%s_%s.sh' % (self.numerator_histogram if is_num else self.denominator_histogram, l_array)
                    )
                    logging.debug("Writing run_postFit script file = '%s'" % postfit_script_path)
                    with codecs.open(postfit_script_path, "w", "utf-8") as postfit_script_file:
                        postfit_script_file.write(postfit_script)
                        postfit_script_file.flush()
                        os.fsync(postfit_script_file.fileno())
                    add_chmodX(postfit_script_path)

        # Render the Makefile_postFit template that drives the combine fits
        key_prep_dcard_dir = getKey("prepareDatacards")
        fit_value_file = os.path.join(combine_output_dir, 'fit_values.txt')
        makefile_template_file = os.path.join(jinja_template_dir, 'Makefile_postFit.template')
        with open(makefile_template_file, 'r') as makefile_template_file_ptr:
            makefile_template = makefile_template_file_ptr.read()
        makefile_templatized = jinja2.Template(makefile_template).render(
            new_cmssw_base = self.cmssw_base_dir_combine,
            setup_dcards_script = setup_dcards_script_path,
            numerator_histogram = self.numerator_histogram,
            denominator_histogram = self.denominator_histogram,
            scripts_dir = self.dirs[DKEY_SCRIPTS],
            numerator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.numerator_histogram),
            denominator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.denominator_histogram),
            output_dir = combine_output_dir,
            numerator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram),
            denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram),
            lepton_bins = lepton_bins,
            fit_values = fit_value_file,
        )
        makefile_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'Makefile_postFit')
        logging.debug("Writing run_postFit script file = '%s'" % makefile_path)
        with codecs.open(makefile_path, "w", "utf-8") as makefile_path_file:
            makefile_path_file.write(makefile_templatized)
            makefile_path_file.flush()
            os.fsync(makefile_path_file.fileno())
        self.jobOptions_combine = {
            'inputFile' : ' '.join(datacards),
            'outputFile' : fit_value_file,
            'makefile_path' : makefile_path,
            'logFile' : os.path.join(self.dirs[DKEY_LOGS], 'postFit.log'),
        }

        # final fake-rate computation job
        key_comp_LeptonFakeRate = getKey('')
        leptonFR_final_output = os.path.join(combine_output_dir, 'leptonFakeRates.root')
        self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate] = {
            'inputFile' : [ fit_value_file, self.outputFile_hadd_stage2[key_hadd_stage2_job] ],
            'outputFile' : leptonFR_final_output,
            'absEtaBins_e' : self.absEtaBins_e,
            'ptBins_e' : self.ptBins_e,
            'absEtaBins_mu' : self.absEtaBins_mu,
            'ptBins_mu' : self.ptBins_mu,
            'logFile' : os.path.join(self.dirs[DKEY_LOGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate).replace('_cfg.py', '.log')),
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate)),
            'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "comp_LeptonFakeRate.png")
        }
        self.createCfg_comp_LeptonFakeRate(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate])
        self.targets.append(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate]['outputFile'])

    # --- batch scripts & Makefile ---------------------------------------------
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_Convs_LeptonFakeRate.py")
    self.sbatchFile_comp_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_LeptonFakeRate.py")
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        self.createScript_sbatch(self.executable_addBackgrounds_recursively, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_LeptonFakeRate,
          self.jobOptions_addBackgrounds_LeptonFakeRate)
        self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate,
          self.jobOptions_addBackgrounds_Convs_LeptonFakeRate)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_LeptonFakeRate)
        self.createScript_sbatch(self.executable_comp_LeptonFakeRate, self.sbatchFile_comp_LeptonFakeRate, self.jobOptions_comp_LeptonFakeRate)
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    # this step now does both e Conv, data_fakes and fakes_mc computation
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = " ".join([
        "phony_addBackgrounds_LeptonFakeRate",
        "phony_addBackgrounds_Convs_LeptonFakeRate",
        "phony_addBackgrounds_sum"
    ]))
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_combine(lines_makefile)
    self.addToMakefile_comp_LeptonFakeRate(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
def __init__(self,
             configDir, outputDir, cfgFile_prodNtuple, samples, max_files_per_job, era,
             preselection_cuts, leptonSelection, hadTauWP, check_output_files,
             running_method, version, num_parallel_jobs, pileup, golden_json,
             dry_run, isDebug, gen_matching_by_index, use_nonnominal, use_home,
             skip_tools_step, verbose=False, pool_id='',
            ):
    """Configuration for the Ntuple-production workflow.

    Stores all job parameters, validates the pileup profile and golden-JSON
    inputs, books per-sample config/ntuple/log directories and initializes the
    bookkeeping containers used later when the jobs are created.

    Raises:
      ValueError: if `running_method` is neither 'sbatch' nor 'makefile',
                  or if the pileup / golden-JSON file does not exist.
    """
    self.configDir = configDir
    self.outputDir = outputDir
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.preselection_cuts = preselection_cuts
    self.leptonSelection = leptonSelection
    self.hadTauWP = hadTauWP
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.isDebug = isDebug
    self.gen_matching_by_index = gen_matching_by_index
    self.use_nonnominal = use_nonnominal
    self.use_home = use_home
    self.pileup = pileup
    self.golden_json = golden_json
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    # validate auxiliary inputs up-front so mis-configuration fails fast
    if not os.path.isfile(self.pileup):
        raise ValueError('No such file: %s' % self.pileup)
    self.pileup_histograms = get_pileup_histograms(self.pileup)
    if not os.path.isfile(self.golden_json):
        raise ValueError('No such file: %s' % self.golden_json)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
    self.num_parallel_jobs = num_parallel_jobs
    self.skip_tools_step = skip_tools_step
    # empty pool_id means "generate a fresh unique id for this submission pool"
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates')
    logging.info("Templates directory is: %s" % self.template_dir)
    self.version = version
    # NOTE(review): a second, redundant `self.samples = samples` assignment was removed here
    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    # log/version files get a versioned suffix if they already exist
    self.stdout_file_path = os.path.join(self.configDir, "stdout_prodNtuple.log")
    self.stderr_file_path = os.path.join(self.configDir, "stderr_prodNtuple.log")
    self.sw_ver_file_cfg = os.path.join(self.configDir, "VERSION_prodNtuple.log")
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_prodNtuple.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version(
        (self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out))
    self.cfgFile_prodNtuple_original = os.path.join(self.template_dir, cfgFile_prodNtuple)
    self.sbatchFile_prodNtuple = os.path.join(self.configDir, "sbatch_prodNtuple.py")
    self.cfgFiles_prodNtuple_modified = {}
    self.logFiles_prodNtuple = {}
    self.inputFiles = {}
    self.outputFiles = {}
    self.filesToClean = []
    self.dirs = {}
    # per-sample directories: configs & logs under configDir, ntuples under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, process_name)
    # top-level (sample-independent) directories; both types live under configDir
    # (the original had an unreachable outputDir branch here -- removed)
    for dir_type in [DKEY_CFGS, DKEY_LOGS]:
        initDict(self.dirs, [dir_type])
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type)
    self.cvmfs_error_log = {}
    self.executable = "produceNtuple.sh"
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system
    """
    # ------------------------------------------------------------------
    # book output directories, one set per
    # (process, lepton+tau selection & FR-weight mode, tau charge selection)
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                # fake-rate weights only make sense for the Fakeable sidebands
                if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
                    continue
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
                for hadTau_charge_selection in self.hadTau_charge_selections:
                    key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                        initDict(self.dirs, [ key_dir, dir_type ])
                        # configs & logs under configDir; histograms & RLE lists under outputDir
                        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                                "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                                "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
    # sample-independent directories
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    for key in self.dirs.keys():
        if isinstance(self.dirs[key], dict):
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # ------------------------------------------------------------------
    # collect and split the input Ntuple files of every sample into jobs
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    # ------------------------------------------------------------------
    # create one 'analyze' job per (sample, selection, charge, shift, file chunk)
    # ------------------------------------------------------------------
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        if self.applyFakeRateWeights == "2tau":
            # FR weights applied to taus only -> leptons always kept Tight
            lepton_selection = "Tight"
        hadTau_selection = "|".join([ lepton_and_hadTau_selection, self.hadTau_selection_part2 ])
        if lepton_and_hadTau_selection == "forBDTtraining":
            lepton_selection = "Loose"
            hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
                continue
            if lepton_and_hadTau_frWeight == "disabled" and lepton_and_hadTau_selection not in [ "Tight", "forBDTtraining" ]:
                continue
            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
            for hadTau_charge_selection in self.hadTau_charge_selections:
                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
                    sample_category = sample_info["sample_category"]
                    is_mc = (sample_info["type"] == "mc")
                    is_signal = (sample_category == "signal")
                    for central_or_shift in self.central_or_shifts:
                        inputFileList = inputFileLists[sample_name]
                        for jobId in inputFileList.keys():
                            if central_or_shift != "central":
                                # systematic shifts are only run where they are used downstream
                                isFR_shape_shift = False
                                for FR_shape_shift in [ "CMS_ttHl_FRe_shape", "CMS_ttHl_FRm_shape", "CMS_ttHl_FRjt_norm", "CMS_ttHl_FRjt_shape" ]:
                                    if central_or_shift.find(FR_shape_shift) != -1:
                                        isFR_shape_shift = True
                                if not ((lepton_and_hadTau_selection == "Fakeable" and hadTau_charge_selection == "OS" and isFR_shape_shift) or
                                        (lepton_and_hadTau_selection == "Tight" and hadTau_charge_selection == "OS")):
                                    continue
                                if not is_mc and not isFR_shape_shift:
                                    continue
                            # theory-shape uncertainties only apply to their own process
                            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                                continue
                            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                                continue
                            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                                continue
                            # build config files for executing analysis code
                            key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_analyze_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)
                            ntupleFiles = inputFileList[jobId]
                            if len(ntupleFiles) == 0:
                                # FIX(review): original referenced undefined `key_file` here -> NameError
                                print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_analyze_job, ntupleFiles))
                                continue
                            self.jobOptions_analyze[key_analyze_job] = {
                                'ntupleFiles' : ntupleFiles,
                                'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % \
                                    (process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                                'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % \
                                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                                'sample_category' : sample_category,
                                'process_name_specific' : sample_info["process_name_specific"],
                                'triggers' : sample_info["triggers"],
                                'lepton_selection' : lepton_selection,
                                'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                                'hadTau_selection' : hadTau_selection,
                                'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                                'hadTau_charge_selection' : hadTau_charge_selection,
                                # FR weights are disabled in the fully Tight (signal) region
                                'applyFakeRateWeights' : self.applyFakeRateWeights if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) else "disabled",
                                'use_HIP_mitigation_bTag' : True,
                                'use_HIP_mitigation_mediumMuonId' : True,
                                'is_mc' : is_mc,
                                'central_or_shift' : central_or_shift,
                                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                                'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                                'selectBDT': self.isBDTtraining,
                                'changeBranchNames' : self.changeBranchNames
                            }
                            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])
                            # initialize input and output file names for hadd_stage1
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                                self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                            self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s_%s.root" % \
                                (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                    if self.isBDTtraining:
                        continue

                    if is_mc:
                        logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)
                        sample_categories = [ sample_category ]
                        if is_signal:
                            sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz" ]
                        for sample_category in sample_categories:
                            # sum non-fake contributions for each MC sample separately
                            # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j", TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
                            # output processes: TT; ...
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_addBackgrounds_job = getKey(process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            processes_input = None
                            if sample_category in [ "signal" ]:
                                lepton_and_hadTau_genMatches = []
                                lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_nonfakes)
                                lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_fakes)
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_and_hadTau_genMatches ]
                            elif sample_category in [ "ttH" ]:
                                processes_input = []
                                processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                            else:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ]
                            if processes_input:
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                                    'processes_input' : processes_input,
                                    'process_output' : sample_category
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])
                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                                if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                            # sum fake contributions for each MC sample separately
                            # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
                            # output processes: TT_fake; ...
                            key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            key_addBackgrounds_job = getKey(process_name, "%s_fake" % sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                            processes_input = None
                            if sample_category in [ "signal" ]:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                            elif sample_category in [ "ttH" ]:
                                processes_input = []
                                processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                                processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                            else:
                                processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                            if processes_input:
                                self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s_cfg.py" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_%s_%s_%s_%s.root" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s.log" % \
                                        (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                                    'processes_input' : processes_input,
                                    'process_output' : "%s_fake" % sample_category
                                }
                                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])
                                # initialize input and output file names for hadd_stage1_5
                                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                                if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                                self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                    if self.isBDTtraining:
                        continue

                    # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
                    if not is_mc:
                        key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                        key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                        if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.outputFile_hadd_stage1[key_hadd_stage1])

                if self.isBDTtraining:
                    continue

                # sum fake contributions for the total of all MC sample
                # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
                # output process: fakes_mc
                key_addBackgrounds_job = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                sample_categories = []
                sample_categories.extend(self.nonfake_backgrounds)
                sample_categories.extend([ "signal" ])
                processes_input = []
                for sample_category in sample_categories:
                    processes_input.append("%s_fake" % sample_category)
                self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_mc_%s_%s.log" % \
                        (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                    'processes_input' : processes_input,
                    'process_output' : "fakes_mc"
                }
                self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job])

                # initialize input and output file names for hadd_stage2
                key_hadd_stage2 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                if lepton_and_hadTau_selection == "Tight":
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job]['outputFile'])
                key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s_%s.root" % \
                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

    # BDT-training runs only need the analyze + hadd_stage1 steps
    if self.isBDTtraining:
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")
        return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for hadTau_charge_selection in self.hadTau_charge_selections:
        key_addFakes_job = getKey("fakes_data", hadTau_charge_selection)
        key_hadd_stage1_5 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), hadTau_charge_selection)
        category_sideband = None
        # NOTE(review): both branches currently yield the same sideband string -- verify intent
        if self.applyFakeRateWeights == "3L":
            category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
        elif self.applyFakeRateWeights == "2tau":
            category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
        else:
            # FIX(review): original referenced bare (undefined) `applyFakeRateWeights`
            raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
        self.jobOptions_addFakes[key_addFakes_job] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % \
                (self.channel, hadTau_charge_selection)),
            'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % \
                (self.channel, hadTau_charge_selection)),
            'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % \
                (self.channel, hadTau_charge_selection)),
            'category_signal' : "1l_2tau_%s_Tight" % hadTau_charge_selection,
            'category_sideband' : category_sideband
        }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), hadTau_charge_selection)
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_job = getKey(histogramToFit)
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
            'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
            'histogramDir' : self.histogramDir_prep_dcard,
            'histogramToFit' : histogramToFit,
            'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        # additional datacards for the same-sign control region, if run
        if "SS" in self.hadTau_charge_selections:
            key_prep_dcard_job = getKey(histogramToFit, "SS")
            key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_SS_%s_cfg.py" % (self.channel, histogramToFit)),
                'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_SS_%s.root" % (self.channel, histogramToFit)),
                'histogramDir' : self.histogramDir_prep_dcard_SS,
                'histogramToFit' : histogramToFit,
                'label' : 'SS'
            }
            self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_job = getKey("OS")
    key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
    self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : None,
        'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.hadTau_charge_selections:
        key_makePlots_job = getKey("SS")
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
            'histogramDir' : self.histogramDir_prep_dcard_SS,
            'label' : "SS",
            'make_plots_backgrounds' : self.make_plots_backgrounds
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
        # NOTE(review): reuses key "OS", overwriting the nominal plotting job -- verify intent
        key_makePlots_job = getKey("OS")
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable' : self.executable_make_plots_mcClosure,
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
            'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
        }
        self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
        self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
        self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
        self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
        self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def __init__(self,
             configDir, localDir, outputDir, output_file, executable, samples,
             max_files_per_job, era, binning, use_gen_weight, check_output_files,
             running_method, num_parallel_jobs, pool_id='', verbose=False,
             dry_run=False, use_home=False, keep_logs=False, submission_cmd=None,
            ):
    """Configuration for the 'nonResDenom' denominator-histogram workflow.

    Stores the job parameters, rotates/initializes the log files, optionally
    loads reference generator weights, and books per-sample and top-level
    config/histogram/log/plot/script directories.

    Raises:
      ValueError: if `running_method` is neither 'sbatch' nor 'makefile'.
      AssertionError: if the reference gen-weight file is malformed or a
        sample has no reference weight (NOTE(review): `assert` is stripped
        under `python -O`; consider raising ValueError instead).
    """
    self.configDir = configDir
    self.localDir = localDir
    self.outputDir = outputDir
    self.executable = executable
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.binning = binning
    self.use_gen_weight = use_gen_weight
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.use_home = use_home
    self.keep_logs = keep_logs
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.localDir, "Makefile_nonResDenom")
    self.num_parallel_jobs = num_parallel_jobs
    # empty pool_id means "generate a fresh unique id for this submission pool"
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates')
    logging.info("Templates directory is: %s" % self.template_dir)
    create_if_not_exists(self.configDir)
    create_if_not_exists(self.localDir)
    create_if_not_exists(self.outputDir)
    self.output_file = os.path.join(self.outputDir, output_file)
    # log/version/submission files; get_log_version() gives each a fresh
    # versioned name if a previous run already produced one
    self.stdout_file_path = os.path.join(self.localDir, "stdout_nonResDenom.log")
    self.stderr_file_path = os.path.join(self.localDir, "stderr_nonResDenom.log")
    self.sw_ver_file_cfg = os.path.join(self.localDir, "VERSION_nonResDenom.log")
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_nonResDenom.log")
    self.submission_out = os.path.join(self.localDir, "SUBMISSION_nonResDenom.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version(
        (self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out))
    check_submission_cmd(self.submission_out, submission_cmd)
    self.sbatchFile_nonResDenom = os.path.join(self.localDir, "sbatch_nonResDenom.py")
    # per-job bookkeeping containers, filled later when the jobs are created
    self.cfgFiles_nonResDenom = {}
    self.logFiles_nonResDenom = {}
    self.scriptFiles_nonResDenom = {}
    self.jobOptions_sbatch = {}
    self.inputFiles = {}
    self.outputFiles_tmp = {}
    self.outputFiles = {}
    self.phoniesToAdd = []
    self.filesToClean = [self.configDir]
    self.targets = []
    self.dirs = {}
    all_dirs = [ DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS, DKEY_SCRIPTS, DKEY_HADD_RT ]
    # directory types kept on the config/local side (everything else goes to outputDir)
    cfg_dirs = [ DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT ]
    # optional reference generator weights, one "<sample> <weight>" pair per line
    self.gen_weights = {}
    if self.use_gen_weight:
        ref_genweights = os.path.join(
            os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau', 'data',
            'refGenWeight_{}.txt'.format(era))
        with open(ref_genweights, 'r') as f:
            for line in f:
                line_split = line.strip().split()
                assert (len(line_split) == 2)
                sample_name = line_split[0]
                ref_genweight = float(line_split[1])
                # each sample may appear only once in the reference file
                assert (sample_name not in self.gen_weights)
                self.gen_weights[sample_name] = ref_genweight
    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        if self.use_gen_weight:
            # '_duplicate' samples share the reference weight of their base sample
            assert (re.sub('_duplicate$', '', process_name) in self.gen_weights)
        key_dir = getKey(process_name)
        for dir_type in all_dirs:
            # plots are produced only at the top level, not per sample
            if dir_type == DKEY_PLOTS:
                continue
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in cfg_dirs:
                dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
                self.dirs[key_dir][dir_type] = os.path.join(dir_choice, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, process_name)
    for dir_type in cfg_dirs:
        initDict(self.dirs, [dir_type])
        dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
        self.dirs[dir_type] = os.path.join(dir_choice, dir_type)
        # localDir contents are transient and scheduled for cleanup
        if dir_choice != self.configDir:
            self.filesToClean.append(self.dirs[dir_type])
    self.cvmfs_error_log = {}
    # per-task job counters, incremented as jobs are created
    self.num_jobs = { 'hadd': 0, 'nonResDenom': 0, 'plot': 0, }
def __init__(self, treeName, outputDir, cfgDir, executable_addMEM, samples, era, debug,
             running_method, max_files_per_job, mem_integrations_per_job,
             max_mem_integrations, num_parallel_jobs, leptonSelection, hadTauSelection,
             isForBDTtraining, channel, pool_id=''):
    """Configure an addMEM (Matrix Element Method) production run.

    Sets up the bookkeeping needed to split MEM computation over many jobs:
    per-sample directory layout, config/log/sbatch file locations, and
    stdout/stderr log files under the config directory.

    :param treeName: name of the input TTree
    :param outputDir: directory that receives the (final) Ntuples
    :param cfgDir: directory that receives config files, logs, Makefile, sbatch script
    :param executable_addMEM: name of the addMEM executable to run
    :param samples: dict of sample definitions, keyed by DBS sample name
    :param era: data-taking era string
    :param debug: enable debug mode flag (stored, used downstream)
    :param running_method: either "sbatch" or "makefile" (case-insensitive)
    :param max_files_per_job: max number of input files per job
    :param mem_integrations_per_job: number of MEM integrations per job
    :param max_mem_integrations: upper limit on total MEM integrations
    :param num_parallel_jobs: number of jobs run in parallel by make
    :param leptonSelection: lepton selection string (used in branch name)
    :param hadTauSelection: "<definition>|<workingPoint>" string; must contain '|'
    :param isForBDTtraining: whether the Ntuples are meant for BDT training
    :param channel: analysis channel name (used in file names)
    :param pool_id: optional job-pool identifier; a random UUID if empty
    :raises ValueError: if running_method is not "sbatch" or "makefile"
    """
    self.treeName = treeName
    self.outputDir = outputDir
    self.cfgDir = cfgDir
    self.executable_addMEM = executable_addMEM
    self.mem_integrations_per_job = mem_integrations_per_job
    self.max_files_per_job = max_files_per_job
    self.max_mem_integrations = max_mem_integrations
    self.samples = samples
    self.era = era
    self.debug = debug
    self.channel = channel
    self.leptonSelection = leptonSelection
    self.hadTauSelection = hadTauSelection
    # Split "<definition>|<workingPoint>" once; raises IndexError if the
    # separator is missing (same behavior as before, just computed once).
    hadTauSelection_parts = self.hadTauSelection.split('|')
    self.hadTauDefinition = hadTauSelection_parts[0]
    self.hadTauWorkingPoint = hadTauSelection_parts[1]
    self.maxPermutations_branchName = "maxPermutations_addMEM_%s_lep%s_tau%s_%s" % (
        self.channel,
        self.leptonSelection,
        self.hadTauDefinition,
        self.hadTauWorkingPoint,
    )
    self.isForBDTtraining = isForBDTtraining
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = False
    self.is_makefile = False
    if self.running_method.lower() == "sbatch":
        self.is_sbatch = True
    else:
        self.is_makefile = True
    self.makefile = os.path.join(self.cfgDir, "Makefile_%s" % self.channel)
    self.num_parallel_jobs = num_parallel_jobs
    # Generate a fresh pool id when none was supplied by the caller.
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    logging.info("Working directory is: {workingDir}".format(workingDir=self.workingDir))
    for dirPath in [self.outputDir, self.cfgDir]:
        create_if_not_exists(dirPath)
    # Per-channel stdout/stderr capture files live in the config directory.
    self.stdout_file = codecs.open(
        os.path.join(self.cfgDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
    self.stderr_file = codecs.open(
        os.path.join(self.cfgDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
    self.dirs = {}
    self.cfgFiles_addMEM_modified = {}
    self.shFiles_addMEM_modified = {}
    self.logFiles_addMEM = {}
    self.sbatchFile_addMEM = os.path.join(self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
    self.inputFiles = {}
    self.outputFiles = {}
    self.hadd_records = {}
    self.filesToClean = []
    # Build the per-sample directory layout: Ntuples go to outputDir,
    # configs/logs/hadd artifacts go to cfgDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in ["additional_signal_overlap", "background_data_estimate"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
            initDict(self.dirs, [key_dir, dir_type])
            self.dirs[key_dir][dir_type] = os.path.join(
                self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
            initDict(self.dirs, [key_dir, dir_type])
            self.dirs[key_dir][dir_type] = os.path.join(
                self.cfgDir, dir_type, self.channel, process_name)
    self.cvmfs_error_log = {}
def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, lepton_charge_selections, hadTau_selection, applyFakeRateWeights, central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, executable_addBackgrounds, executable_addFakes, executable_addFlips, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configure the 2lss_1tau analysis workflow.

    Builds the lepton/hadTau selection and gen-matching bookkeeping, the
    per-sample output directory layout, and the file locations of every
    config/histogram produced by the analyze -> addBackgrounds ->
    addFakes/addFlips -> datacard/plotting chain.

    :param cfgFile_analyze_original: analyze config template, resolved
        relative to the working directory
    :param lepton_charge_selections: e.g. [ "SS", "OS" ]
    :param applyFakeRateWeights: "3L" or "2lepton"; anything else raises
    :param select_rle_output: write run:lumi:event text files if True
    :raises ValueError: on invalid applyFakeRateWeights, or (in the
        "2lepton" branch) on an era other than '2015'/'2016'
    """
    analyzeConfig.__init__(self, outputDir, executable_analyze, "2lss_1tau", central_or_shifts, max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, histograms_to_fit)
    self.samples = samples
    self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
    self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
    self.lepton_charge_selections = lepton_charge_selections
    self.hadTau_selection_part2 = hadTau_selection
    self.applyFakeRateWeights = applyFakeRateWeights
    # Gen-match labels: counts of genuine leptons/taus vs. jets faking them
    # (e.g. "2l0j" = 2 genuine leptons, 0 jets; "1t0e0m0j" = 1 genuine tau).
    self.lepton_genMatches = [ "2l0j", "1l1j", "0l2j" ]
    self.hadTau_genMatches = [ "1t0e0m0j", "0t1e0m0j", "0t0e1m0j", "0t0e0m1j" ]
    self.apply_leptonGenMatching = None
    self.apply_hadTauGenMatching = None
    self.lepton_and_hadTau_genMatches_nonfakes = []
    self.lepton_and_hadTau_genMatches_fakes = []
    if self.applyFakeRateWeights == "3L":
        # Fake-rate weights for all three objects: gen-match both leptons
        # and the hadronic tau; a combination is "nonfake" only when neither
        # the lepton nor the tau label ends in a jet-fake ("...1j"/"...2j").
        self.apply_leptonGenMatching = True
        self.apply_hadTauGenMatching = True
        for lepton_genMatch in self.lepton_genMatches:
            for hadTau_genMatch in self.hadTau_genMatches:
                lepton_and_hadTau_genMatch = "&".join([ lepton_genMatch, hadTau_genMatch ])
                if lepton_genMatch.endswith("0j") and hadTau_genMatch.endswith("0j"):
                    self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_and_hadTau_genMatch)
                else:
                    self.lepton_and_hadTau_genMatches_fakes.append(lepton_and_hadTau_genMatch)
    elif applyFakeRateWeights == "2lepton":
        # Fake-rate weights for the two leptons only: the tau is not
        # gen-matched, so only the lepton labels enter the fake split.
        self.apply_leptonGenMatching = True
        self.apply_hadTauGenMatching = False
        for lepton_genMatch in self.lepton_genMatches:
            if lepton_genMatch.endswith("0j"):
                self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_genMatch)
            else:
                self.lepton_and_hadTau_genMatches_fakes.append(lepton_genMatch)
    else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)
    self.executable_addBackgrounds = executable_addBackgrounds
    self.executable_addFakes = executable_addFakes
    self.executable_addFlips = executable_addFlips
    # Directory layout: one (cfg, hist, log, rle) set per
    # sample x selection x fr-weight x charge-selection combination.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
            for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
                # Fake-rate weights only make sense for Fakeable selections.
                if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
                    continue
                lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
                for lepton_charge_selection in self.lepton_charge_selections:
                    key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
                    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                        initDict(self.dirs, [ key_dir, dir_type ])
                        self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection ]), process_name)
    # Datacard and plot directories are shared across samples.
    for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ dir_type ])
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs
    if self.applyFakeRateWeights == "3L":
        self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]
        self.prep_dcard_processesToCopy = [ "data_obs", "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
        self.make_plots_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "flips_data" ]
    elif applyFakeRateWeights == "2lepton":
        # In the 2-lepton scheme the reducible backgrounds are taken from
        # data, so the listed MC samples are re-labelled
        # "background_data_estimate" (which excludes them from the MC loop
        # above) and WZ is promoted to its own category. The DBS dataset
        # names are era-specific.
        if era == '2015':
            for sample_name in [ "/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
                                 "/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/ST_tW_top_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext1-v1/MINIAODSIM",
                                 "/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
                                 "/WWTo2L2Nu_13TeV-powheg/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
                                 "/ZZTo4L_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM" ]:
                self.samples[sample_name]["sample_category"] = "background_data_estimate"
            self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM"]["sample_category"] = "WZ"
        elif era == '2016':
            for sample_name in [ '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
                                 '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
                                 '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v4/MINIAODSIM',
                                 '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
                                 '/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
                                 '/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
                                 '/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/ST_tW_top_5f_NoFullyHadronicDecays_13TeV-powheg_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/ST_t-channel_antitop_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/ST_t-channel_top_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/WWTo2L2Nu_13TeV-powheg/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
                                 '/ZZTo4L_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14-v1/MINIAODSIM' ]:
                self.samples[sample_name]["sample_category"] = "background_data_estimate"
            self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM"]["sample_category"] = "WZ"
        else:
            raise ValueError("Invalid Configuration parameter 'era' = %s !!" % era)
        self.nonfake_backgrounds = [ "TTW", "TTZ", "WZ", "Rares" ]
        self.prep_dcard_processesToCopy = [ "data_obs", "TTW", "TTZ", "WZ", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
        self.make_plots_backgrounds = [ "TTW", "TTZ", "WZ", "Rares", "fakes_data", "flips_data" ]
    else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)
    # Config templates for every downstream step, all resolved relative to
    # the working directory set by the base-class constructor.
    self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
    self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
    self.cfgFile_addBackgrounds_modified = {}
    self.histogramFile_addBackgrounds = {}
    self.histogramDir_addBackgrounds = {}
    self.process_output_addBackgrounds = {}
    self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
    self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundLeptonFakes_cfg.py")
    self.cfgFile_addFakes_modified = {}
    self.histogramFile_addFakes = {}
    self.histogramFile_addFlips = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundLeptonFlips_%s.root" % self.channel)
    self.cfgFile_addFlips_original = os.path.join(self.workingDir, "addBackgroundLeptonFlips_cfg.py")
    self.cfgFile_addFlips_modified = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundLeptonFlips_%s_cfg.py" % self.channel)
    # Signal region is same-sign Tight; the opposite-sign region feeds the
    # charge-flip estimate.
    self.histogramDir_prep_dcard = "2lss_1tau_SS_Tight"
    self.histogramDir_prep_dcard_OS = "2lss_1tau_OS_Tight"
    self.cfgFile_make_plots_original = os.path.join(self.workingDir, "makePlots_2lss_1tau_cfg.py")
    self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
    self.cfgFiles_make_plots_mcClosure_modified = []
    self.select_rle_output = select_rle_output
def create(self):
    """Creates all necessary config files and runs the complete analysis
    workflow -- either locally or on the batch system.

    Steps: build and create the per-sample directory layout, collect the
    input file lists, write one analyze config per (sample, jobId), register
    the hadd_stage1 inputs/outputs, optionally write the sbatch submission
    script, and finally emit the Makefile.

    :return: self.num_jobs (job counter maintained by the base class)
    """
    # --- build directory layout: configs/logs under configDir,
    #     histograms/RLE files under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # --- collect input Ntuple file lists, split into jobs
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    # --- write one analyze config per (sample, jobId)
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")
        inputFileList = inputFileLists[sample_name]
        for jobId in inputFileList.keys():
            # build config files for executing analysis code
            key_dir = getKey(process_name)
            key_analyze_job = getKey(process_name, jobId)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
                # BUGFIX: previously referenced undefined name 'key_file',
                # which raised NameError whenever this warning fired.
                logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" % (
                    key_analyze_job, ntupleFiles))
                continue
            self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % \
                    (self.channel, process_name, jobId)),
                'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%i.root" % \
                    (process_name, jobId)),
                'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % \
                    (self.channel, process_name, jobId)),
                'sample_category' : sample_category,
                'hadTau_selection' : self.hadTau_selection,
                'use_HIP_mitigation_mediumMuonId' : True,
                'is_mc' : is_mc,
                # Scale MC to the target luminosity; data (or lumi disabled) gets weight 1.
                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                'selectBDT' : True,
                'changeBranchNames' : self.changeBranchNames
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

            # initialize input and output file names for hadd_stage1
            key_hadd_stage1 = getKey(process_name)
            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            # NOTE(review): output file + target are (re)set once per jobId,
            # so self.targets receives duplicate entries for multi-job
            # samples -- harmless for make, but worth confirming.
            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                (self.channel, process_name))
            self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1])

    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(
            self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def create(self): """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system """ for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") logging.info("Building dictionaries for sample %s..." % process_name) for lepton_selection in self.lepton_selections: for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in ["Tight"]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for lepton_charge_selection in self.lepton_charge_selections: if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS': # Run MC closure only for the region that complements the SR continue central_or_shift_extensions = ["", "hadd", "addBackgrounds"] central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated for central_or_shift_or_dummy in central_or_shifts_extended: process_name_extended = [ process_name, "hadd" ] for process_name_or_dummy in process_name_extended: if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]: continue if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics( central_or_shift_or_dummy, is_mc, lepton_selection, lepton_charge_selection, sample_info ): continue key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift_or_dummy) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: 
continue initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy, central_or_shift_or_dummy) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy) for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]: key_dir = getKey(subdirectory) for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: initDict(self.dirs, [ key_dir, dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]: self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory) else: self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory) for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]: if dir_type == DKEY_SYNC and not self.do_sync: continue initDict(self.dirs, [ dir_type ]) if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]: self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel) else: self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel) numDirectories = 0 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: numDirectories += len(self.dirs[key]) else: numDirectories += 1 logging.info("Creating directory structure (numDirectories = %i)" % numDirectories) numDirectories_created = 0; frac = 1 for key in self.dirs.keys(): if type(self.dirs[key]) == dict: for dir_type in self.dirs[key].keys(): create_if_not_exists(self.dirs[key][dir_type]) numDirectories_created += len(self.dirs[key]) else: create_if_not_exists(self.dirs[key]) 
numDirectories_created = numDirectories_created + 1 while 100*numDirectories_created >= frac*numDirectories: logging.info(" %i%% completed" % frac) frac = frac + 1 logging.info("Done.") inputFileLists = {} for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue logging.info("Checking input files for sample %s" % sample_info["process_name_specific"]) inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job) mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights') for lepton_selection in self.lepton_selections: electron_selection = lepton_selection muon_selection = lepton_selection hadTauVeto_selection = "Tight" hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ]) if lepton_selection == "Fakeable_mcClosure_e": electron_selection = "Fakeable" muon_selection = "Tight" elif lepton_selection == "Fakeable_mcClosure_m": electron_selection = "Tight" muon_selection = "Fakeable" for lepton_frWeight in self.lepton_frWeights: if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"): continue if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight) for lepton_charge_selection in self.lepton_charge_selections: if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS': # Run MC closure only for the region that complements the SR continue for sample_name, sample_info in self.samples.items(): if not sample_info["use_it"]: continue process_name = sample_info["process_name_specific"] logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name)) inputFileList = inputFileLists[sample_name] sample_category = sample_info["sample_category"] is_mc = (sample_info["type"] == "mc") use_th_weights = self.runTHweights(sample_info) central_or_shift_dedicated 
= self.central_or_shifts if use_th_weights else self.central_or_shifts_external for central_or_shift in central_or_shift_dedicated: if not self.accept_systematics( central_or_shift, is_mc, lepton_selection, lepton_charge_selection, sample_info ): continue central_or_shifts_local = [] if central_or_shift == "central" and not use_th_weights: for central_or_shift_local in self.central_or_shifts_internal: if self.accept_systematics( central_or_shift_local, is_mc, lepton_selection, lepton_charge_selection, sample_info ): central_or_shifts_local.append(central_or_shift_local) logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift)) # build config files for executing analysis code key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift) for jobId in inputFileList.keys(): analyze_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId) key_analyze_job = getKey(*analyze_job_tuple) ntupleFiles = inputFileList[jobId] if len(ntupleFiles) == 0: logging.warning("No input ntuples for %s --> skipping job !!" 
% (key_analyze_job)) continue syncOutput = '' syncTree = '' syncGenMatch = self.lepton_genMatches_nonfakes if self.do_sync: mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight) if lepton_selection_and_frWeight == 'Tight': if lepton_charge_selection == 'SS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_SR' % self.channel elif lepton_charge_selection == 'OS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Flip.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_Flip' % self.channel else: continue elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights' and lepton_charge_selection == 'SS': syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift)) syncTree = 'syncTree_%s_Fake' % self.channel elif mcClosure_match and lepton_charge_selection == 'SS': mcClosure_type = mcClosure_match.group('type') syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type)) syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type) else: continue if syncTree and central_or_shift != "central": syncTree = os.path.join(central_or_shift, syncTree) syncRLE = '' if self.do_sync and self.rle_select: syncRLE = self.rle_select % syncTree if not os.path.isfile(syncRLE): logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE) continue if syncOutput: self.inputFiles_sync['sync'].append(syncOutput) cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple) logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple) rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) 
\ if self.select_rle_output else "" histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple) self.jobOptions_analyze[key_analyze_job] = { 'ntupleFiles' : ntupleFiles, 'cfgFile_modified' : cfgFile_modified_path, 'histogramFile' : histogramFile_path, 'logFile' : logFile_path, 'selEventsFileName_output' : rleOutputFile_path, 'electronSelection' : electron_selection, 'muonSelection' : muon_selection, 'apply_leptonGenMatching' : self.apply_leptonGenMatching, 'leptonChargeSelection' : lepton_charge_selection, 'hadTauSelection_veto' : hadTauVeto_selection, 'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled", 'central_or_shift' : central_or_shift, 'central_or_shifts_local' : central_or_shifts_local, 'syncOutput' : syncOutput, 'syncTree' : syncTree, 'syncRLE' : syncRLE, 'useNonNominal' : self.use_nonnominal, 'apply_hlt_filter' : self.hlt_filter, 'syncGenMatch' : syncGenMatch, } self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection) # initialize input and output file names for hadd_stage1 key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple) if not key_hadd_stage1_job in self.inputFiles_hadd_stage1: self.inputFiles_hadd_stage1[key_hadd_stage1_job] = [] self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile']) self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple) if self.do_sync: continue if is_mc: logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name) sample_categories = [ sample_category ] for sample_category in 
sample_categories: # sum non-fake and fake contributions for each MC sample separately genMatch_categories = [ "nonfake", "Convs", "fake", "flip" ] for genMatch_category in genMatch_categories: key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, "addBackgrounds") addBackgrounds_job_tuple = None processes_input = None process_output = None if genMatch_category == "nonfake": # sum non-fake contributions for each MC sample separately # input processes: TT2l0g0j; ... # output processes: TT; ... if sample_category in self.ttHProcs: lepton_genMatches = [] lepton_genMatches.extend(self.lepton_genMatches_nonfakes) lepton_genMatches.extend(self.lepton_genMatches_Convs) processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ] process_output = sample_category addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "Convs": # sum conversion background contributions for each MC sample separately # input processes: TT1l1g0j, TT0l2g0j; ... # output processes: TT_Convs; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_Convs ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_Convs ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_Convs ] process_output = "%s_Convs" % sample_category addBackgrounds_job_tuple = (process_name, "%s_Convs" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "fake": # sum fake contributions for each MC sample separately # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ... # output processes: TT_fake; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ] process_output = "%s_fake" % sample_category addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) elif genMatch_category == "flip": # sum flip contributions for each MC sample separately # input processes: TT2l2f0g0j&2t0e0m0j, TT2l1f0g0j&2t0e0m0j; ... # output processes: TT_flip; ... 
if sample_category in self.ttHProcs: processes_input = [] processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_flips ]) processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_flips ]) else: processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_flips ] process_output = "%s_flip" % sample_category addBackgrounds_job_tuple = (process_name, "%s_flip" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection) if processes_input: logging.info(" ...for genMatch option = '%s'" % genMatch_category) key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple) cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple) outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_tuple) self.jobOptions_addBackgrounds[key_addBackgrounds_job] = { 'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job], 'cfgFile_modified' : cfgFile_modified, 'outputFile' : outputFile, 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : process_output } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job]) # initialize input and output file names for hadd_stage1_5 key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, 
lepton_charge_selection) hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile']) self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple) # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5 if not is_mc: key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5: self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = [] self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job]) if self.do_sync: continue # sum fake background contributions for the total of all MC samples # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ... 
# output process: fakes_mc key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_dir = getKey("addBackgrounds") addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_fake" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "fakes_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]) # sum flip background contributions for the total of all MC sample # input processes: TT2l1f0g0j,TT2l2f0g0j; ... 
# output process: flips_mc addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in sample_categories: processes_input.append("%s_flip" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "flips_mc" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]) # sum conversion background contributions for the total of all MC samples # input processes: TT1l1g0j, TT0l2g0j; ... 
# output process: Convs addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, lepton_charge_selection) key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple) sample_categories = [] sample_categories.extend(self.nonfake_backgrounds) sample_categories.extend(self.ttHProcs) processes_input = [] for sample_category in self.convs_backgrounds: processes_input.append("%s_Convs" % sample_category) self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple), 'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple), 'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple), 'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ], 'processes_input' : processes_input, 'process_output' : "Convs" } self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]) # initialize input and output file names for hadd_stage2 key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection) hadd_stage2_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection) key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple) if not key_hadd_stage2_job in self.inputFiles_hadd_stage2: self.inputFiles_hadd_stage2[key_hadd_stage2_job] = [] if lepton_selection == "Tight": self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile']) 
self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile']) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple) if self.do_sync: if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_syncNtuple(lines_makefile) outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel) self.outputFile_sync['sync'] = outputFile_sync_path self.addToMakefile_hadd_sync(lines_makefile) self.addToMakefile_validate(lines_makefile) self.targets.extend(self.phoniesToAdd) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs logging.info("Creating configuration files to run 'addBackgroundFakes'") for lepton_charge_selection in self.lepton_charge_selections: key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), lepton_charge_selection) key_addFakes_dir = getKey("addBackgroundLeptonFakes") addFakes_job_tuple = (lepton_charge_selection) key_addFakes_job = getKey("data_fakes", lepton_charge_selection) category_sideband = None if self.applyFakeRateWeights == "2lepton": category_sideband = "ttWctrl_%s_Fakeable_wFakeRateWeights" % lepton_charge_selection else: raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" 
% self.applyFakeRateWeights) self.jobOptions_addFakes[key_addFakes_job] = { 'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job], 'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % addFakes_job_tuple), 'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % addFakes_job_tuple), 'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % addFakes_job_tuple), 'category_signal' : "ttWctrl_%s_Tight" % lepton_charge_selection, 'category_sideband' : category_sideband } self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), lepton_charge_selection) self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) #-------------------------------------------------------------------------- # CV: add histograms in OS and SS regions, # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_addFakes_job = getKey("data_fakes", "OS") key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS") if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6: self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = [] self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile']) self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job]) self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root") 
#-------------------------------------------------------------------------- logging.info("Creating configuration files to run 'addBackgroundFlips'") key_addFlips_dir = getKey("addBackgroundLeptonFlips") key_addFlips_job = getKey("data_flips") self.jobOptions_addFlips[key_addFlips_job] = { 'inputFile' : self.outputFile_hadd_stage1_6, 'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_cfg.py"), 'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips.root"), 'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips.log"), 'category_signal' : "ttWctrl_SS_Tight", 'category_sideband' : "ttWctrl_OS_Tight" } self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job]) key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile']) logging.info("Creating configuration files to run 'prepareDatacards'") for histogramToFit in self.histograms_to_fit: key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_prep_dcard_dir = getKey("prepareDatacards") prep_dcard_job_tuple = (self.channel, histogramToFit) key_prep_dcard_job = getKey(histogramToFit) self.jobOptions_prep_dcard[key_prep_dcard_job] = { 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple), 'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple), 'histogramDir' : self.histogramDir_prep_dcard, 'histogramToFit' : histogramToFit, 'label' : None } self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job]) # add shape templates for the following systematic uncertainties: # - 'CMS_ttHl_Clos_norm_e' # - 
'CMS_ttHl_Clos_shape_e' # - 'CMS_ttHl_Clos_norm_m' # - 'CMS_ttHl_Clos_shape_m' key_add_syst_fakerate_dir = getKey("addSystFakeRates") add_syst_fakerate_job_tuple = (self.channel, histogramToFit) key_add_syst_fakerate_job = getKey(histogramToFit) self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = { 'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'], 'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple), 'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple), 'category' : self.channel, 'histogramToFit' : histogramToFit, 'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png") } histogramDir_nominal = self.histogramDir_prep_dcard for lepton_type in [ 'e', 'm' ]: lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type if lepton_mcClosure not in self.lepton_selections: continue lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled") key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, 'SS') histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, 'SS')] self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({ 'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections, 'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit), 'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'], 'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit) }) 
self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job]) logging.info("Creating configuration files to run 'makePlots'") key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS") key_makePlots_dir = getKey("makePlots") key_makePlots_job = getKey("SS") self.jobOptions_make_plots[key_makePlots_job] = { 'executable' : self.executable_make_plots, 'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job], 'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel), 'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel), 'histogramDir' : self.histogramDir_prep_dcard, 'label' : "t#bar{t}W control region", 'make_plots_backgrounds' : self.make_plots_backgrounds } self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job]) self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel) self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel) self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel) self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel) self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel) if self.is_sbatch: logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze) self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds) self.createScript_sbatch(self.executable_addBackgrounds, 
self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes) self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes) logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips) self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips) logging.info("Creating Makefile") lines_makefile = [] self.addToMakefile_analyze(lines_makefile) self.addToMakefile_hadd_stage1(lines_makefile) self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile) self.addToMakefile_hadd_stage2(lines_makefile) self.addToMakefile_prep_dcard(lines_makefile) self.addToMakefile_add_syst_fakerate(lines_makefile) self.addToMakefile_make_plots(lines_makefile) self.addToMakefile_validate(lines_makefile) self.createMakefile(lines_makefile) logging.info("Done.") return self.num_jobs
def create(self):
    """Create all config files and set up the complete analysis workflow,
    to be run either locally (Makefile) or on the batch system (sbatch).

    The method proceeds in stages:
      1. build the dictionary of output directories (``self.dirs``);
      2. create those directories on disk, logging percent progress;
      3. build per-sample/per-shift 'analyze' job configurations and the
         hadd stage-1/stage-2 histogram-merging bookkeeping;
      4. build 'comp_jetToTauFakeRate' and 'makePlots' job configurations;
      5. write the sbatch submission scripts (if applicable) and the Makefile.

    Returns ``self.num_jobs`` (number of jobs booked for this workflow).
    """
    # ------------------------------------------------------------------
    # Stage 1: build self.dirs, keyed per (process, charge selection,
    # systematic shift).
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")
        logging.info("Building dictionaries for sample %s..." % process_name)
        for charge_selection in self.charge_selections:
            # "", "hadd" and "addBackgrounds" are pseudo-shifts used only to
            # reserve directories for the merging/summing steps
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
            for central_or_shift_or_dummy in central_or_shifts_extended:
                process_name_extended = [process_name, "hadd"]
                for process_name_or_dummy in process_name_extended:
                    # no per-process directory needed for the hadd/hadd combination
                    if central_or_shift_or_dummy in [ "hadd" ] and process_name_or_dummy in ["hadd"]:
                        continue
                    # genuine systematic shifts apply to MC only, and only if
                    # this sample accepts the shift
                    if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
                        if not is_mc:
                            continue
                        if not self.accept_central_or_shift(
                                central_or_shift_or_dummy, sample_info):
                            continue
                    key_dir = getKey(process_name_or_dummy, charge_selection,
                                     central_or_shift_or_dummy)
                    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                        initDict(self.dirs, [key_dir, dir_type])
                        # configs and logs go under configDir; histograms and
                        # run:lumi:event lists go under outputDir
                        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.configDir, dir_type, self.channel,
                                "_".join([charge_selection]),
                                process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                "_".join([charge_selection]),
                                process_name_or_dummy)
    # directories for the downstream fake-rate fit and plotting steps
    for subdirectory in ["comp_jetToTauFakeRate", "makePlots"]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, subdirectory)
    # channel-level directories (no per-process or per-step key)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # ------------------------------------------------------------------
    # Stage 2: create the directories on disk, logging percent progress
    # ------------------------------------------------------------------
    # self.dirs values are either dicts of dir_type -> path or plain paths,
    # hence the two counting branches
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        # emit one log line for each full percent of progress passed
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # ------------------------------------------------------------------
    # Stage 3: split each sample's input ntuples into jobs and build the
    # per-job 'analyze' configurations plus hadd stage-1/2 bookkeeping
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_info, self.max_files_per_job)
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        inputFileList = inputFileLists[sample_name]
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        sample_category = sample_info["sample_category"]
        for charge_selection in self.charge_selections:
            for central_or_shift in self.central_or_shifts:
                # systematic variations are meaningful for MC only
                if central_or_shift != "central" and not is_mc:
                    continue
                if not self.accept_central_or_shift(
                        central_or_shift, sample_info):
                    continue
                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name, charge_selection, central_or_shift)
                for jobId in inputFileList.keys():
                    analyze_job_tuple = (process_name, charge_selection, central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning(
                            "No input ntuples for %s --> skipping job !!" %
                            (key_analyze_job))
                        continue
                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                    histogramFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_HIST],
                        "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                    # run:lumi:event output is written only on request
                    rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES],
                                                      "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                         if self.select_rle_output else ""
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': cfgFile_modified_path,
                        'histogramFile': histogramFile_path,
                        'logFile': logFile_path,
                        'chargeSelection': charge_selection,
                        'jet_minPt': self.jet_minPt,
                        'jet_maxPt': self.jet_maxPt,
                        'jet_minAbsEta': self.jet_minAbsEta,
                        'jet_maxAbsEta': self.jet_maxAbsEta,
                        'hadTau_selection_tight': self.hadTau_selection_tight,
                        'hadTauSelection_denominator': self.hadTau_selection_denominator,
                        'hadTauSelections_numerator': self.hadTau_selections_numerator,
                        'trigMatchingOptions': self.trigMatchingOptions,
                        'selEventsFileName_output': rleOutputFile_path,
                        'absEtaBins': self.absEtaBins,
                        'decayModes': self.decayModes,
                        'central_or_shift': central_or_shift,
                        'central_or_shifts_local': [],
                        'apply_hlt_filter': self.hlt_filter,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job], sample_info)
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1_dir = getKey(process_name, charge_selection)
                    hadd_stage1_job_tuple = (process_name, charge_selection)
                    key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                    if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[
                            key_hadd_stage1_job] = []
                    self.inputFiles_hadd_stage1[
                        key_hadd_stage1_job].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                    self.outputFile_hadd_stage1[
                        key_hadd_stage1_job] = os.path.join(
                            self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                            "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)
            # initialize input and output file names for hadd_stage2
            key_hadd_stage1_job = getKey(process_name, charge_selection)
            key_hadd_stage2_dir = getKey("hadd", charge_selection)
            key_hadd_stage2_job = getKey(charge_selection)
            if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage2[
                key_hadd_stage2_job] = os.path.join(
                    self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                    "hadd_stage2_%s.root" % charge_selection)

    # ------------------------------------------------------------------
    # Stage 4a: fake-rate computation jobs, one per
    # (charge selection, trigger-matching option)
    # ------------------------------------------------------------------
    logging.info(
        "Creating configuration files for executing 'comp_jetToTauFakeRate'"
    )
    for charge_selection in self.charge_selections:
        charge_key = "comp_%s" % charge_selection
        self.comp_input_files[charge_key] = []
        for trigMatchingOption in self.trigMatchingOptions:
            key_hadd_stage2_job = getKey(charge_selection)
            key_comp_jetToTauFakeRate_dir = getKey("comp_jetToTauFakeRate")
            key_comp_jetToTauFakeRate_job = getKey(charge_selection, trigMatchingOption)
            self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2_job],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "comp_jetToTauFakeRate_%s_%s_cfg.py" %
                        (charge_selection, trigMatchingOption)),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_HIST],
                        "comp_jetToTauFakeRate_%s_%s.root" %
                        (charge_selection, trigMatchingOption)),
                    'logFile':
                    os.path.join(
                        self.dirs[DKEY_LOGS],
                        "comp_jetToTauFakeRate_%s_%s.log" %
                        (charge_selection, trigMatchingOption)),
                    # fake rate = numerator / denominator histogram regions
                    'looseRegion':
                    "jetToTauFakeRate_%s_%s/denominator/" %
                    (charge_selection, trigMatchingOption),
                    'tightRegion':
                    "jetToTauFakeRate_%s_%s/numerator/" %
                    (charge_selection, trigMatchingOption),
                    'absEtaBins': self.absEtaBins,
                    'ptBins': self.ptBins,
                    'decayModes': self.decayModes,
                    'hadTauSelections': self.hadTau_selections_numerator,
                    'trigMatchingOption': trigMatchingOption,
                    'plots_outputFileName':
                    os.path.join(
                        self.dirs[key_comp_jetToTauFakeRate_dir]
                        [DKEY_PLOT],
                        "comp_jetToTauFakeRate_%s.png" % trigMatchingOption)
                }
            self.createCfg_comp_jetToTauFakeRate(
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job])
            comp_output = self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job]['outputFile']
            self.targets.append(comp_output)
            self.comp_input_files[charge_key].append(comp_output)
        # merged fake-rate output for this charge selection
        self.comp_output_files[charge_key] = os.path.join(
            self.dirs[DKEY_HIST],
            "comp_jetToTauFakeRate_%s.root" % charge_selection)

    # ------------------------------------------------------------------
    # Stage 4b: plotting jobs -- one overview plot per charge selection,
    # plus denominator/numerator plots per trigger-matching option,
    # |eta| bin and numerator selection
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'makePlots'")
    for charge_selection in self.charge_selections:
        key_hadd_stage2_job = getKey(charge_selection)
        key_makePlots_dir = getKey("makePlots")
        key_makePlots_job = getKey(charge_selection)
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable': self.executable_make_plots,
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified': os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS],
                                             "makePlots_%s_cfg.py" % self.channel),
            'outputFile': os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT],
                                       "makePlots_%s.png" % self.channel),
            'histogramDir': "jetToTauFakeRate_%s" % charge_selection,
            'label': None,
            'make_plots_backgrounds': self.make_plots_backgrounds
        }
        self.createCfg_makePlots(
            self.jobOptions_make_plots[key_makePlots_job])
        for trigMatchingOption in self.trigMatchingOptions:
            # denominator plots use the dedicated template config
            self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
            for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                key_hadd_stage2_job = getKey(charge_selection)
                key_makePlots_job = getKey(charge_selection, trigMatchingOption, absEtaBin, "denominator")
                self.jobOptions_make_plots[key_makePlots_job] = {
                    'executable' : self.executable_make_plots,
                    'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                    'cfgFile_modified' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_CFGS],
                        "makePlots_%s_%s_%s_denominator_%s_cfg.py" % \
                        (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                    'outputFile' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_PLOT],
                        "makePlots_%s_%s_%s_denominator_%s.png" % (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                    'histogramDir' : "jetToTauFakeRate_%s_%s/denominator/%s" % (charge_selection, trigMatchingOption, absEtaBin),
                    'label' : None,
                    'make_plots_backgrounds' : self.make_plots_backgrounds
                }
                self.createCfg_makePlots(
                    self.jobOptions_make_plots[key_makePlots_job])
                for hadTau_selection_numerator in self.hadTau_selections_numerator:
                    key_hadd_stage2_job = getKey(charge_selection)
                    key_makePlots_job = getKey(charge_selection, trigMatchingOption, absEtaBin, "numerator", hadTau_selection_numerator)
                    self.jobOptions_make_plots[key_makePlots_job] = {
                        'executable' : self.executable_make_plots,
                        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                        'cfgFile_modified' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_CFGS],
                            "makePlots_%s_%s_%s_numerator_%s_%s_cfg.py" % \
                            (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                        'outputFile' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_PLOT],
                            "makePlots_%s_%s_%s_numerator_%s_%s.png" % \
                            (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                        'histogramDir' : "jetToTauFakeRate_%s_%s/numerator/%s/%s" % (charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin),
                        'label' : None,
                        'make_plots_backgrounds' : self.make_plots_backgrounds
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])

    # ------------------------------------------------------------------
    # Stage 5: submission scripts (sbatch mode) and the Makefile
    # ------------------------------------------------------------------
    self.sbatchFile_analyze = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.createScript_sbatch_analyze(self.executable_analyze,
                                         self.sbatchFile_analyze,
                                         self.jobOptions_analyze)
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_comp_jetToTauFakeRate)
        self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                 self.sbatchFile_comp_jetToTauFakeRate,
                                 self.jobOptions_comp_jetToTauFakeRate)
    # the Makefile drives the whole chain regardless of running mode
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile,
                                   make_dependency="phony_hadd_stage1",
                                   max_mem='4096M')
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_comp_hadd(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
def create(self):
  """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

  Stages (in order):
    1. Build the self.dirs directory map (per-sample/per-selection dirs plus shared subdirectories) and create them on disk.
    2. Collect input Ntuple file lists per sample.
    3. Create per-job analysis configs (jobOptions_analyze) for every
       (sample, lepton selection, fake-rate weight, charge-sum selection, systematic shift) combination.
    4. Wire up the hadd stage1 -> stage1_5 -> stage2 histogram merging chain,
       including the summed 'fakes_mc' and 'Convs' background contributions.
    5. Create addBackgroundLeptonFakes / prepareDatacards / addSystFakeRates / makePlots configs.
    6. Emit the sbatch submission scripts (if requested) and the Makefile driving the workflow.

  Returns the total number of jobs (self.num_jobs).
  """
  # --- Stage 1: build the directory map ---------------------------------
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"]:
      continue
    process_name = sample_info["process_name_specific"]
    sample_category = sample_info["sample_category"]
    is_mc = (sample_info["type"] == "mc")
    logging.info("Building dictionaries for sample %s..." % process_name)
    for lepton_selection in self.lepton_selections:
      for lepton_frWeight in self.lepton_frWeights:
        # fake-rate weights only make sense for Fakeable selections;
        # unweighted histograms only for Tight / BDT-training selections
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
        for chargeSumSelection in self.chargeSumSelections:
          # "", "hadd" and "addBackgrounds" are pseudo-shifts used only to
          # reserve directories for the merging/summing steps
          central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
          central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
          central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
          for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
              if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                continue
              if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                  central_or_shift_or_dummy, is_mc, lepton_selection, chargeSumSelection, sample_info
              ):
                continue
              key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy)
              for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                if dir_type == DKEY_SYNC and not self.do_sync:
                  continue
                initDict(self.dirs, [ key_dir, dir_type ])
                # configs/logs live under configDir; outputs under outputDir
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                  self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                else:
                  self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy)
  # shared directories for the post-processing steps
  for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
    key_dir = getKey(subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ key_dir, dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
      else:
        self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
  # channel-level directories (not tied to any sample or subdirectory)
  for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
    if dir_type == DKEY_SYNC and not self.do_sync:
      continue
    initDict(self.dirs, [ dir_type ])
    if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
    else:
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
  # count directories first so that creation progress can be reported in percent
  numDirectories = 0
  for key in self.dirs.keys():
    if type(self.dirs[key]) == dict:
      numDirectories += len(self.dirs[key])
    else:
      numDirectories += 1
  logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
  numDirectories_created = 0; frac = 1
  for key in self.dirs.keys():
    if type(self.dirs[key]) == dict:
      for dir_type in self.dirs[key].keys():
        create_if_not_exists(self.dirs[key][dir_type])
      numDirectories_created += len(self.dirs[key])
    else:
      create_if_not_exists(self.dirs[key])
      numDirectories_created = numDirectories_created + 1
    # emit one log line per percent of progress reached so far
    while 100*numDirectories_created >= frac*numDirectories:
      logging.info(" %i%% completed" % frac)
      frac = frac + 1
  logging.info("Done.")

  # --- Stage 2: gather input Ntuple file lists per sample ---------------
  inputFileLists = {}
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"]:
      continue
    logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
    inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

  # --- Stage 3: per-job analysis configs --------------------------------
  mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
  for lepton_selection in self.lepton_selections:
    electron_selection = lepton_selection
    muon_selection = lepton_selection
    hadTauVeto_selection = "Tight"
    hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])
    # the mcClosure selections tighten one lepton flavour and loosen the other
    if lepton_selection == "forBDTtraining":
      electron_selection = "Loose"
      muon_selection = "Loose"
    elif lepton_selection == "Fakeable_mcClosure_e":
      electron_selection = "Fakeable"
      muon_selection = "Tight"
    elif lepton_selection == "Fakeable_mcClosure_m":
      electron_selection = "Tight"
      muon_selection = "Fakeable"
    for lepton_frWeight in self.lepton_frWeights:
      if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
        continue
      if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
        continue
      lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
      for chargeSumSelection in self.chargeSumSelections:
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"]:
            continue
          process_name = sample_info["process_name_specific"]
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
          inputFileList = inputFileLists[sample_name]
          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          use_th_weights = self.runTHweights(sample_info)
          central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
          for central_or_shift in central_or_shift_dedicated:
            if not self.accept_systematics(
              central_or_shift, is_mc, lepton_selection, chargeSumSelection, sample_info
            ):
              continue
            # "internal" shifts are processed inside the central job rather
            # than as separate jobs (only when TH weights are not in use)
            central_or_shifts_local = []
            if central_or_shift == "central" and not use_th_weights:
              for central_or_shift_local in self.central_or_shifts_internal:
                if self.accept_systematics(
                  central_or_shift_local, is_mc, lepton_selection, chargeSumSelection, sample_info
                ):
                  central_or_shifts_local.append(central_or_shift_local)
            logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))
            # build config files for executing analysis code
            key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift)
            for jobId in inputFileList.keys():
              analyze_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId)
              key_analyze_job = getKey(*analyze_job_tuple)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue
              # sync-ntuple production: only OS events, and only for the
              # SR / fake-AR / mcClosure selections
              syncOutput = ''
              syncTree = ''
              if self.do_sync:
                if chargeSumSelection != 'OS':
                  continue
                mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                if lepton_selection_and_frWeight == 'Tight':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_SR' % self.channel.replace('_', '')
                elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_Fake' % self.channel.replace('_', '')
                elif mcClosure_match:
                  mcClosure_type = mcClosure_match.group('type')
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                  syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel.replace('_', ''), mcClosure_type)
                else:
                  continue
              if syncTree and central_or_shift != "central":
                syncTree = os.path.join(central_or_shift, syncTree)
              syncRLE = ''
              if self.do_sync and self.rle_select:
                syncRLE = self.rle_select % syncTree
                if not os.path.isfile(syncRLE):
                  logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                  continue
              if syncOutput:
                self.inputFiles_sync['sync'].append(syncOutput)
              cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
              logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
              rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                if self.select_rle_output else ""
              histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
              branchName_memOutput = '%s_%s' % (self.MEMbranch, self.get_addMEM_systematics(central_or_shift)) \
                if self.MEMbranch else ''
              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : cfgFile_modified_path,
                'histogramFile' : histogramFile_path,
                'logFile' : logFile_path,
                'selEventsFileName_output' : rleOutputFile_path,
                'electronSelection' : electron_selection,
                'muonSelection' : muon_selection,
                'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                'hadTauSelection' : hadTauVeto_selection,
                'chargeSumSelection' : chargeSumSelection,
                'applyFakeRateWeights' : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                'central_or_shift' : central_or_shift,
                'central_or_shifts_local' : central_or_shifts_local,
                'selectBDT' : self.isBDTtraining,
                'branchName_memOutput' : branchName_memOutput,
                'syncOutput' : syncOutput,
                'syncTree' : syncTree,
                'syncRLE' : syncRLE,
                'apply_hlt_filter' : self.hlt_filter,
                'useNonNominal' : self.use_nonnominal,
                'fillGenEvtHistograms' : True,
                'isControlRegion' : self.isControlRegion,
              }
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)
              # initialize input and output file names for hadd_stage1
              key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
              hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection)
              key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
              if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)
          # the remaining merging steps are not needed for BDT-training or sync runs
          if self.isBDTtraining or self.do_sync:
            continue
          # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
          key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
          hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
          if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
          self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
          self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
            "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)
        if self.isBDTtraining or self.do_sync:
          continue
        ## doing list of processes to make the hadd in _Convs and _fake
        ## we could remove the tH ones with althernative couplings
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend(self.ttHProcs)
        processes_input_base = self.get_processes_input_base(sample_categories)
        # sum fake background contributions for the total of all MC sample
        # input processes: TT_fake, TTW_fake, TTWW_fake, ...
        # output process: fakes_mc
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
        key_addBackgrounds_dir = getKey("addBackgrounds")
        addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
        key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
        processes_input = []
        for process_input_base in processes_input_base:
          # HH processes are excluded from the summed fake background
          if "HH" in process_input_base:
            continue
          processes_input.append("%s_fake" % process_input_base)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
          'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
          'processes_input' : processes_input,
          'process_output' : "fakes_mc"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])
        # sum conversion background contributions for the total of all MC sample
        # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
        # output process: Convs
        addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, chargeSumSelection)
        key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
        processes_input = []
        for process_input_base in self.convs_backgrounds:
          if "HH" in process_input_base:
            continue
          processes_input.append("%s_Convs" % process_input_base)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
          'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
          'processes_input' : processes_input,
          'process_output' : "Convs"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])
        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
        hadd_stage2_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
        key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        # summed fakes_mc / Convs enter stage2 only for the Tight (signal-region) selection
        if lepton_selection == "Tight":
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

  # --- Early exit: BDT-training / sync runs only need analyze + stage1 hadd ---
  if self.isBDTtraining or self.do_sync:
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      if self.isBDTtraining:
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      elif self.do_sync:
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    if self.isBDTtraining:
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
    elif self.do_sync:
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.addToMakefile_hadd_sync(lines_makefile)
    else:
      raise ValueError("Internal logic error")
    self.addToMakefile_validate(lines_makefile)
    self.targets.extend(self.phoniesToAdd)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs

  # --- Stage 5: data-driven fake background ------------------------------
  logging.info("Creating configuration files to run 'addBackgroundFakes'")
  for chargeSumSelection in self.chargeSumSelections:
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection)
    key_addFakes_dir = getKey("addBackgroundLeptonFakes")
    key_addFakes_job = getKey("data_fakes", chargeSumSelection)
    category_sideband = "{}_{}_Fakeable_wFakeRateWeights".format(self.channel, chargeSumSelection)
    self.jobOptions_addFakes[key_addFakes_job] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection),
      'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection),
      'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection),
      'category_signal' : "{}_{}_Tight".format(self.channel, chargeSumSelection),
      'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

  # --- Datacards (per fitted histogram, separately for OS and SS) --------
  logging.info("Creating configuration files to run 'prepareDatacards'")
  for histogramToFit in self.histograms_to_fit:
    key_prep_dcard_dir = getKey("prepareDatacards")
    if "OS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
      key_prep_dcard_job = getKey("OS", histogramToFit)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
    if "SS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
      key_prep_dcard_job = getKey("SS", histogramToFit)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'histogramToFit' : histogramToFit,
        'label' : 'SS'
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
    # add shape templates for the following systematic uncertainties:
    # - 'CMS_ttHl_Clos_norm_e'
    # - 'CMS_ttHl_Clos_shape_e'
    # - 'CMS_ttHl_Clos_norm_m'
    # - 'CMS_ttHl_Clos_shape_m'
    for chargeSumSelection in self.chargeSumSelections:
      key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit)
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")
      add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit)
      key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit)
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = None
      if chargeSumSelection == "OS":
        histogramDir_nominal = self.histogramDir_prep_dcard
      elif chargeSumSelection == "SS":
        histogramDir_nominal = self.histogramDir_prep_dcard_SS
      else:
        raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection)
      # mcClosure shape templates are added only when the corresponding
      # Fakeable_mcClosure_{e,m} selection was run
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
        histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, chargeSumSelection)]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

  # --- Plots --------------------------------------------------------------
  logging.info("Creating configuration files to run 'makePlots'")
  key_makePlots_dir = getKey("makePlots")
  if "OS" in self.chargeSumSelections:
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_makePlots_job = getKey("OS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : self.channel,
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
  if "SS" in self.chargeSumSelections:
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
    key_makePlots_job = getKey("SS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard_SS,
      'label' : "{} SS".format(self.channel),
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
  # NOTE(review): this reuses key "OS", overwriting the OS makePlots job above
  # whenever the mcClosure plots are enabled -- looks intentional per the TODO,
  # but confirm before relying on both sets of plots
  if "Fakeable_mcClosure" in self.lepton_selections: #TODO
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_makePlots_job = getKey("OS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots_mcClosure,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
    }
    self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

  # --- Stage 6: batch submission scripts and Makefile ---------------------
  self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
  self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
  self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
  self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
  if self.is_sbatch:
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
    self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
    self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
    self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
    logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
    self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
  logging.info("Creating Makefile")
  lines_makefile = []
  self.addToMakefile_analyze(lines_makefile)
  self.addToMakefile_hadd_stage1(lines_makefile)
  self.addToMakefile_backgrounds_from_data(lines_makefile)
  self.addToMakefile_hadd_stage2(lines_makefile)
  self.addToMakefile_prep_dcard(lines_makefile)
  self.addToMakefile_add_syst_fakerate(lines_makefile)
  self.addToMakefile_make_plots(lines_makefile)
  self.addToMakefile_validate(lines_makefile)
  self.createMakefile(lines_makefile)
  logging.info("Done.")
  return self.num_jobs
def create(self):
  """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

  Stages:
    1. Build and create the directory structure (self.dirs).
    2. Collect input Ntuple file lists per sample.
    3. Create one analysis config per (sample, systematic shift, job id).
    4. Wire up the hadd stage1 (per sample) -> stage2 ("all") merging chain.
    5. Create prepareDatacards and makePlots configs per event selection.
    6. Emit the sbatch submission script (if requested) and the Makefile.
  """
  # --- Stage 1: directory structure --------------------------------------
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
      continue
    process_name = sample_info["process_name_specific"]
    key_dir = getKey(process_name)
    for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
      initDict(self.dirs, [key_dir, dir_type])
      # configs/logs go under configDir, outputs under outputDir
      if dir_type in [DKEY_CFGS, DKEY_LOGS]:
        self.dirs[key_dir][dir_type] = os.path.join(
          self.configDir, dir_type, self.channel, "", process_name)
      else:
        self.dirs[key_dir][dir_type] = os.path.join(
          self.outputDir, dir_type, self.channel, "", process_name)
  for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
    initDict(self.dirs, [dir_type])
    if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
      self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
    else:
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
  ##print "self.dirs = ", self.dirs
  for key in self.dirs.keys():
    if type(self.dirs[key]) == dict:
      for dir_type in self.dirs[key].keys():
        create_if_not_exists(self.dirs[key][dir_type])
    else:
      create_if_not_exists(self.dirs[key])

  # --- Stage 2: input file lists ------------------------------------------
  inputFileLists = {}
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
      continue
    logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
    inputFileLists[sample_name] = generateInputFileList(
      sample_name, sample_info, self.max_files_per_job, self.debug)

  # --- Stage 3 & 4: per-job analysis configs and hadd chain ---------------
  for sample_name, sample_info in self.samples.items():
    if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
      continue
    process_name = sample_info["process_name_specific"]
    logging.info(
      "Creating configuration files to run '%s' for sample %s" %
      (self.executable_analyze, process_name))
    sample_category = sample_info["sample_category"]
    is_mc = (sample_info["type"] == "mc")
    for central_or_shift in self.central_or_shifts:
      inputFileList = inputFileLists[sample_name]
      for jobId in inputFileList.keys():
        # systematic shifts apply to MC only, and the theory-shape
        # uncertainties only to their matching sample category
        if central_or_shift != "central" and not is_mc:
          continue
        if central_or_shift.startswith(
          "CMS_ttHl_thu_shape_ttH"
        ) and sample_category != "signal":
          continue
        if central_or_shift.startswith(
          "CMS_ttHl_thu_shape_ttW"
        ) and sample_category != "TTW":
          continue
        if central_or_shift.startswith(
          "CMS_ttHl_thu_shape_ttZ"
        ) and sample_category != "TTZ":
          continue
        # build config files for executing analysis code
        key_dir = getKey(process_name)
        key_analyze_job = getKey(process_name, central_or_shift, jobId)
        ntupleFiles = inputFileList[jobId]
        if len(ntupleFiles) == 0:
          # bug fix: was 'key_job', an undefined name (NameError when hit);
          # also use logging.warning for consistency with the rest of this file
          logging.warning("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
            key_analyze_job, ntupleFiles))
          continue
        self.jobOptions_analyze[key_analyze_job] = {
          'ntupleFiles' : ntupleFiles,
          'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
            (self.channel, process_name, central_or_shift, jobId)),
          'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
            (process_name, central_or_shift, jobId)),
          'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
            (self.channel, process_name, central_or_shift, jobId)),
          'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % \
            (self.channel, process_name, central_or_shift, jobId)) if self.select_rle_output else "",
          'sample_category' : sample_category,
          'triggers' : sample_info["triggers"],
          'hadTau_selection' : self.hadTau_selection_part2,
          ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
          'use_HIP_mitigation_mediumMuonId' : True,
          'is_mc' : is_mc,
          'central_or_shift' : central_or_shift,
          # scale MC to the target luminosity; data is left unscaled
          'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
          'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
          'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
        }
        self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])
        # initialize input and output file names for hadd_stage1
        key_hadd_stage1 = getKey(process_name)
        if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys():
          self.inputFiles_hadd_stage1[key_hadd_stage1] = []
        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
          self.jobOptions_analyze[key_analyze_job]['histogramFile'])
        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
          (self.channel, process_name))
    # initialize input and output file names for hadd_stage2
    key_hadd_stage1 = getKey(process_name)
    key_hadd_stage2 = getKey("all")
    if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
      self.inputFiles_hadd_stage2[key_hadd_stage2] = []
    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
      self.outputFile_hadd_stage1[key_hadd_stage1])
    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
      (self.channel))

  # --- Stage 5: datacards and plots ---------------------------------------
  logging.info("Creating configuration files to run 'prepareDatacards'")
  for evtSelection in self.evtSelections:
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_job = getKey(evtSelection, histogramToFit)
      key_hadd_stage2 = getKey("all")
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified': os.path.join(
          self.dirs[DKEY_CFGS],
          "prepareDatacards_%s_%s_%s_cfg.py" % (self.channel, evtSelection, histogramToFit)),
        'datacardFile': os.path.join(
          self.dirs[DKEY_DCRD],
          "prepareDatacards_%s_%s_%s.root" % (self.channel, evtSelection, histogramToFit)),
        'histogramDir': "_".join([self.histogramDir_prep_dcard, evtSelection]),
        'histogramToFit': histogramToFit,
        'label': None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
  logging.info("Creating configuration files to run 'makePlots'")
  for evtSelection in self.evtSelections:
    key_makePlots_job = getKey(evtSelection)
    key_hadd_stage2 = getKey("all")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable': self.executable_make_plots,
      'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
      'cfgFile_modified': os.path.join(
        self.dirs[DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, evtSelection)),
      'outputFile': os.path.join(
        self.dirs[DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, evtSelection)),
      'histogramDir': "_".join([self.histogramDir_prep_dcard, evtSelection]),
      'label': evtSelection,
      'make_plots_backgrounds': self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

  # --- Stage 6: submission script and Makefile ----------------------------
  if self.is_sbatch:
    logging.info(
      "Creating script for submitting '%s' jobs to batch system" %
      self.executable_analyze)
    self.sbatchFile_analyze = os.path.join(
      self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.createScript_sbatch()
  logging.info("Creating Makefile")
  lines_makefile = []
  self.addToMakefile_analyze(lines_makefile)
  self.addToMakefile_hadd_stage1(lines_makefile)
  self.addToMakefile_hadd_stage2(lines_makefile)
  self.addToMakefile_prep_dcard(lines_makefile)
  self.addToMakefile_make_plots(lines_makefile)
  self.createMakefile(lines_makefile)
  logging.info("Done")
def __init__(self, treeName, outputDir, cfgDir, executable_addMEM, samples, era,
             check_output_files, running_method, max_files_per_job,
             mem_integrations_per_job, max_mem_integrations, num_parallel_jobs,
             leptonSelection, hadTauSelection, integration_choice,
             jet_cleaning_by_index, dry_run, use_nonnominal, use_home, channel,
             rle_filter_file = '', submission_cmd = None, pool_id = '',
             max_jobs_per_sample = -1, ):
    """Set up the bookkeeping for a MEM (matrix-element-method) ntuple production.

    Stores all job options on self, validates the running method, prepares
    (version-rotated) log file paths, initializes the per-job bookkeeping
    dictionaries, and lays out the per-sample output/config directory structure.
    No jobs are created here; that presumably happens in a later create() call
    (not visible in this block).
    """
    self.treeName = treeName
    self.outputDir = outputDir      # large outputs (ntuples) go here
    self.cfgDir = cfgDir            # configs, logs, Makefile go here
    self.executable_addMEM = executable_addMEM
    self.mem_integrations_per_job = mem_integrations_per_job
    self.max_files_per_job = max_files_per_job
    self.max_mem_integrations = max_mem_integrations
    self.max_jobs_per_sample = max_jobs_per_sample
    self.samples = samples
    self.era = era
    self.check_output_files = check_output_files
    self.channel = channel
    self.rle_filter_file = rle_filter_file
    self.leptonSelection = leptonSelection
    self.hadTauSelection = hadTauSelection
    # The hadronic-tau selection string is encoded as "<definition>|<working point>".
    if self.hadTauSelection:
        self.hadTauDefinition = self.hadTauSelection.split('|')[0]
        self.hadTauWorkingPoint = self.hadTauSelection.split('|')[1]
    else:
        self.hadTauDefinition = None
        self.hadTauWorkingPoint = None
    self.maxPermutations_branchName = None
    self.integration_choice = integration_choice
    self.jet_cleaning_by_index = jet_cleaning_by_index
    logging.info(
        "Number of integration points: %s" % self.integration_choice
    )
    # Only these two job-scheduling back-ends are supported.
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = False
    self.is_makefile = False
    if self.running_method.lower() == "sbatch":
        self.is_sbatch = True
    else:
        self.is_makefile = True
    self.makefile = os.path.join(
        self.cfgDir, "Makefile_%s" % self.channel)
    self.num_parallel_jobs = num_parallel_jobs
    self.dry_run = dry_run
    self.use_nonnominal = use_nonnominal
    self.use_home = use_home
    # If no pool id was supplied, generate a unique one for this production.
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    logging.info("Working directory is: {workingDir}".format(workingDir = self.workingDir))
    for dirPath in [self.outputDir, self.cfgDir]:
        create_if_not_exists(dirPath)
    self.stdout_file_path = os.path.join(self.cfgDir, "stdout_%s.log" % self.channel)
    self.stderr_file_path = os.path.join(self.cfgDir, "stderr_%s.log" % self.channel)
    self.sw_ver_file_cfg = os.path.join(self.cfgDir, "VERSION_%s.log" % self.channel)
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_%s.log" % self.channel)
    self.submission_out = os.path.join(self.cfgDir, "SUBMISSION_%s.log" % self.channel)
    # get_log_version presumably rewrites the paths so reruns do not clobber
    # earlier logs (e.g. by appending a version suffix) -- TODO confirm.
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out
    ))
    check_submission_cmd(self.submission_out, submission_cmd)
    # Per-job bookkeeping containers, filled later.
    self.dirs = {}
    self.samples = samples  # NOTE(review): redundant -- already assigned above
    self.cfgFiles_addMEM_modified = {}
    self.shFiles_addMEM_modified = {}
    self.logFiles_addMEM = {}
    self.sbatchFile_addMEM = os.path.join(self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
    self.inputFiles = {}
    self.outputFiles = {}
    self.hadd_records = {}
    self.filesToClean = []
    # Drop the aggregate 'sum_events' entry; only real samples are processed
    # below (raises KeyError if the key is absent).
    del self.samples['sum_events']
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        # Ntuples (large) live under outputDir ...
        for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
            initDict(self.dirs, [key_dir, dir_type])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)
        # ... while configs/logs/hadd scratch live under cfgDir.
        for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
            initDict(self.dirs, [key_dir, dir_type])
            self.dirs[key_dir][dir_type] = os.path.join(self.cfgDir, dir_type, self.channel, process_name)
    # Collects CVMFS-related job failures, presumably keyed by error type --
    # populated elsewhere.
    self.cvmfs_error_log = {}
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Builds the directory layout, splits every sample's input files into
    analysis jobs for each jet-/MET-smearing combination, wires the job
    outputs into the two-stage hadd harvesting, and emits the Makefile
    (plus an sbatch submission script when running on the batch system).

    Returns:
        self.num_jobs -- the job counter maintained by this class.
    """
    # --- directory layout: per-sample dirs, configs/logs under configDir,
    #     histograms/ntuple-level outputs under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
    # channel-level (sample-independent) directories
    for dir_type in [
            DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
            DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # --- collect input file lists per sample
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_info, self.max_files_per_job)

    # --- one analysis job per (jet smearing, MET smearing, sample, jobId)
    for apply_jetSmearing in self.apply_jetSmearing_options:
        jetSmearingLabel = "jetSmearingEnabled" if apply_jetSmearing else "jetSmearingDisabled"
        for apply_metSmearing in self.apply_metSmearing_options:
            metSmearingLabel = "metSmearingEnabled" if apply_metSmearing else "metSmearingDisabled"
            for sample_name, sample_info in self.samples.items():
                if not sample_info["use_it"]:
                    continue
                process_name = sample_info["process_name_specific"]
                logging.info(
                    "Creating configuration files to run '%s' for sample %s" %
                    (self.executable_analyze, process_name))
                sample_category = sample_info["sample_category"]
                inputFileList = inputFileLists[sample_name]
                # Each input ntuple is split into numJobsPerFile jobs of
                # maxSelEvents events each; the per-sample values are tuned
                # to the sample's event count.
                if sample_info["process_name_specific"] == "signal_ggf_nonresonant_node_sm_hh_2b2v":
                    numJobsPerFile = 500
                elif sample_info["process_name_specific"] == "signal_ggf_nonresonant_cHHH1_hh_2b2v":
                    numJobsPerFile = 100
                elif sample_info["process_name_specific"] == "TTJets_DiLept":
                    numJobsPerFile = 50
                elif sample_info["process_name_specific"] == "TTJets_DiLept_ext1":
                    numJobsPerFile = 50
                elif sample_info["process_name_specific"] == "TTTo2L2Nu":
                    numJobsPerFile = 10
                else:
                    raise ValueError("Invalid sample: %s" % sample_info["process_name_specific"])
                numJobs = numJobsPerFile * len(inputFileList.keys())
                for jobId in range(1, numJobs + 1):
                    # '//' (not '/'): ntupleId must stay an integer key;
                    # identical result under Python 2, safe under Python 3.
                    ntupleId = ((jobId - 1) // numJobsPerFile) + 1
                    maxSelEvents = 500
                    skipSelEvents = maxSelEvents * ((jobId - 1) % numJobsPerFile)
                    # build config files for executing analysis code
                    key_dir = getKey(process_name)
                    key_analyze_job = getKey(process_name, jetSmearingLabel,
                                             metSmearingLabel, jobId)
                    ntupleFiles = inputFileList[ntupleId]
                    if len(ntupleFiles) == 0:
                        logging.warning(
                            "No input ntuples for %s --> skipping job !!" %
                            (key_analyze_job))
                        continue
                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%s_%i_cfg.py" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId))
                    histogramFile_path = os.path.join(
                        self.dirs[key_dir][DKEY_HIST],
                        "analyze_%s_%s_%s_%s_%i.root" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId))
                    logFile_path = os.path.join(
                        self.dirs[key_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%s_%i.log" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId))
                    rleOutputFile_path = os.path.join(
                        self.dirs[key_dir][DKEY_RLES],
                        "rle_%s_%s_%s_%s_%i.txt" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId)) \
                        if self.select_rle_output else ""
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': cfgFile_modified_path,
                        'histogramFile': histogramFile_path,
                        'logFile': logFile_path,
                        'selEventsFileName_output': rleOutputFile_path,
                        'apply_jetSmearing': apply_jetSmearing,
                        'apply_metSmearing': apply_metSmearing,
                        'maxSelEvents': maxSelEvents,
                        'skipSelEvents': skipSelEvents
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job], sample_info)
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name, jetSmearingLabel, metSmearingLabel)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage1_%s_%s_%s_%s.root" %
                        (self.channel, process_name, jetSmearingLabel, metSmearingLabel))
                    # add output files of hadd_stage1 to list of input files for hadd_stage2
                    key_hadd_stage2 = getKey("")
                    if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                        self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage2_%s.root" % self.channel)

    # --- submission script / Makefile
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze,
                                         self.sbatchFile_analyze,
                                         self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.targets.extend(self.outputFile_hadd_stage2.values())
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Lays out the directory structure, creates one analysis job per
    (sample, charge selection, systematic shift, input-file chunk), wires the
    outputs into two hadd harvesting stages, then generates the
    comp_jetToTauFakeRate and makePlots configurations and the Makefile
    (plus sbatch submission scripts when running on the batch system).
    """
    # --- directory layout: per-(sample, charge selection) dirs
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        for charge_selection in self.charge_selections:
            key_dir = getKey(process_name, charge_selection)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel,
                        "_".join([charge_selection]), process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel,
                        "_".join([charge_selection]), process_name)
    # channel-level (sample-independent) directories
    for dir_type in [
            DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
            DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # --- collect input file lists per sample
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    # --- analysis jobs
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or sample_info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        sample_category = sample_info["sample_category"]
        for charge_selection in self.charge_selections:
            for central_or_shift in self.central_or_shifts:
                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    # systematic shifts only make sense for MC, and the
                    # theory-shape shifts only for their own sample category
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                        continue
                    if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                        continue
                    # build config files for executing analysis code
                    key_dir = getKey(process_name, charge_selection)
                    key_analyze_job = getKey(process_name, charge_selection,
                                             central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # bug fix: original referenced undefined name
                        # 'key_job' here, raising NameError on empty chunks
                        logging.warning(
                            "ntupleFiles['%s'] = %s --> skipping job !!" %
                            (key_analyze_job, ntupleFiles))
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles' : ntupleFiles,
                        'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                        'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, charge_selection, central_or_shift, jobId)),
                        'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                        'sample_category' : sample_category,
                        'triggers' : sample_info["triggers"],
                        'charge_selection' : charge_selection,
                        'jet_minPt' : self.jet_minPt,
                        'jet_maxPt' : self.jet_maxPt,
                        'jet_minAbsEta' : self.jet_minAbsEta,
                        'jet_maxAbsEta' : self.jet_maxAbsEta,
                        'hadTau_selection_denominator' : self.hadTau_selection_denominator,
                        'hadTau_selections_numerator' : self.hadTau_selections_numerator,
                        'absEtaBins' : self.absEtaBins,
                        ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                        'use_HIP_mitigation_mediumMuonId' : True,
                        'is_mc' : is_mc,
                        'central_or_shift' : central_or_shift,
                        # MC is scaled to xsection * lumi / N_events; data is unscaled
                        'lumi_scale' : 1. if not (self.use_lumi and is_mc) else
                            sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                        'apply_genWeight' : sample_info["genWeight"]
                            if (is_mc and "genWeight" in sample_info.keys()) else False,
                        'apply_trigger_bits' : (is_mc and (self.era == "2015" or
                            (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name, charge_selection)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage1_%s_%s_%s.root" %
                        (self.channel, process_name, charge_selection))
                    # initialize input and output file names for hadd_stage2
                    key_hadd_stage2 = getKey(charge_selection)
                    if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                        self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage2_%s_%s.root" %
                        (self.channel, charge_selection))

    # --- comp_jetToTauFakeRate jobs (one per charge selection)
    logging.info(
        "Creating configuration files for executing 'comp_jetToTauFakeRate'")
    for charge_selection in self.charge_selections:
        key_comp_jetToTauFakeRate_job = getKey(charge_selection)
        key_hadd_stage2 = getKey(charge_selection)
        self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job] = {
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified': os.path.join(
                self.dirs[DKEY_CFGS],
                "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
            'outputFile': os.path.join(
                self.dirs[DKEY_HIST],
                "comp_jetToTauFakeRate_%s.root" % charge_selection),
            'logFile': os.path.join(
                self.dirs[DKEY_LOGS],
                "comp_jetToTauFakeRate_%s.log" % charge_selection),
            'looseRegion': "jetToTauFakeRate_%s/denominator/" % charge_selection,
            'tightRegion': "jetToTauFakeRate_%s/numerator/" % charge_selection,
            'absEtaBins': self.absEtaBins,
            'ptBins': self.ptBins
        }
        self.createCfg_comp_jetToTauFakeRate(
            self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job])
        self.targets.append(
            self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job]['outputFile'])

    # --- makePlots jobs: overview plot plus denominator/numerator plots
    #     per |eta| bin and numerator selection
    logging.info("Creating configuration files to run 'makePlots'")
    for charge_selection in self.charge_selections:
        key_makePlots_job = getKey(charge_selection)
        key_hadd_stage2 = getKey(charge_selection)
        self.jobOptions_make_plots[key_makePlots_job] = {
            'executable': self.executable_make_plots,
            'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
            'cfgFile_modified': os.path.join(
                self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
            'outputFile': os.path.join(
                self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
            'histogramDir': "jetToTauFakeRate_%s" % charge_selection,
            'label': None,
            'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
        }
        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
        # subsequent plots use the denominator cfg template
        self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
        for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
            key_makePlots_job = getKey(charge_selection, absEtaBin, "denominator")
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable': self.executable_make_plots,
                'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified': os.path.join(
                    self.dirs[DKEY_CFGS],
                    "makePlots_%s_%s_denominator_%s_cfg.py" %
                    (self.channel, charge_selection, absEtaBin)),
                'outputFile': os.path.join(
                    self.dirs[DKEY_PLOT],
                    "makePlots_%s_%s_denominator_%s.png" %
                    (self.channel, charge_selection, absEtaBin)),
                'histogramDir': "jetToTauFakeRate_%s/denominator/%s" %
                    (charge_selection, absEtaBin),
                'label': None,
                'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
            }
            self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
            for hadTau_selection_numerator in self.hadTau_selections_numerator:
                key_makePlots_job = getKey(charge_selection, absEtaBin,
                                           "numerator", hadTau_selection_numerator)
                key_hadd_stage2 = getKey(charge_selection)
                self.jobOptions_make_plots[key_makePlots_job] = {
                    'executable': self.executable_make_plots,
                    'inputFile': self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified': os.path.join(
                        self.dirs[DKEY_CFGS],
                        "makePlots_%s_%s_numerator_%s_%s_cfg.py" %
                        (self.channel, charge_selection, hadTau_selection_numerator, absEtaBin)),
                    'outputFile': os.path.join(
                        self.dirs[DKEY_PLOT],
                        "makePlots_%s_%s_numerator_%s_%s.png" %
                        (self.channel, charge_selection, hadTau_selection_numerator, absEtaBin)),
                    'histogramDir': "jetToTauFakeRate_%s/numerator/%s/%s" %
                        (charge_selection, hadTau_selection_numerator, absEtaBin),
                    'label': None,
                    'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
                }
                self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    # --- submission scripts / Makefile
    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze,
                                         self.sbatchFile_analyze,
                                         self.jobOptions_analyze)
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_comp_jetToTauFakeRate)
        self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
        self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                 self.sbatchFile_comp_jetToTauFakeRate,
                                 self.jobOptions_comp_jetToTauFakeRate)
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Builds the directory layout (optionally including sync dirs when
    self.do_sync is set), creates one analysis job per
    (mode, sample, systematic shift, input-file chunk), wires the outputs
    into the two hadd harvesting stages, and emits the Makefile (plus an
    sbatch submission script when running on the batch system).
    Returns self.num_jobs.
    """
    # --- directory layout: per-(sample, mode) dirs; configs/logs under
    #     configDir, histograms etc. under outputDir
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        for mode in self.modes:
            key_dir = getKey(process_name, mode)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                # sync directories only exist when sync output was requested
                if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                initDict(self.dirs, [ key_dir, dir_type ])
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                        "_".join([ mode ]), process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                        "_".join([ mode ]), process_name)
    # channel-level (sample-independent) directories
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
        if dir_type == DKEY_SYNC and not self.do_sync:
            continue
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # --- collect input file lists per sample
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # --- one analysis job per (mode, sample, systematic shift, file chunk)
    for mode in self.modes:
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            for central_or_shift in self.central_or_shifts:
                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    # build config files for executing analysis code
                    key_dir = getKey(process_name, mode)
                    key_analyze_job = getKey(process_name, mode, central_or_shift, jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles' : ntupleFiles,
                        'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                            (self.channel, process_name, mode, central_or_shift, jobId)),
                        'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, mode, central_or_shift, jobId)),
                        'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                            (self.channel, process_name, mode, central_or_shift, jobId)),
                        'sample_category' : sample_category,
                        'mode' : mode,
                        'lepton_selection' : self.lepton_selection,
                        'hadTau_selection' : self.hadTau_selection,
                        # SVfit4tau mass-reconstruction options, forwarded verbatim
                        'SVfit4tau_logM_wMassConstraint_MarkovChain' : self.SVfit4tau_logM_wMassConstraint_MarkovChain,
                        'SVfit4tau_logM_woMassConstraint_MarkovChain' : self.SVfit4tau_logM_woMassConstraint_MarkovChain,
                        'SVfit4tau_logM_wMassConstraint_VAMP' : self.SVfit4tau_logM_wMassConstraint_VAMP,
                        'use_HIP_mitigation_mediumMuonId' : False,
                        'is_mc' : is_mc,
                        'central_or_shift' : central_or_shift,
                        # no luminosity scaling is applied in this workflow
                        'lumi_scale' : 1.,
                        'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name, mode)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                        (self.channel, process_name, mode))
                    # initialize input and output file names for hadd_stage2
                    # (single stage-2 output: all samples and modes merged)
                    key_hadd_stage2 = getKey()
                    if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
                        (self.channel))

    # --- submission script / Makefile
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def create(self):
    """Creates the config files and Makefile for producing synchronization ntuples.

    Builds the directory layout (including dummy 'hadd'/'copyHistograms'/
    'addBackgrounds' placeholder keys used by later workflow steps), creates
    one sync-ntuple job per (sample, file chunk, systematic shift), and emits
    the sbatch submission script and Makefile. Returns self.num_jobs.
    """
    # --- directory layout: per-sample dirs plus placeholder entries for the
    #     downstream hadd/copy/addBackgrounds steps
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        # "" = nominal; the trailing dummies reserve dirs for merge steps
        central_or_shifts_extended = [""]
        central_or_shifts_extended.extend(self.central_or_shifts)
        central_or_shifts_extended.extend(["hadd", "copyHistograms", "addBackgrounds"])
        for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [process_name, "hadd"]
            for process_name_or_dummy in process_name_extended:
                # the dummy "hadd" process only gets the nominal ("") slot,
                # and never pairs with the dummy shift labels
                if process_name_or_dummy in [ "hadd" ] and central_or_shift_or_dummy != "":
                    continue
                if central_or_shift_or_dummy in [ "hadd", "copyHistograms", "addBackgrounds" ] and process_name_or_dummy in ["hadd"]:
                    continue
                key_dir = getKey(process_name_or_dummy, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                    initDict(self.dirs, [key_dir, dir_type])
                    if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.configDir, dir_type, self.channel,
                            process_name_or_dummy, central_or_shift_or_dummy)
                    else:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.outputDir, dir_type, self.channel,
                            process_name_or_dummy, central_or_shift_or_dummy)
    # per-workflow-step subdirectories
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, subdirectory)
    # channel-level directories
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_SYNC, DKEY_HADD_RT ]:
        initDict(self.dirs, [dir_type])
        if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # --- create the directories, logging coarse progress percentages
    numDirectories = 0
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            numDirectories += len(self.dirs[key])
        else:
            numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created = numDirectories_created + 1
        # emit every percentage point crossed since the last report
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac = frac + 1
    logging.info("Done.")

    # --- collect input file lists per sample
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_info, self.max_files_per_job)

    # --- one sync-ntuple job per (sample, file chunk, systematic shift)
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_analyze, process_name))
        inputFileList = inputFileLists[sample_name]
        for jobId in inputFileList.keys():
            for central_or_shift in self.central_or_shifts:
                logging.info(" ... for systematic uncertainty %s" % central_or_shift)
                key_analyze_dir = getKey(process_name, central_or_shift)
                analyze_job_tuple = (process_name, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    print("Warning: no ntupleFiles --> skipping job !!")
                    continue
                syncOutput = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_SYNC],
                    '%s_%s.root' % (self.channel, central_or_shift))
                # shifted trees are written under a per-shift subdirectory
                syncOutputTree = self.output_tree if central_or_shift == "central" else os.path.join(
                    central_or_shift, self.output_tree)
                self.inputFiles_sync['sync'].append(syncOutput)
                cfgFile_modified_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_CFGS],
                    "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_LOGS],
                    "analyze_%s_%s_%i.log" % analyze_job_tuple)
                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': cfgFile_modified_path,
                    # no histogram output: this workflow only fills sync trees
                    'histogramFile': '',
                    'logFile': logFile_path,
                    'syncTree': syncOutputTree,
                    'syncOutput': syncOutput,
                    # an RLE selection file is used only when it is a single
                    # concrete path (no '%s' placeholder to substitute)
                    'syncRLE': self.rle_select if self.rle_select and '%s' not in self.rle_select else '',
                    'useNonNominal': self.use_nonnominal,
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job], sample_info)

    # --- submission script / Makefile (sbatch script is created unconditionally here)
    logging.info(
        "Creating script for submitting '%s' jobs to batch system" %
        self.executable_analyze)
    self.sbatchFile_analyze = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.createScript_sbatch_syncNtuple(self.executable_analyze,
                                        self.sbatchFile_analyze,
                                        self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_syncNtuple(lines_makefile)
    outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
    self.outputFile_sync['sync'] = outputFile_sync_path
    self.addToMakefile_hadd_sync(lines_makefile)
    self.targets.extend(self.phoniesToAdd)
    self.createMakefile(lines_makefile)
    logging.info("Done")
    return self.num_jobs
def create(self):
    """Create all config files and scripts to run the complete analysis workflow,
    either locally (Makefile) or on the batch system (sbatch).

    Workflow stages: per-sample/systematic analyze jobs -> hadd stage 1 (per
    process) -> hadd stage 1.5 (all processes) -> addBackgrounds_LeptonFakeRate
    -> hadd stage 2 -> prepareDatacards_LeptonFakeRate.
    """
    # Per-process directory structure: config-like dirs under configDir,
    # histogram/output dirs under outputDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
    # Channel-level directories shared by all processes.
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT ]:  # DKEY_PLOT to be added later
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    # FIX: was a bare Py2 'print'; use logging like the rest of this method.
    logging.info("self.dirs = %s" % self.dirs)
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
        else:
            create_if_not_exists(self.dirs[key])

    # Split the input files of each sample into jobs of at most
    # max_files_per_job files.
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(
            sample_name, sample_info, self.max_files_per_job, self.debug)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        logging.info(
            "Creating configuration files to run '%s' for sample %s" %
            (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        # MC is scaled to the target luminosity; data is left untouched.
        lumi_scale = 1. if not (self.use_lumi and is_mc) else \
            sample_info["xsection"] * self.lumi / sample_info["nof_events"]
        apply_genWeight = sample_info["apply_genWeight"] \
            if (is_mc and "apply_genWeight" in sample_info.keys()) else False
        sample_category = sample_info["sample_category"]
        apply_trigger_bits = (is_mc and (self.era == "2015" or
            (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
        for central_or_shift in self.central_or_shifts:
            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
                # Systematic shifts apply to MC only; theory-shape uncertainties
                # apply only to the matching sample category.
                if central_or_shift != "central" and not is_mc:
                    continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                    continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                    continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                    continue
                # build config files for executing analysis code
                key_dir = getKey(process_name)
                key_analyze_job = getKey(process_name, central_or_shift, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    # FIX: this message referenced the undefined name 'key_job'
                    # (NameError whenever a job had no input files).
                    logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" %
                                    (key_analyze_job, ntupleFiles))
                    continue
                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles'       : ntupleFiles,
                    'cfgFile_modified'  : os.path.join(self.dirs[key_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%i_cfg.py" % (self.channel, process_name, central_or_shift, jobId)),
                    'histogramFile'     : os.path.join(self.dirs[key_dir][DKEY_HIST],
                        "%s_%s_%i.root" % (process_name, central_or_shift, jobId)),
                    'logFile'           : os.path.join(self.dirs[key_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%i.log" % (self.channel, process_name, central_or_shift, jobId)),
                    'sample_category'   : sample_category,
                    'triggers'          : sample_info["triggers"],
                    'absEtaBins_e'      : self.absEtaBins_e,
                    'absEtaBins_mu'     : self.absEtaBins_mu,
                    'absPtBins_e'       : self.absPtBins_e,
                    'absPtBins_mu'      : self.absPtBins_mu,
                    'use_HIP_mitigation_mediumMuonId' : True,
                    'is_mc'             : is_mc,
                    'central_or_shift'  : central_or_shift,
                    'lumi_scale'        : lumi_scale,
                    # FIX: previously looked up sample key "genWeight" (inconsistent
                    # with the "apply_genWeight" key used everywhere else in this
                    # file, so it always evaluated to False). Reuse the value
                    # precomputed above. NOTE(review): confirm sample dict schema.
                    'apply_genWeight'   : apply_genWeight,
                    'apply_trigger_bits': apply_trigger_bits,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])
                # initialize input and output file names for hadd_stage1
                key_hadd_stage1 = getKey(process_name)
                if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                    self.dirs[DKEY_HIST],
                    "histograms_harvested_stage1_%s_%s.root" % (self.channel, process_name))

    # initialize input and output file names for hadd_stage1_5
    key_hadd_stage1_5 = getKey('')
    if key_hadd_stage1_5 not in self.inputFiles_hadd_stage1_5:
        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
    for key_hadd_stage1 in self.outputFile_hadd_stage1.keys():
        self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(
            self.outputFile_hadd_stage1[key_hadd_stage1])
    self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(
        self.dirs[DKEY_HIST], "histograms_harvested_stage1_5.root")

    # configuration for 'addBackgrounds_LeptonFakeRate' [stage 1.5]
    key_addBackgrounds_job = getKey('')
    self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job] = {
        'inputFile'        : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
        'cfgFile_modified' : os.path.join(
            self.dirs[DKEY_CFGS], os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate)),
        'outputFile'       : os.path.join(self.dirs[DKEY_HIST], "addBackground_LeptonFakeRate.root"),
        'logFile'          : os.path.join(
            self.dirs[DKEY_LOGS],
            os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate.replace("_cfg.py", ".log"))),
    }
    self.createCfg_addBackgrounds_LeptonFakeRate(
        self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job])

    # initialize input and output file names for hadd_stage2
    key_hadd_stage2 = getKey('')
    if key_hadd_stage2 not in self.inputFiles_hadd_stage2:
        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
    for key_hadd_stage1_5 in self.outputFile_hadd_stage1_5.keys():
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
        self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job]['outputFile'])
    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
        self.dirs[DKEY_HIST], "histograms_harvested_stage2.root")

    if self.prep_dcard:
        logging.info(
            "Creating configuration files to run 'prepareDatacards_LeptonFakeRate'"
        )
        # Re-bind as fresh lists (the originals may be shared class-level defaults).
        self.prep_dcard_signals = list(self.prep_dcard_signals)
        self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
        for histogramToFit in self.histograms_to_fit:
            key_prep_dcard_job = getKey(histogramToFit)
            self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                'inputFile'        : self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified' : os.path.join(
                    self.dirs[DKEY_CFGS], "prepareDatacards_LeptonFakeRate_%s_cfg.py" % (histogramToFit)),
                'datacardFile'     : os.path.join(
                    self.dirs[DKEY_DCRD], "prepareDatacards_%s.root" % (histogramToFit)),
                'histogramDir'     : self.histogramDir_prep_dcard,
                'histogramToFit'   : histogramToFit,
                'label'            : None
            }
            self.createCfg_prep_dcard_LeptonFakeRate(
                self.jobOptions_prep_dcard[key_prep_dcard_job])

    if self.is_sbatch:
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze,
                                         self.sbatchFile_analyze,
                                         self.jobOptions_analyze)
        self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_LeptonFakeRate_%s.py" % self.channel)
        self.createScript_sbatch(
            self.executable_addBackgrounds_LeptonFakeRate,
            self.sbatchFile_addBackgrounds_LeptonFakeRate,
            self.jobOptions_addBackgrounds_LeptonFakeRate)

    # Assemble the Makefile tying all stages together.
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.targets = list(self.outputFile_hadd_stage2.values())
    self.createMakefile(lines_makefile)
    logging.info("Done")
def create(self):
    """Create every config file and script needed for the full analysis
    workflow and run it either locally (Makefile) or on the batch system.

    Returns the number of jobs created (``self.num_jobs``).
    """
    # --- build the directory dictionary -------------------------------------
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        is_mc = (sample_info["type"] == "mc")
        logging.info("Building dictionaries for sample %s..." % process_name)
        for lepton_selection in self.lepton_selections:
            # Dummy entries "", "hadd" and "addBackgrounds" create the extra
            # directories needed by the aggregation steps.
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
            for central_or_shift_or_dummy in central_or_shifts_extended:
                process_name_extended = [ process_name, "hadd" ]
                for process_name_or_dummy in process_name_extended:
                    if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and \
                       process_name_or_dummy in [ "hadd" ]:
                        continue
                    if central_or_shift_or_dummy != "central" and \
                       central_or_shift_or_dummy not in central_or_shift_extensions:
                        # Real systematic shift: only for MC and only if accepted.
                        if not is_mc:
                            continue
                        if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info):
                            continue
                    key_dir = getKey(process_name_or_dummy, lepton_selection, central_or_shift_or_dummy)
                    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                        initDict(self.dirs, [ key_dir, dir_type ])
                        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.configDir, dir_type, self.channel,
                                "_".join([ lepton_selection ]), process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                "_".join([ lepton_selection ]), process_name_or_dummy)
    for subdirectory in [ "prepareDatacards" ]:
        key_dir = getKey(subdirectory)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, subdirectory)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                      DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                         DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # --- create the directories, logging progress in percent ----------------
    numDirectories = 0
    for key in self.dirs.keys():
        numDirectories += len(self.dirs[key]) if type(self.dirs[key]) == dict else 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
        if type(self.dirs[key]) == dict:
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])
            numDirectories_created += len(self.dirs[key])
        else:
            create_if_not_exists(self.dirs[key])
            numDirectories_created += 1
        while 100 * numDirectories_created >= frac * numDirectories:
            logging.info(" %i%% completed" % frac)
            frac += 1
    logging.info("Done.")

    # --- split each sample's input files into jobs ---------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
        inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # --- analyze jobs + hadd stage 1 / stage 2 bookkeeping -------------------
    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" %
                         (self.executable_analyze, process_name))
            is_mc = (sample_info["type"] == "mc")
            inputFileList = inputFileLists[sample_name]
            for central_or_shift in self.central_or_shifts:
                if central_or_shift != "central" and not is_mc:
                    continue
                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name, lepton_selection, central_or_shift)
                for jobId in inputFileList.keys():
                    analyze_job_tuple = (process_name, lepton_selection, central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                        continue
                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_CFGS],
                        "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_LOGS],
                        "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                    rleOutputFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_RLES],
                        "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) if self.select_rle_output else ""
                    histogramFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_HIST],
                        "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles'              : ntupleFiles,
                        'cfgFile_modified'         : cfgFile_modified_path,
                        'histogramFile'            : histogramFile_path,
                        'logFile'                  : logFile_path,
                        'selEventsFileName_output' : rleOutputFile_path,
                        'leptonSelection'          : lepton_selection,
                        'applyFakeRateWeights'     : "disabled",
                        'central_or_shift'         : central_or_shift,
                    }
                    self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)
                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1_dir = getKey(process_name, lepton_selection)
                    hadd_stage1_job_tuple = (process_name, lepton_selection)
                    key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                    if key_hadd_stage1_job not in self.inputFiles_hadd_stage1.keys():
                        self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                        self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(
                        self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                        "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)
            # initialize input and output file names for hadd_stage2
            key_hadd_stage1_job = getKey(process_name, lepton_selection)
            key_hadd_stage2_dir = getKey("hadd", lepton_selection)
            key_hadd_stage2_job = getKey(lepton_selection)
            if key_hadd_stage2_job not in self.inputFiles_hadd_stage2.keys():
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(
                self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                "hadd_stage2_%s.root" % lepton_selection)

    # --- datacard preparation and post-fit machinery -------------------------
    logging.info("Creating configuration files to run 'prepareDatacards'")
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)
    for histogramToFit in self.histograms_to_fit:
        # Datacards are always derived from the "Tight" selection.
        key_hadd_stage2_job = getKey("Tight")
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, histogramToFit)
        key_prep_dcard_job = getKey(histogramToFit)
        datacardFile = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile'        : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(
                self.dirs[key_prep_dcard_dir][DKEY_CFGS],
                "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile'     : datacardFile,
            'histogramDir'     : self.histogramDir_prep_dcard,
            'histogramToFit'   : histogramToFit,
            'label'            : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
        jobOptions_makefile = copy.deepcopy(self.jobOptions_postFit)
        jobOptions_makefile['fit_result'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'fit_{}'.format(histogramToFit),
            jobOptions_makefile['target'])
        jobOptions_makefile['hadd_stage2'] = self.outputFile_hadd_stage2[key_hadd_stage2_job]
        jobOptions_makefile['prepare_datacard'] = datacardFile
        jobOptions_makefile['data_datacard'] = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_data_%s_%s.root" % prep_dcard_job_tuple)
        jobOptions_makefile['pseudodata_datacard'] = os.path.join(
            self.dirs[key_prep_dcard_dir][DKEY_DCRD],
            "prepareDatacards_pseudodata_%s_%s.root" % prep_dcard_job_tuple)
        jobOptions_makefile['makefile'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'Makefile_{}'.format(histogramToFit))
        jobOptions_makefile['stdout'] = os.path.join(
            self.dirs[DKEY_COMBINE_OUTPUT], 'stdout_{}.log'.format(histogramToFit))
        self.createCfg_postFit(jobOptions_makefile)

    # --- submission script and Makefile --------------------------------------
    self.sbatchFile_analyze = os.path.join(
        self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" %
                     self.executable_analyze)
        self.createScript_sbatch_analyze(
            self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = "phony_hadd_stage1")
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_postFit(lines_makefile)
    self.createMakefile(lines_makefile)
    logging.info("Done.")
    return self.num_jobs
def __init__(
      self,
      configDir,
      outputDir,
      output_file,
      executable,
      projection_module,
      samples,
      max_files_per_job,
      era,
      plot,
      check_output_files,
      running_method,
      num_parallel_jobs,
      pool_id        = '',
      verbose        = False,
      dry_run        = False,
      use_home       = False,
      submission_cmd = None,
    ):
    """Set up bookkeeping for running a projection module over all samples.

    Records the configuration, prepares per-process directory paths,
    versioned log-file names and (unless running the 'puHist' module)
    loads the reference genWeights table.
    """
    # Plain configuration attributes.
    self.configDir = configDir
    self.outputDir = outputDir
    self.executable = executable
    self.projection_module = projection_module
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.plot = plot
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.use_home = use_home

    # Execution backend: sbatch (batch system) or Makefile (local).
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(
        self.configDir, "Makefile_{}".format(self.projection_module))
    self.num_parallel_jobs = num_parallel_jobs
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates')
    logging.info("Templates directory is: %s" % self.template_dir)

    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.output_file = os.path.join(self.outputDir, output_file)

    # Versioned log files: get_log_version() bumps the names so that reruns
    # never overwrite the logs of a previous submission.
    self.stdout_file_path = os.path.join(
        self.configDir, "stdout_{}.log".format(self.projection_module))
    self.stderr_file_path = os.path.join(
        self.configDir, "stderr_{}.log".format(self.projection_module))
    self.sw_ver_file_cfg = os.path.join(
        self.configDir, "VERSION_{}.log".format(self.projection_module))
    self.sw_ver_file_out = os.path.join(
        self.outputDir, "VERSION_{}.log".format(self.projection_module))
    self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, \
        self.sw_ver_file_out, self.submission_out = get_log_version((
            self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg,
            self.sw_ver_file_out, self.submission_out))
    check_submission_cmd(self.submission_out, submission_cmd)

    # Per-job bookkeeping containers.
    self.sbatchFile_projection = os.path.join(
        self.configDir, "sbatch_{}.py".format(self.projection_module))
    self.cfgFiles_projection = {}
    self.logFiles_projection = {}
    self.scriptFiles_projection = {}
    self.jobOptions_sbatch = {}
    self.inputFiles = {}
    self.outputFiles_tmp = {}
    self.outputFiles = {}
    self.phoniesToAdd = []
    self.filesToClean = []
    self.targets = []
    self.makefile_target = "sbatch_{}".format(self.projection_module)

    # Directory layout: config-like dirs under configDir, the rest under outputDir.
    self.dirs = {}
    dir_types_all = [ DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS,
                      DKEY_LOGS, DKEY_SCRIPTS, DKEY_HADD_RT ]
    dir_types_cfg = [ DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT ]

    # Reference genWeights are not needed for the pileup-histogram module.
    genWeights_path = os.path.join(
        os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau', 'data',
        'refGenWeight_{}.txt'.format(self.era))
    self.ref_genWeights = load_refGenWeightsFromFile(genWeights_path) \
        if projection_module != 'puHist' else {}

    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in dir_types_all:
            if dir_type == DKEY_PLOTS:
                continue  # plots are channel-level only, not per-process
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in dir_types_cfg:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, process_name)
    for dir_type in dir_types_cfg:
        initDict(self.dirs, [dir_type])
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

    self.cvmfs_error_log = {}
    self.num_jobs = {
        'hadd': 0,
        'project': 0,
        'plot': 0,
    }
def __init__(self, configDir, outputDir, executable_preselNtuple, channel, samples, era, debug, running_method, rle_directory, version, num_parallel_jobs, pool_id=''):
    """Configure the production of preselected ntuples for one channel.

    Sets up output/config directories, per-sample directory paths and the
    bookkeeping containers used when creating and submitting the
    preselNtuple jobs (via sbatch or a Makefile).
    """
    self.configDir = configDir
    self.outputDir = outputDir
    self.executable_preselNtuple = executable_preselNtuple
    self.channel = channel
    # One input file per job; overall job count is capped.
    self.max_files_per_job = 1
    self.max_num_jobs = 200000
    self.samples = samples
    self.era = era
    self.debug = debug
    # FIX: validate with an explicit exception instead of 'assert', which is
    # stripped when Python runs with -O (matches the other config classes).
    if running_method.lower() not in ["sbatch", "makefile"]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.configDir, "Makefile_%s" % self.channel)
    self.num_parallel_jobs = num_parallel_jobs
    self.pool_id = pool_id if pool_id else uuid.uuid4()
    self.workingDir = os.getcwd()
    # FIX: was a bare Py2 'print'; use logging like the rest of this method.
    logging.info("Working directory is: %s" % self.workingDir)
    self.rle_directory = rle_directory
    self.version = version
    if self.rle_directory == 'default':
        self.rle_directory = os.path.join(
            '/home', getpass.getuser(), 'ttHAnalysis', self.era, self.version,
            'rles', self.channel)
    elif self.rle_directory:
        # A user-supplied RLE directory must already exist.
        if not os.path.isdir(self.rle_directory):
            logging.error("No such directory: '{directory_name}'".format(
                directory_name=self.rle_directory))
            sys.exit(1)
    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.stdout_file = codecs.open(
        os.path.join(self.outputDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
    self.stderr_file = codecs.open(
        os.path.join(self.outputDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
    self.dirs = {}
    self.cfgFiles_preselNtuple_modified = {}
    self.logFiles_preselNtuple = {}
    self.sbatchFile_preselNtuple = os.path.join(
        self.configDir, "sbatch_preselNtuple_%s.py" % self.channel)
    self.inputFiles = {}
    self.outputFiles = {}
    self.filesToClean = []
    # Per-sample directories: configs under configDir, ntuples/logs under outputDir.
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"] or \
           sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
            initDict(self.dirs, [key_dir, dir_type])
            if dir_type in [DKEY_CFGS]:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.configDir, dir_type, self.channel, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
    self.cvmfs_error_log = {}