Esempio n. 1
0
    def __init__(self, treeName, outputDir, executable_addMEM, samples, era, debug, running_method,
                 max_files_per_job, mem_integrations_per_job, max_mem_integrations, num_parallel_jobs):
        """Store the addMEM job settings and set up the output bookkeeping.

        The arguments are copied onto the instance under the same names, the
        channel is fixed to "2lss_1tau", `outputDir` is created, the channel's
        stdout/stderr log files are opened inside it, and one directory path
        per directory type is registered in `self.dirs` for every sample that
        is flagged for use and does not belong to a skipped category.

        Args:
            running_method: "sbatch" or "makefile" (case-insensitive); decides
                the values of `self.is_sbatch` and `self.is_makefile`.
            samples: mapping from sample name to a sample description that
                provides the keys "use_it", "sample_category" and
                "process_name_specific".

        Raises:
            AssertionError: if `running_method` is not one of the two
                supported values.
        """
        # Reject unsupported running methods before touching the file system.
        method = running_method.lower()
        assert(method in ["sbatch", "makefile"]), "Invalid running method: %s" % running_method

        self.treeName = treeName
        self.outputDir = outputDir
        self.executable_addMEM = executable_addMEM
        self.channel = "2lss_1tau"
        self.samples = samples
        self.era = era
        self.debug = debug
        self.max_files_per_job = max_files_per_job
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_mem_integrations = max_mem_integrations
        self.num_parallel_jobs = num_parallel_jobs

        self.running_method = running_method
        self.is_sbatch = (method == "sbatch")
        self.is_makefile = not self.is_sbatch

        self.makefile = os.path.join(self.outputDir, "Makefile_%s" % self.channel)

        self.workingDir = os.getcwd()
        print("Working directory is: " + self.workingDir)

        # The log files live in the output directory, so it has to exist first.
        create_if_not_exists(self.outputDir)
        stdout_path = os.path.join(self.outputDir, "stdout_%s.log" % self.channel)
        stderr_path = os.path.join(self.outputDir, "stderr_%s.log" % self.channel)
        self.stdout_file = codecs.open(stdout_path, 'w', 'utf-8')
        self.stderr_file = codecs.open(stderr_path, 'w', 'utf-8')

        self.cfgFile_addMEM_original = os.path.join(self.workingDir, "addMEM_2lss_1tau_cfg.py")
        self.sbatchFile_addMEM = os.path.join(self.outputDir, "sbatch_addMEM_%s.py" % self.channel)

        # Containers that are filled later on.
        self.dirs = {}
        self.cfgFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        skipped_categories = [ "additional_signal_overlap", "background_data_estimate" ]
        for sample_name, sample_info in self.samples.items():
            is_selected = sample_info["use_it"] and sample_info["sample_category"] not in skipped_categories
            if not is_selected:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [ DKEY_CFGS, DKEY_NTUPLES, DKEY_FINAL_NTUPLES, DKEY_LOGS, DKEY_HADD ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)

        self.cvmfs_error_log = {}
Esempio n. 2
0
 def get_scratch_dir(self):
     """Return the current user's dated tthAnalysis scratch directory.

     If "/scratch/<user>" does not exist yet, a message is printed and the
     command stored in `command_create_scratchDir` is run. The returned
     sub-directory, named "tthAnalysis_<today's ISO date>", is created if
     necessary.
     """
     user_dir = "/scratch/%s" % getpass.getuser()
     user_dir_missing = not os.path.exists(user_dir)
     if user_dir_missing:
         print("Directory '%s' does not yet exist, creating it !!" % user_dir)
         run_cmd(command_create_scratchDir)

     dated_name = "_".join(["tthAnalysis", date.today().isoformat()])
     dated_dir = os.path.join(user_dir, dated_name)
     create_if_not_exists(dated_dir)
     return dated_dir
Esempio n. 3
0
 def get_scratch_dir(self):
     """Return the current user's dated scratch directory for this analysis.

     If "/scratch/<user>" does not exist yet, this is logged and the command
     stored in `command_create_scratchDir` is run. The returned
     sub-directory, named "<self.analysisName>_<today's ISO date>", is
     created if necessary.
     """
     user_dir = "/scratch/%s" % getpass.getuser()
     user_dir_missing = not os.path.exists(user_dir)
     if user_dir_missing:
         message = "Directory '%s' does not yet exist, creating it !!" % user_dir
         logging.info(message)
         run_cmd(command_create_scratchDir)

     today = datetime.date.today().isoformat()
     dated_dir = os.path.join(user_dir, "%s_%s" % (self.analysisName, today))
     create_if_not_exists(dated_dir)
     return dated_dir
Esempio n. 4
0
    def create(self):
        """Prepare the hadd step and write the makefile that drives it.

        In 'sbatch' mode an sbatch hadd submission script is generated and the
        makefile calls it through `python`; in any other mode the makefile
        calls `hadd` directly (after a 60 s sleep when the running method is
        'makefile') and then moves the merged file to its final location.

        Side effect: `self.hadd_script_path` is overwritten with the full
        command line that ends up in the makefile.
        """
        create_if_not_exists(self.hadd_log_dir_path)

        method = self.running_method.lower()
        input_files = list(self.channel_info.keys())

        if method != 'sbatch':
            local_output = os.path.basename(self.final_output_file)
            # Merge into a file named after the basename, then move it into place.
            self.hadd_script_path = 'hadd -f {} {}'.format(local_output, ' '.join(input_files))
            additional_cmds = 'mv {} {}'.format(local_output, self.final_output_file)
        else:
            create_if_not_exists(self.hadd_script_dir_path)

            sbatch_options = dict(
                sbatch_script_file_name=self.hadd_script_path,
                input_file_names=input_files,
                output_file_name=self.final_output_file,
                script_file_name=self.hadd_script_path.replace('.py', '.sh'),
                # NOTE(review): the original author suspected that the
                # basename of this path gets replaced anyway -- confirm.
                log_file_name=self.hadd_log_executable_path,
                working_dir=None,
                waitForJobs=True,
                auxDirName='',
                pool_id=uuid.uuid4(),
                verbose=False,
                # All inputs go into a single hadd job.
                max_input_files_per_job=len(self.channel_info),
                dry_run=self.dry_run,
                use_home=self.use_home,
                min_file_size=-1,
            )
            createScript_sbatch_hadd(**sbatch_options)

            logging.info("Generated hadd config file: %s" % self.hadd_script_path)
            self.hadd_script_path = 'python %s' % self.hadd_script_path
            additional_cmds = ''

        sleep_prefix = 'sleep 60\n\t' if method == 'makefile' else ''
        hadd_script_cmd = '{}{}'.format(sleep_prefix, self.hadd_script_path)

        with open(self.makefile_path, 'w') as makefile:
            template = jinja2.Template(makeFileTemplate)
            rendered = template.render(
                output_file=self.final_output_file,
                channel_info=self.channel_info,
                hadd_script=hadd_script_cmd,
                hadd_wrapper_log=self.hadd_log_wrapper_path,
                additional_cmds=additional_cmds,
                validate_channels=' '.join(self.channels_to_validate),
                output_dir=self.output_dir,
            )
            makefile.write(rendered)
        logging.info("Created the makefile: %s" % self.makefile_path)
Esempio n. 5
0
    def create(self):
        """Creates the config files, the optional sbatch script and the Makefile of the Ntuple production.

        All directories registered in `self.dirs` are created first. For every
        sample that is flagged for use (and is not in a skipped category) the
        input files are split into jobs; each job with a non-empty input list
        gets a config file, an output file name and a log file name. Jobs
        without input files are skipped with a warning.

        Raises:
            AssertionError: if a job has more than one input file.
        """
        for dir_entry in self.dirs.values():
            # Entries are either a single path or a {dir_type: path} mapping.
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        self.inputFileIds = {}
        skipped_categories = [ "additional_signal_overlap", "background_data_estimate" ]
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_prodNtuple, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
            # The directory key depends on the sample only, not on the job.
            key_dir = getKey(sample_name)
            for jobId in inputFileList.keys():
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if len(self.inputFiles[key_file]) == 0:
                    logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, self.inputFiles[key_file]))
                    continue
                assert(len(self.inputFiles[key_file]) == 1), "There is more than one input file!"

                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "produceNtuple_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES],
                    "%s_%i.root" % (process_name, jobId))
                self.logFiles_prodNtuple[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "produceNtuple_%s_%s_%i.log" % (self.channel, process_name, jobId))
                self.createCfg_prodNtuple(
                    self.inputFiles[key_file], self.outputFiles[key_file], self.era,
                    self.cfgFiles_prodNtuple_modified[key_file])

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_prodNtuple)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        # NOTE(review): addToMakefile_clean() is not called here; the call was
        # already commented out in the original code.
        self.createMakefile(lines_makefile)

        logging.info("Done")
Esempio n. 6
0
    def run(self, clean):
        """Record the software state and execute the makefile.

        Args:
            clean: if true, the makefile's 'clean' target is built instead of
                'all'; the process exits with status 1 when the makefile does
                not exist.

        When the running method is 'makefile', make is executed (via `-C`) in
        a directory derived from `self.config_dir` by replacing a leading
        '/home' with '/scratch'; that directory is created if needed.
        """
        record_software_state(self.sw_ver_file_cfg, self.sw_ver_file_out, DEPENDENCIES)

        if clean and not os.path.isfile(self.makefile_path):
            logging.error(
                "The makefile %s is missing and therefore it's not possible to clean anything; "
                "run sync Ntuple production first!" % self.makefile_path)
            sys.exit(1)
        target = 'clean' if clean else 'all'

        # One parallel make job per channel.
        make_args = (
            self.makefile_path,
            len(self.channel_info),
            target,
            self.stderr_file_path,
            self.stdout_file_path,
        )
        make_cmd = "make -f %s -j %d %s 2>%s 1>%s" % make_args

        runs_locally = (self.running_method.lower() == "makefile")
        if runs_locally:
            run_dir = re.sub('^/home', '/scratch', self.config_dir)
            create_if_not_exists(run_dir)
            make_cmd = re.sub('^make', 'make -C {}'.format(run_dir), make_cmd)

        logging.info("Running the make command: %s" % make_cmd)
        run_cmd(make_cmd)
        logging.info("All done")
Esempio n. 7
0
    def create(self):
        """Creates the config files, the optional sbatch script and the Makefile of the Ntuple production.

        All directories registered in `self.dirs` are created first. For every
        sample flagged for use, the input files are split into jobs; output
        trees are grouped into sub-directories of 1000 jobs each ("%04d" of
        jobId // 1000). Jobs without input files are skipped with a warning.

        Returns:
            The number of jobs reported by `createScript_sbatch()` when
            running on the batch system, otherwise 0.

        Raises:
            ValueError: if an MC sample has no entry in
                `self.pileup_histograms`.
        """
        for dir_entry in self.dirs.values():
            # Entries are either a single path or a {dir_type: path} mapping.
            if type(dir_entry) == dict:
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            if is_mc and process_name not in self.pileup_histograms:
                raise ValueError("Missing PU distribution for %s in file %s" %
                                 (process_name, self.pileup))

            logging.info("Creating configuration files to run '%s' for sample %s" %
                         (self.executable, process_name))

            inputFileList = generateInputFileList(sample_info, self.max_files_per_job)
            key_dir = getKey(sample_name)

            # One output sub-directory per block of 1000 job ids.
            block_ids = {job_id // 1000 for job_id in inputFileList.keys()}
            for block_id in block_ids:
                create_if_not_exists(
                    os.path.join(self.dirs[key_dir][DKEY_NTUPLES], '%04d' % block_id))

            for jobId, job_input_files in inputFileList.items():
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = job_input_files
                if len(self.inputFiles[key_file]) == 0:
                    logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" %
                                    (key_file, self.inputFiles[key_file]))
                    continue

                cfg_name = "produceNtuple_%s_%i_cfg.py" % (process_name, jobId)
                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS], cfg_name)

                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES],
                    "%04d" % (jobId // 1000),
                    "tree_%i.root" % jobId)

                log_name = "produceNtuple_%s_%i.log" % (process_name, jobId)
                self.logFiles_prodNtuple[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS], log_name)

                # HLT paths are only passed on for non-MC samples.
                if is_mc:
                    hlt_paths = []
                else:
                    hlt_paths = sample_info["hlt_paths"]

                if self.preselection_cuts["applyHLTcut"]:
                    hlt_cuts = list(Triggers(self.era).triggers_flat)
                else:
                    hlt_cuts = []

                jobOptions = {
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_modified': self.cfgFiles_prodNtuple_modified[key_file],
                    'outputFile': self.outputFiles[key_file],
                    'is_mc': is_mc,
                    'random_seed': jobId,
                    'process_name': process_name,
                    'category_name': sample_info["sample_category"],
                    'triggers': hlt_paths,
                    'HLTcuts': hlt_cuts,
                }
                self.createCfg_prodNtuple(jobOptions)

        num_jobs = 0
        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" %
                         self.executable)
            num_jobs = self.createScript_sbatch()
            logging.info("Generated %i job(s)" % num_jobs)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
        return num_jobs
    def create(self):
        """Creates the config files, the optional sbatch script and the Makefile of the analysis workflow.

        All directories registered in `self.dirs` are created first. For every
        sample flagged for use (and not in a skipped category) one analysis
        job is configured per systematic shift and per input-file block;
        non-MC samples are only processed for the "central" shift. Afterwards
        the 'prepareDatacards' configs and the Makefile are written.
        """
        for dirs_of_key in self.dirs.values():
            for dir_path in dirs_of_key.values():
                create_if_not_exists(dir_path)

        self.inputFileIds = {}
        skipped_categories = ["additional_signal_overlap", "background_data_estimate"]
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in skipped_categories:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" %
                         (self.executable_analyze, process_name))

            input_file_setup = self.initializeInputFileIds(sample_name, sample_info)
            (secondary_files, primary_store, secondary_store) = input_file_setup

            is_mc = (sample_info["type"] == "mc")
            # Only MC is scaled to the luminosity, and only if requested.
            if self.use_lumi and is_mc:
                lumi_scale = sample_info["xsection"] * self.lumi / sample_info["nof_events"]
            else:
                lumi_scale = 1.
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]

            for central_or_shift in self.central_or_shifts:
                # Systematic shifts are not evaluated for non-MC samples.
                skip_shift = (central_or_shift != "central" and not is_mc)
                for jobId in range(len(self.inputFileIds[sample_name])):
                    if skip_shift:
                        continue

                    inputFiles = generate_input_list(
                        self.inputFileIds[sample_name][jobId], secondary_files,
                        primary_store, secondary_store, self.debug)

                    key_dir = getKey(sample_name)
                    key_file = getKey(sample_name, central_or_shift, jobId)

                    cfg_name = "analyze_%s_%s_%s_%i_cfg.py" % (
                        self.channel, process_name, central_or_shift, jobId)
                    self.cfgFiles_analyze_modified[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_CFGS], cfg_name)

                    histogram_name = "%s_%s_%i.root" % (
                        process_name, central_or_shift, jobId)
                    self.histogramFiles[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_HIST], histogram_name)

                    log_name = "analyze_%s_%s_%s_%i.log" % (
                        self.channel, process_name, central_or_shift, jobId)
                    self.logFiles_analyze[key_file] = os.path.join(
                        self.dirs[key_dir][DKEY_LOGS], log_name)

                    self.createCfg_analyze(
                        inputFiles, self.histogramFiles[key_file],
                        sample_category, triggers, self.lepton_selection,
                        self.hadTau_selection, is_mc, central_or_shift,
                        lumi_scale, self.cfgFiles_analyze_modified[key_file])

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" %
                         self.executable_analyze)
            self.createScript_sbatch()

        logging.info("Creating configuration files for executing 'prepareDatacards'")
        for histogramToFit in self.histograms_to_fit:
            self.createCfg_prep_dcard(histogramToFit)

        # The targets are added in the order in which they are registered here.
        lines_makefile = []
        makefile_steps = [
            self.addToMakefile_analyze,
            self.addToMakefile_hadd_stage1,
            self.addToMakefile_backgrounds_from_data,
            self.addToMakefile_hadd_stage2,
            self.addToMakefile_prep_dcard,
            self.addToMakefile_clean,
        ]
        for add_step in makefile_steps:
            add_step(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
  def create(self):
    """Creates the config files, the optional sbatch script and the Makefile of the analysis workflow.

    The directory layout is registered in `self.dirs` and created on disk:
    per (process, mode) directories plus channel-wide ones; config-like
    directory types live under `self.configDir`, the others under
    `self.outputDir`. One analysis job is then configured per mode, sample,
    systematic shift and input-file block; jobs without input ntuples are
    skipped with a warning. The histogram of every job is registered as input
    of the stage-1 hadd of its (process, mode), and each stage-1 output is
    registered exactly once as input of the stage-2 hadd.

    Returns:
      The value of `self.num_jobs`.
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      process_name = sample_info["process_name_specific"]
      for mode in self.modes:
        key_dir = getKey(process_name, mode)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
          if dir_type == DKEY_SYNC and not self.do_sync:
            continue
          initDict(self.dirs, [ key_dir, dir_type ])
          base_dir = self.configDir if dir_type in [ DKEY_CFGS, DKEY_LOGS ] else self.outputDir
          self.dirs[key_dir][dir_type] = os.path.join(base_dir, dir_type, self.channel, mode, process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    for dir_entry in self.dirs.values():
      # Entries are either a single path or a {dir_type: path} mapping.
      if isinstance(dir_entry, dict):
        for dir_path in dir_entry.values():
          create_if_not_exists(dir_path)
      else:
        create_if_not_exists(dir_entry)

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for mode in self.modes:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")

        # These keys depend on the process and mode only, not on the job.
        key_dir = getKey(process_name, mode)
        key_hadd_stage1 = getKey(process_name, mode)
        inputFileList = inputFileLists[sample_name]

        for central_or_shift in self.central_or_shifts:
          for jobId in inputFileList.keys():

            # build config files for executing analysis code
            key_analyze_job = getKey(process_name, mode, central_or_shift, jobId)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
              continue

            self.jobOptions_analyze[key_analyze_job] = {
              'ntupleFiles' : ntupleFiles,
              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                (process_name, mode, central_or_shift, jobId)),
              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'sample_category' : sample_category,
              'mode' : mode,
              'lepton_selection' : self.lepton_selection,
              'hadTau_selection' : self.hadTau_selection,
              'SVfit4tau_logM_wMassConstraint_MarkovChain' : self.SVfit4tau_logM_wMassConstraint_MarkovChain,
              'SVfit4tau_logM_woMassConstraint_MarkovChain' : self.SVfit4tau_logM_woMassConstraint_MarkovChain,
              'SVfit4tau_logM_wMassConstraint_VAMP' : self.SVfit4tau_logM_wMassConstraint_VAMP,
              'use_HIP_mitigation_mediumMuonId' : False,
              'is_mc' : is_mc,
              'central_or_shift' : central_or_shift,
              'lumi_scale' : 1.,
              'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

            # initialize input and output file names for hadd_stage1
            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
              self.inputFiles_hadd_stage1[key_hadd_stage1] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            stage1_output = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
              (self.channel, process_name, mode))
            self.outputFile_hadd_stage1[key_hadd_stage1] = stage1_output

            # initialize input and output file names for hadd_stage2
            key_hadd_stage2 = getKey()
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
              self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            # This point is reached once per job and shift; register the
            # stage-1 file only once so that hadd does not add it repeatedly.
            if stage1_output not in self.inputFiles_hadd_stage2[key_hadd_stage2]:
              self.inputFiles_hadd_stage2[key_hadd_stage2].append(stage1_output)
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
Esempio n. 10
0
    def create(self):
        """Creates the config files, the optional sbatch script and the Makefile of the analysis workflow.

        The directory layout is registered in `self.dirs` and created on disk.
        For every combination of jet-smearing option, MET-smearing option and
        sample flagged for use, each input-file block is split into a fixed,
        sample-specific number of jobs; every job processes up to 500 selected
        events and skips the events handled by the preceding jobs of the same
        block. Job histograms are registered as stage-1 hadd inputs and the
        stage-1 outputs as stage-2 hadd inputs.

        Returns:
            The value of `self.num_jobs`.

        Raises:
            ValueError: if a sample flagged for use has no entry in the
                jobs-per-file table below.
        """
        # Number of jobs that each input-file block of a sample is split into.
        num_jobs_per_file_by_process = {
            "signal_ggf_nonresonant_node_sm_hh_2b2v": 500,
            "signal_ggf_nonresonant_cHHH1_hh_2b2v": 100,
            "TTJets_DiLept": 50,
            "TTJets_DiLept_ext1": 50,
            "TTTo2L2Nu": 10,
        }
        # Maximum number of selected events handled by a single job.
        maxSelEvents = 500

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        for dir_entry in self.dirs.values():
            # Entries are either a single path or a {dir_type: path} mapping.
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for apply_jetSmearing in self.apply_jetSmearing_options:
            jetSmearingLabel = "jetSmearingEnabled" if apply_jetSmearing else "jetSmearingDisabled"
            for apply_metSmearing in self.apply_metSmearing_options:
                metSmearingLabel = "metSmearingEnabled" if apply_metSmearing else "metSmearingDisabled"
                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    logging.info(
                        "Creating configuration files to run '%s' for sample %s"
                        % (self.executable_analyze, process_name))

                    inputFileList = inputFileLists[sample_name]
                    if process_name not in num_jobs_per_file_by_process:
                        raise ValueError("Invalid sample: %s" % process_name)
                    numJobsPerFile = num_jobs_per_file_by_process[process_name]
                    numJobs = numJobsPerFile * len(inputFileList.keys())

                    # These keys do not depend on the job.
                    key_dir = getKey(process_name)
                    key_hadd_stage1 = getKey(process_name, jetSmearingLabel,
                                             metSmearingLabel)

                    for jobId in range(1, numJobs + 1):

                        # Floor division keeps the ntuple index an integer
                        # regardless of the interpreter's division semantics.
                        ntupleId = ((jobId - 1) // numJobsPerFile) + 1
                        skipSelEvents = maxSelEvents * (
                            (jobId - 1) % numJobsPerFile)

                        # build config files for executing analysis code
                        key_analyze_job = getKey(process_name,
                                                 jetSmearingLabel,
                                                 metSmearingLabel, jobId)
                        ntupleFiles = inputFileList[ntupleId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        # All per-job file names share the same set of labels.
                        name_labels = (self.channel, process_name,
                                       jetSmearingLabel, metSmearingLabel,
                                       jobId)
                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%s_%i_cfg.py" % name_labels)
                        histogramFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%s_%i.root" % name_labels)
                        logFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%s_%i.log" % name_labels)
                        if self.select_rle_output:
                            rleOutputFile_path = os.path.join(
                                self.dirs[key_dir][DKEY_RLES],
                                "rle_%s_%s_%s_%s_%i.txt" % name_labels)
                        else:
                            rleOutputFile_path = ""
                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'selEventsFileName_output': rleOutputFile_path,
                            'apply_jetSmearing': apply_jetSmearing,
                            'apply_metSmearing': apply_metSmearing,
                            'maxSelEvents': maxSelEvents,
                            'skipSelEvents': skipSelEvents
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            histogramFile_path)
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                            self.dirs[DKEY_HIST],
                            "histograms_harvested_stage1_%s_%s_%s_%s.root" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel))

                    # add output files of hadd_stage1 to list of input files for hadd_stage2
                    key_hadd_stage2 = getKey("")
                    if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                        self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
                        self.dirs[DKEY_HIST],
                        "histograms_harvested_stage2_%s.root" % self.channel)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.targets.extend(self.outputFile_hadd_stage2.values())
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
Esempio n. 11
0
  def create(self):
    """Create all configuration files and the Makefile for the analysis workflow.

    Steps, in order:
      1. create every directory registered in self.dirs;
      2. write one 'analyze' config file per selected sample, hadTau selection, fake-rate weight
         setting, gen-matching category, charge selection, central_or_shift value and input-file job;
      3. if self.is_sbatch is set, write the batch submission script;
      4. write the config files for 'addBackgrounds', 'addBackgroundFakes', 'prepareDatacards'
         and 'makePlots';
      5. assemble and write the Makefile.

    The method fills the bookkeeping dictionaries on self and has no return value.
    """

    # self.dirs maps a key either to a path or to a {dir_type: path} dictionary
    for dir_entry in self.dirs.values():
      if isinstance(dir_entry, dict):
        for dir_path in dir_entry.values():
          create_if_not_exists(dir_path)
      else:
        create_if_not_exists(dir_entry)

    # presumably filled per sample by initializeInputFileIds() below -- TODO confirm
    self.inputFileIds = {}
    excluded_categories = [ "additional_signal_overlap", "background_data_estimate" ]
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in excluded_categories:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

      ( secondary_files, primary_store, secondary_store ) = self.initializeInputFileIds(sample_name, sample_info)

      is_mc = (sample_info["type"] == "mc")
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      apply_trigger_bits = (is_mc and self.era == "2016" and sample_info["reHLT"]) or not is_mc

      for hadTau_selection in self.hadTau_selections:
        for hadTau_frWeight in [ "enabled", "disabled" ]:
          if hadTau_frWeight == "enabled" and not hadTau_selection.startswith("Fakeable"):
            continue
          hadTau_selection_and_frWeight = get_hadTau_selection_and_frWeight(hadTau_selection, hadTau_frWeight)
          for hadTau_genMatch in self.hadTau_genMatches:
            for hadTau_charge_selection in self.hadTau_charge_selections:
              for central_or_shift in self.central_or_shifts:
                # None of these filters depends on the job index, so they are evaluated once
                # per combination instead of once per job.
                if hadTau_genMatch != "all" and not is_mc:
                  continue
                if hadTau_genMatch == "all" and is_mc:
                  continue
                if central_or_shift != "central" and not (hadTau_selection.startswith("Tight") and hadTau_charge_selection == "OS"):
                  continue
                if central_or_shift != "central" and not is_mc:
                  continue
                if hadTau_selection == "Fakeable_mcClosure" and not hadTau_frWeight == "enabled":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                  continue

                key_dir = getKey(sample_name, hadTau_selection, hadTau_frWeight, hadTau_charge_selection)

                for jobId in range(len(self.inputFileIds[sample_name])):
                  key_file = getKey(sample_name, hadTau_selection, hadTau_frWeight, hadTau_genMatch, hadTau_charge_selection, central_or_shift, jobId)

                  self.ntupleFiles[key_file] = generate_input_list(self.inputFileIds[sample_name][jobId], secondary_files, primary_store, secondary_store, self.debug)
                  self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%s_%i_cfg.py" % \
                    (self.channel, process_name, hadTau_selection_and_frWeight, hadTau_genMatch, hadTau_charge_selection, central_or_shift, jobId))
                  self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%s_%i.root" % \
                    (process_name, hadTau_selection_and_frWeight, hadTau_genMatch, hadTau_charge_selection, central_or_shift, jobId))
                  self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%s_%i.log" % \
                    (self.channel, process_name, hadTau_selection_and_frWeight, hadTau_genMatch, hadTau_charge_selection, central_or_shift, jobId))

                  self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
                    hadTau_selection, hadTau_genMatch, self.apply_hadTauGenMatching, hadTau_frWeight, hadTau_charge_selection,
                    is_mc, central_or_shift, lumi_scale, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'addBackgrounds'")
    process_names = []
    process_names.extend(self.nonfake_backgrounds)
    process_names.extend([ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ])
    # sum non-fake contributions for each MC sample separately
    # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j, TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
    # output processes: TT; ...
    for process_name in process_names:
      for hadTau_selection in self.hadTau_selections:
        for hadTau_frWeight in self.hadTau_frWeights:
          if hadTau_frWeight == "enabled" and not hadTau_selection.startswith("Fakeable"):
            continue
          hadTau_selection_and_frWeight = get_hadTau_selection_and_frWeight(hadTau_selection, hadTau_frWeight)
          for hadTau_charge_selection in self.hadTau_charge_selections:
            key = getKey(process_name, hadTau_selection, hadTau_frWeight, hadTau_charge_selection)
            self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_%s_%s_%s.root" % \
              (self.channel, process_name, hadTau_selection_and_frWeight, hadTau_charge_selection))
            self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_%s_%s_%s_cfg.py" % \
              (self.channel, process_name, hadTau_selection_and_frWeight, hadTau_charge_selection))
            histogramDir = getHistogramDir(hadTau_selection, hadTau_frWeight, hadTau_charge_selection)
            processes_input = [ "%s%s" % (process_name, genMatch) for genMatch in self.hadTau_genMatches_nonfakes ]
            self.process_output_addBackgrounds[key] = process_name
            self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
              [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
    # sum fake contributions for the total of all MC samples
    # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
    # output process: fakes_mc
    for hadTau_selection in self.hadTau_selections:
      for hadTau_frWeight in self.hadTau_frWeights:
        if hadTau_frWeight == "enabled" and not hadTau_selection.startswith("Fakeable"):
          continue
        hadTau_selection_and_frWeight = get_hadTau_selection_and_frWeight(hadTau_selection, hadTau_frWeight)
        for hadTau_charge_selection in self.hadTau_charge_selections:
          key = getKey(hadTau_selection, hadTau_frWeight, hadTau_charge_selection)
          self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
            (self.channel, hadTau_selection_and_frWeight, hadTau_charge_selection))
          self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
            (self.channel, hadTau_selection_and_frWeight, hadTau_charge_selection))
          histogramDir = getHistogramDir(hadTau_selection, hadTau_frWeight, hadTau_charge_selection)
          processes_input = []
          for process_name in self.nonfake_backgrounds:
            for genMatch in self.hadTau_genMatches_fakes:
              processes_input.append("%s%s" % (process_name, genMatch))
          self.process_output_addBackgrounds[key] = "fakes_mc"
          self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
            [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])

    logging.info("Creating configuration files for executing 'addBackgroundFakes'")
    for hadTau_charge_selection in self.hadTau_charge_selections:
      # the key must be built from the loop variable; the previously used name
      # 'charge_selection' is not defined anywhere in this method
      key = getKey("fakes_data", hadTau_charge_selection)
      self.histogramFile_addFakes[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundJetToTauFakes_%s_%s.root" % \
        (self.channel, hadTau_charge_selection))
      self.cfgFile_addFakes_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundJetToTauFakes_%s_%s_cfg.py" % \
        (self.channel, hadTau_charge_selection))
      category_signal = "0l_2tau_%s_Tight" % hadTau_charge_selection
      category_sideband = "0l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
      self.createCfg_addFakes(self.histogramFile_hadd_stage1_5, self.histogramFile_addFakes[key], self.cfgFile_addFakes_modified[key],
        category_signal, category_sideband)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)

    logging.info("Creating configuration files for executing 'makePlots'")
    self.createCfg_makePlots()
    if "SS" in self.hadTau_charge_selections:
      self.createCfg_makePlots(self.histogramDir_prep_dcard_SS, "SS")
    if "Fakeable_mcClosure" in self.hadTau_selections:
      self.createCfg_makePlots_mcClosure()

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_make_plots_mcClosure(lines_makefile)
    self.addToMakefile_clean(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
Esempio n. 12
0
    def __init__(self,
                 configDir,
                 outputDir,
                 executable_prodNtuple,
                 cfgFile_prodNtuple,
                 samples,
                 max_files_per_job,
                 era,
                 preselection_cuts,
                 leptonSelection,
                 hadTauSelection,
                 debug,
                 running_method,
                 version,
                 num_parallel_jobs,
                 pool_id=''):
        """Store the job settings, open the log files and register the working directories.

        Args:
            configDir: base directory for the Makefile, the sbatch script, the stdout/stderr
                log files and the DKEY_CFGS / DKEY_LOGS sub-directories.
            outputDir: base directory for the per-sample DKEY_NTUPLES sub-directories.
            executable_prodNtuple: stored unchanged on the instance.
            cfgFile_prodNtuple: config file name, joined onto the current working directory.
            samples: dict mapping a sample name to its sample-info dict; entries whose
                "use_it" value is false get no directories.
            max_files_per_job, era, preselection_cuts, leptonSelection, hadTauSelection,
            debug, version, num_parallel_jobs: stored unchanged on the instance.
            running_method: "sbatch" or "makefile" (compared case-insensitively).
            pool_id: stored on the instance; replaced by a fresh uuid.uuid4() when empty.

        Raises:
            AssertionError: if running_method is neither "sbatch" nor "makefile".
        """

        self.configDir = configDir
        self.outputDir = outputDir
        self.executable_prodNtuple = executable_prodNtuple
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        self.debug = debug

        running_method_lc = running_method.lower()
        assert running_method_lc in ["sbatch", "makefile"], \
            "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = (running_method_lc == "sbatch")
        self.is_makefile = not self.is_sbatch

        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        # NOTE(review): a caller-supplied pool_id is kept as given, whereas the fallback
        # is a uuid.UUID object rather than a string
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        # a single parenthesised argument prints the same text under Python 2 and Python 3
        print("Working directory is: " + self.workingDir)

        self.version = version

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        # both log files are opened here and kept on the instance; nothing in this
        # method closes them
        self.stdout_file = codecs.open(
            os.path.join(self.configDir, "stdout_prodNtuple.log"), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.configDir, "stderr_prodNtuple.log"), 'w',
            'utf-8')
        self.dirs = {}
        self.cfgFile_prodNtuple_original = os.path.join(
            self.workingDir, cfgFile_prodNtuple)
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []

        # per-sample directories: configs and logs live under configDir,
        # ntuples under outputDir
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    base_dir = self.configDir
                else:
                    base_dir = self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, process_name)

        # common directories: only configs and logs are registered here, and both
        # belong under configDir (the former outputDir branch could never be reached)
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
Esempio n. 13
0
    def create(self):
        """Create all configuration files, batch scripts and the Makefile for this channel.

        Steps, in order:
          1. register and create the per-sample and the common directories in self.dirs;
          2. build the input file lists of every selected sample via generateInputFileList;
          3. fill self.jobOptions_analyze and write one 'analyze' config per sample, charge
             selection, central_or_shift value and job, while recording the inputs and
             outputs of the two hadd stages;
          4. write the 'comp_jetToTauFakeRate' and 'makePlots' configs per charge selection;
          5. if self.is_sbatch is set, write the batch submission scripts;
          6. assemble and write the Makefile.

        The method has no return value.
        """

        skipped_categories = [
            "additional_signal_overlap", "background_data_estimate"
        ]

        def is_selected(sample_info):
            # samples that are switched off or belong to a skipped category get no jobs
            return sample_info["use_it"] and \
                sample_info["sample_category"] not in skipped_categories

        def add_make_plots_job(key, file_label, histogramDir, inputFile):
            # registers the options of one 'makePlots' job and writes its config file
            self.jobOptions_make_plots[key] = {
                'executable': self.executable_make_plots,
                'inputFile': inputFile,
                'cfgFile_modified': os.path.join(
                    self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % file_label),
                'outputFile': os.path.join(
                    self.dirs[DKEY_PLOT], "makePlots_%s.png" % file_label),
                'histogramDir': histogramDir,
                'label': None,
                'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
            }
            self.createCfg_makePlots(self.jobOptions_make_plots[key])

        # per-sample directories: configs and logs under configDir, the rest under outputDir
        for sample_name, sample_info in self.samples.items():
            if not is_selected(sample_info):
                continue
            process_name = sample_info["process_name_specific"]
            for charge_selection in self.charge_selections:
                key_dir = getKey(process_name, charge_selection)
                for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                    initDict(self.dirs, [key_dir, dir_type])
                    if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                        base_dir = self.configDir
                    else:
                        base_dir = self.outputDir
                    self.dirs[key_dir][dir_type] = os.path.join(
                        base_dir, dir_type, self.channel,
                        "_".join([charge_selection]), process_name)

        # common directories: only DKEY_HIST ends up under outputDir
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[dir_type] = os.path.join(base_dir, dir_type,
                                               self.channel)

        # self.dirs maps a key either to a path or to a {dir_type: path} dictionary
        for dir_entry in self.dirs.values():
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not is_selected(sample_info):
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not is_selected(sample_info):
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            # per-sample settings, computed once and shared by all jobs of the sample
            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            # NOTE(review): the job options have always been filled from the "genWeight"
            # key; a previously unused local read "apply_genWeight" instead -- confirm
            # which key the sample dictionaries really carry
            apply_genWeight = sample_info["genWeight"] if (
                is_mc and "genWeight" in sample_info) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            inputFileList = inputFileLists[sample_name]

            for charge_selection in self.charge_selections:
                key_dir = getKey(process_name, charge_selection)
                key_hadd_stage1 = getKey(process_name, charge_selection)

                for central_or_shift in self.central_or_shifts:
                    # None of these filters depends on the job index, so they are
                    # evaluated once per shift instead of once per job.
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue

                    for jobId in inputFileList.keys():
                        # build config files for executing analysis code
                        key_analyze_job = getKey(process_name,
                                                 charge_selection,
                                                 central_or_shift, jobId)

                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            # report the key of the job being skipped; the previously
                            # used name 'key_job' is not defined in this method
                            print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                                key_analyze_job, ntupleFiles))
                            continue
                        self.jobOptions_analyze[key_analyze_job] = {
                          'ntupleFiles' : ntupleFiles,
                          'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, charge_selection, central_or_shift, jobId)),
                          'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'sample_category' : sample_category,
                          'triggers' : triggers,
                          'charge_selection' : charge_selection,
                          'jet_minPt' : self.jet_minPt,
                          'jet_maxPt' : self.jet_maxPt,
                          'jet_minAbsEta' : self.jet_minAbsEta,
                          'jet_maxAbsEta' : self.jet_maxAbsEta,
                          'hadTau_selection_denominator' : self.hadTau_selection_denominator,
                          'hadTau_selections_numerator' : self.hadTau_selections_numerator,
                          'absEtaBins' : self.absEtaBins,
                          # hard-coded; the per-sample value
                          # sample_info["use_HIP_mitigation_mediumMuonId"] is not consulted
                          'use_HIP_mitigation_mediumMuonId' : True,
                          'is_mc' : is_mc,
                          'central_or_shift' : central_or_shift,
                          'lumi_scale' : lumi_scale,
                          'apply_genWeight' : apply_genWeight,
                          'apply_trigger_bits' : apply_trigger_bits,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job])

                        # initialize input and output file names for hadd_stage1
                        if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                          (self.channel, process_name, charge_selection))

                # initialize input and output file names for hadd_stage2
                # NOTE(review): within this method the stage1 output is only registered
                # inside the job loop above, so the lookup below presumably raises
                # KeyError when every job of this sample was skipped -- confirm
                key_hadd_stage2 = getKey(charge_selection)
                if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
                  (self.channel, charge_selection))

        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            key_comp_jetToTauFakeRate_job = getKey(charge_selection)
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_HIST],
                        "comp_jetToTauFakeRate_%s.root" % charge_selection),
                    'logFile':
                    os.path.join(
                        self.dirs[DKEY_LOGS],
                        "comp_jetToTauFakeRate_%s.log" % charge_selection),
                    'looseRegion':
                    "jetToTauFakeRate_%s/denominator/" % charge_selection,
                    'tightRegion':
                    "jetToTauFakeRate_%s/numerator/" % charge_selection,
                    'absEtaBins':
                    self.absEtaBins,
                    'ptBins':
                    self.ptBins
                }
            self.createCfg_comp_jetToTauFakeRate(
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job])
            self.targets.append(self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job]['outputFile'])

        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_hadd_stage2 = getKey(charge_selection)
            inputFile_make_plots = self.outputFile_hadd_stage2[key_hadd_stage2]

            # NOTE(review): the file names of this first job do not contain the charge
            # selection, so with more than one charge selection the jobs share the same
            # config and plot paths -- confirm whether that is intended
            add_make_plots_job(
                getKey(charge_selection),
                self.channel,
                "jetToTauFakeRate_%s" % charge_selection,
                inputFile_make_plots)

            # NOTE(review): this attribute is switched after the first job above and is
            # not switched back for the next charge selection
            self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
            for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                add_make_plots_job(
                    getKey(charge_selection, absEtaBin, "denominator"),
                    "%s_%s_denominator_%s" %
                    (self.channel, charge_selection, absEtaBin),
                    "jetToTauFakeRate_%s/denominator/%s" %
                    (charge_selection, absEtaBin),
                    inputFile_make_plots)
                for hadTau_selection_numerator in self.hadTau_selections_numerator:
                    add_make_plots_job(
                        getKey(charge_selection, absEtaBin, "numerator",
                               hadTau_selection_numerator),
                        "%s_%s_numerator_%s_%s" %
                        (self.channel, charge_selection,
                         hadTau_selection_numerator, absEtaBin),
                        "jetToTauFakeRate_%s/numerator/%s/%s" %
                        (charge_selection, hadTau_selection_numerator,
                         absEtaBin),
                        inputFile_make_plots)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
Esempio n. 14
0
    def create(self):
        """Prepare the refGenWeight job of every enabled MC sample.

        Creates all directories registered in self.dirs, then, for each sample
        that is flagged 'use_it' and has type "mc", collects its input files
        and registers the config, log, script, plot and output file paths of
        the job. A sample without input files, or whose output file already
        exists on disk, gets no job. Finally the sbatch submission script
        (only if self.is_sbatch) and the Makefile are written.

        Returns:
            self.num_jobs; its 'refGenWeight' entry is incremented by the
            value returned from self.createScript_sbatch when self.is_sbatch
            is set.
        """
        # An entry of self.dirs is either a single path or a dict of paths.
        for entry in self.dirs.values():
            if type(entry) != dict:
                create_if_not_exists(entry)
                continue
            for path in entry.values():
                create_if_not_exists(path)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue

            process_name = sample_info["process_name_specific"]
            if sample_info["type"] != "mc":
                # only simulated samples are processed here
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            files_by_job = generateInputFileList(sample_info, 1)
            dirs_key = getKey(process_name)
            job_key = getKey(process_name)

            # One job per sample: merge the per-job file lists into one.
            input_files = list(
                itertools.chain.from_iterable(files_by_job.values()))
            self.inputFiles[job_key] = input_files
            if not input_files:
                logging.warning("'%s' = %s --> skipping job !!" %
                                (job_key, input_files))
                continue

            sample_dirs = self.dirs[dirs_key]
            result_path = os.path.join(sample_dirs[DKEY_RESULTS],
                                       "%s.txt" % process_name)
            self.outputFiles[job_key] = result_path
            if os.path.isfile(result_path):
                logging.info('File {} already exists --> skipping job'.format(
                    result_path))
                continue

            cfg_path = os.path.join(
                sample_dirs[DKEY_CFGS],
                "refGenWeight_%s_cfg.txt" % (process_name))
            log_path = os.path.join(
                sample_dirs[DKEY_LOGS],
                "refGenWeight_%s.log" % (process_name))
            script_path = os.path.join(
                sample_dirs[DKEY_CFGS],
                "refGenWeight_%s_cfg.sh" % (process_name))
            # The plot files are handed over as one space-separated string.
            plot_paths = []
            for extension in ['pdf', 'png']:
                plot_paths.append(
                    os.path.join(
                        sample_dirs[DKEY_PLOTS],
                        "refGenWeight_%s.%s" % (process_name, extension)))
            plots = ' '.join(plot_paths)

            self.cfgFiles[job_key] = cfg_path
            self.logFiles[job_key] = log_path
            self.scriptFiles[job_key] = script_path
            self.plotFiles[job_key] = plots

            job_options = {
                'inputFiles': input_files,
                'cfgFile_path': cfg_path,
                'cmdParams': "-i {} -o {} -p {} -v".format(
                    cfg_path, result_path, plots),
                'outputFile': result_path,
                'logFile': log_path,
                'scriptFile': script_path,
            }
            self.jobOptions_sbatch[job_key] = job_options
            self.createCfg(job_options)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            nof_jobs = self.createScript_sbatch(
                self.executable, self.sbatchFile, self.jobOptions_sbatch)
            self.num_jobs['refGenWeight'] += nof_jobs

        logging.info("Creating Makefile")
        makefile_lines = []
        self.addToMakefile(makefile_lines)
        self.addToMakefile_final(makefile_lines)
        self.createMakefile(makefile_lines)
        logging.info("Done")

        return self.num_jobs
Esempio n. 15
0
    def create(self):
        """Creates all config files needed to run the MEM -- either locally or on the batch system

        For every enabled sample the input files are split into event ranges by self.memJobList();
        one addMEM config file is written per event range and the job outputs are grouped per input
        fileset in self.hadd_records. Afterwards the sbatch script (only if self.is_sbatch) and the
        Makefile are created and a per-sample summary of the MEM integrations is printed.

        Returns:
            False if the total number of MEM integrations exceeds self.max_mem_integrations
            (the jobs should not be started in that case), True otherwise.

        Raises:
            AssertionError: if an event range is not associated with exactly one input file.
        """

        def safe_ratio(numerator, denominator):
            # a sample may have no entries or no passing events at all; report 0 instead of
            # aborting the summary with a ZeroDivisionError
            return float(numerator) / denominator if denominator else 0.

        statistics = {}

        # an entry of self.dirs is either a single path or a dict of paths keyed by directory type
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue

            if not os.path.exists(sample_info['local_paths'][0]['path']):
                logging.warning("Skipping sample {sample_name}".format(sample_name = sample_name))
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_addMEM, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
            # typically, the analysis ends here and starts looping b/c the smallest unit of work processes at least one file
            # we need, however, to split the file into event ranges in such a way that each job performs
            # mem_integrations_per_job MEM integrations

            # so what we are going to do is to open each set of files in inputFileList, read the variable
            # requestMEM_2lss_1tau and try to gather the event ranges such that each event range
            # performs up to mem_integrations_per_job integrations per job
            memEvtRangeDict = self.memJobList(inputFileList)

            for jobId, jobInfo in memEvtRangeDict.items():

                key_dir = getKey(sample_name)
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = jobInfo['input_fileset']

                # there should always be a job; the number of files must be compared here, not the
                # list itself (list > int is always True in Python 2 and a TypeError in Python 3)
                assert len(self.inputFiles[key_file]) > 0, "No input files for job: %s ?? !!" % key_file

                #TODO: is this assertion really needed? in principle, no ...
                assert len(self.inputFiles[key_file]) == 1, "There is more than one input file!"
                self.cfgFiles_addMEM_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" % \
                                                                       (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % \
                  (process_name, jobId))
                self.logFiles_addMEM[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" % \
                                                              (self.channel, process_name, jobId))
                self.createCfg_addMEM(
                    self.inputFiles[key_file],
                    jobInfo['event_range'][0],
                    jobInfo['event_range'][1],
                    self.outputFiles[key_file],
                    self.era,
                    self.cfgFiles_addMEM_modified[key_file],
                )

                # associate the output file with the fileset_id: all event ranges cut from the same
                # fileset are collected under one hadd output file
                fileset_id = jobInfo['fileset_id']
                hadd_output = os.path.join(
                    self.dirs[key_dir][DKEY_FINAL_NTUPLES], '%s_%i.root' % ('tree', fileset_id)
                )
                if hadd_output not in self.hadd_records:
                    self.hadd_records[hadd_output] = {}
                    self.hadd_records[hadd_output]['output_files'] = []
                self.hadd_records[hadd_output]['fileset_id'] = fileset_id
                self.hadd_records[hadd_output]['output_files'].append(self.outputFiles[key_file])

            # let's sum the number of integration per sample
            # (the number of entries is counted once per fileset, not once per event range)
            nofEntriesMap = {}
            for v in memEvtRangeDict.values():
                if v['fileset_id'] not in nofEntriesMap:
                    nofEntriesMap[v['fileset_id']] = v['nof_entries']
            statistics[process_name] = {
                'nof_int'         : sum([entry['nof_int'] for entry in memEvtRangeDict.values()]),
                'nof_entries'     : sum(nofEntriesMap.values()),
                'nof_jobs'        : len(memEvtRangeDict),
                'nof_events_pass' : sum([entry['nof_events_pass'] for entry in memEvtRangeDict.values()]),
                'nof_int_pass'    : sum([entry['nof_int_pass'] for entry in memEvtRangeDict.values()]),
            }

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addMEM)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_addMEM(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.createMakefile(lines_makefile)

        # width of the name column in the summary; the fallback keeps max() from raising
        # ValueError when no sample was processed at all
        ws_len = max([len(kk) + 1 for kk in statistics.keys()] or [0])
        total_nof_integrations_sum = sum(x['nof_int'] for x in statistics.values())
        total_nof_entries          = sum(x['nof_entries'] for x in statistics.values())
        total_nof_integrations_avg = safe_ratio(total_nof_integrations_sum, total_nof_entries)
        total_nof_jobs             = sum(x['nof_jobs'] for x in statistics.values())
        total_nof_pass             = sum(x['nof_events_pass'] for x in statistics.values())
        total_nof_int_pass_avg     = safe_ratio(sum(x['nof_int_pass'] for x in statistics.values()), total_nof_pass)
        for k, v in statistics.items():
            print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass)' %
                  (k, ' ' * (ws_len - len(k)), v['nof_int'], v['nof_entries'], v['nof_jobs'],
                   safe_ratio(v['nof_int'], v['nof_entries']), v['nof_events_pass'],
                   safe_ratio(100 * v['nof_events_pass'], v['nof_entries']),
                   safe_ratio(v['nof_int_pass'], v['nof_events_pass'])))
        print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass)' %
              ('total', ' ' * (ws_len - len('total')), total_nof_integrations_sum, total_nof_entries, total_nof_jobs,
               total_nof_integrations_avg, total_nof_pass, total_nof_int_pass_avg))

        if total_nof_integrations_sum > self.max_mem_integrations:
            logging.error("Will not start the jobs (max nof integrations exceeded)!")
            return False
        else:
            logging.info("Done")
            return True
  def create(self):
    """Set up the complete analysis workflow: directories, job configuration files and Makefile.

    Steps, in order:
      1. register in self.dirs the directories of every (process, lepton selection, systematic
         shift) combination, of the "prepareDatacards" step and of the channel as a whole;
      2. create all registered directories, logging the progress in percent;
      3. build one 'analyze' job per (lepton selection, sample, shift, input file chunk) and
         register its histogram file as input of hadd_stage1, whose output feeds hadd_stage2;
      4. write the 'prepareDatacards' and post-fit configuration for each histogram in
         self.histograms_to_fit;
      5. write the sbatch submission script (only if self.is_sbatch) and the Makefile.

    Returns:
      self.num_jobs
    """

    # placeholders that are handled like a systematic shift when directories are registered
    dummy_shifts = ["", "hadd", "addBackgrounds"]

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for shift_or_dummy in dummy_shifts + self.central_or_shifts:
          for process_or_dummy in (process_name, "hadd"):
            if process_or_dummy == "hadd" and shift_or_dummy in ("hadd", "addBackgrounds"):
              continue
            # genuine systematic shifts get directories only for MC samples that accept them
            is_systematic = shift_or_dummy != "central" and shift_or_dummy not in dummy_shifts
            if is_systematic and not (is_mc and self.accept_central_or_shift(shift_or_dummy, sample_info)):
              continue

            dirs_key = getKey(process_or_dummy, lepton_selection, shift_or_dummy)
            selection_dir = "_".join([ lepton_selection ])
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ dirs_key, dir_type ])
              if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                # config and log files are kept separately for each shift
                path = os.path.join(self.configDir, dir_type, self.channel,
                                    selection_dir, process_or_dummy, shift_or_dummy)
              else:
                path = os.path.join(self.outputDir, dir_type, self.channel,
                                    selection_dir, process_or_dummy)
              self.dirs[dirs_key][dir_type] = path

    # directories of the datacard preparation step
    subdirectory = "prepareDatacards"
    dirs_key = getKey(subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ dirs_key, dir_type ])
      in_config_area = dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]
      base_dir = self.configDir if in_config_area else self.outputDir
      self.dirs[dirs_key][dir_type] = os.path.join(base_dir, dir_type, self.channel, subdirectory)

    # channel-wide directories; these entries of self.dirs are plain paths, not dicts
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
      initDict(self.dirs, [ dir_type ])
      in_config_area = dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]
      base_dir = self.configDir if in_config_area else self.outputDir
      self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

    numDirectories = sum(len(entry) if type(entry) == dict else 1 for entry in self.dirs.values())
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    nof_created = 0
    percent = 1
    for entry in self.dirs.values():
      if type(entry) == dict:
        for path in entry.values():
          create_if_not_exists(path)
        nof_created += len(entry)
      else:
        create_if_not_exists(entry)
        nof_created += 1
      # report every percent step that has been reached by now
      while 100 * nof_created >= percent * numDirectories:
        logging.info(" %i%% completed" % percent)
        percent += 1
    logging.info("Done.")

    # split the input files of every enabled sample into per-job chunks, once per sample
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]

        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        files_by_job = inputFileLists[sample_name]
        for central_or_shift in self.central_or_shifts:
          if not is_mc and central_or_shift != "central":
            continue
          # NOTE(review): unlike the directory registration above, self.accept_central_or_shift
          # is not consulted here -- confirm that a rejected shift cannot reach this point

          # build config files for executing analysis code
          analyze_dirs_key = getKey(process_name, lepton_selection, central_or_shift)

          for jobId in files_by_job.keys():
            job_tuple = (process_name, lepton_selection, central_or_shift, jobId)
            job_key = getKey(*job_tuple)
            ntuples = files_by_job[jobId]
            if len(ntuples) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (job_key))
              continue

            analyze_dirs = self.dirs[analyze_dirs_key]
            cfg_path = os.path.join(analyze_dirs[DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % job_tuple)
            log_path = os.path.join(analyze_dirs[DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % job_tuple)
            if self.select_rle_output:
              rle_path = os.path.join(analyze_dirs[DKEY_RLES], "rle_%s_%s_%s_%i.txt" % job_tuple)
            else:
              rle_path = ""
            hist_path = os.path.join(analyze_dirs[DKEY_HIST], "analyze_%s_%s_%s_%i.root" % job_tuple)

            job_options = {
              'ntupleFiles'              : ntuples,
              'cfgFile_modified'         : cfg_path,
              'histogramFile'            : hist_path,
              'logFile'                  : log_path,
              'selEventsFileName_output' : rle_path,
              'leptonSelection'          : lepton_selection,
              'applyFakeRateWeights'     : "disabled",
              'central_or_shift'         : central_or_shift,
            }
            self.jobOptions_analyze[job_key] = job_options
            self.createCfg_analyze(job_options, sample_info)

            # initialize input and output file names for hadd_stage1
            stage1_tuple = (process_name, lepton_selection)
            stage1_dirs_key = getKey(process_name, lepton_selection)
            stage1_key = getKey(*stage1_tuple)
            self.inputFiles_hadd_stage1.setdefault(stage1_key, []).append(
              self.jobOptions_analyze[job_key]['histogramFile'])
            self.outputFile_hadd_stage1[stage1_key] = os.path.join(
              self.dirs[stage1_dirs_key][DKEY_HIST], "hadd_stage1_%s_%s.root" % stage1_tuple)

        # initialize input and output file names for hadd_stage2
        # NOTE(review): if every job of this sample was skipped above, this method registered no
        # hadd_stage1 output for it and the lookup below presumably raises KeyError -- confirm
        stage1_key = getKey(process_name, lepton_selection)
        stage2_dirs_key = getKey("hadd", lepton_selection)
        stage2_key = getKey(lepton_selection)
        self.inputFiles_hadd_stage2.setdefault(stage2_key, []).append(self.outputFile_hadd_stage1[stage1_key])
        self.outputFile_hadd_stage2[stage2_key] = os.path.join(
          self.dirs[stage2_dirs_key][DKEY_HIST], "hadd_stage2_%s.root" % lepton_selection)

    logging.info("Creating configuration files to run 'prepareDatacards'")
    # replace both process collections by fresh lists holding the same items
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)
    for histogramToFit in self.histograms_to_fit:
      # NOTE(review): the "Tight" selection is hard-coded; this lookup only works if "Tight" is
      # one of self.lepton_selections -- confirm
      stage2_key = getKey("Tight")
      dcard_dirs_key = getKey("prepareDatacards")
      dcard_tuple = (self.channel, histogramToFit)
      dcard_key = getKey(histogramToFit)
      dcard_dirs = self.dirs[dcard_dirs_key]
      datacardFile = os.path.join(dcard_dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % dcard_tuple)
      dcard_options = {
        'inputFile' : self.outputFile_hadd_stage2[stage2_key],
        'cfgFile_modified' : os.path.join(dcard_dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % dcard_tuple),
        'datacardFile' : datacardFile,
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.jobOptions_prep_dcard[dcard_key] = dcard_options
      self.createCfg_prep_dcard(dcard_options)

      # the post-fit step works on its own copy of the template options, one per histogram
      postfit_options = copy.deepcopy(self.jobOptions_postFit)
      combine_dir = self.dirs[DKEY_COMBINE_OUTPUT]
      postfit_options['fit_result'] = os.path.join(
        combine_dir, 'fit_{}'.format(histogramToFit), postfit_options['target'])
      postfit_options['hadd_stage2'] = self.outputFile_hadd_stage2[stage2_key]
      postfit_options['prepare_datacard'] = datacardFile
      postfit_options['data_datacard'] = os.path.join(
        dcard_dirs[DKEY_DCRD], "prepareDatacards_data_%s_%s.root" % dcard_tuple)
      postfit_options['pseudodata_datacard'] = os.path.join(
        dcard_dirs[DKEY_DCRD], "prepareDatacards_pseudodata_%s_%s.root" % dcard_tuple)
      postfit_options['makefile'] = os.path.join(combine_dir, 'Makefile_{}'.format(histogramToFit))
      postfit_options['stdout'] = os.path.join(combine_dir, 'stdout_{}.log'.format(histogramToFit))
      self.createCfg_postFit(postfit_options)

    # the script path is set even when the jobs are not submitted via sbatch
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    makefile_lines = []
    self.addToMakefile_analyze(makefile_lines)
    self.addToMakefile_hadd_stage1(makefile_lines)
    self.addToMakefile_hadd_stage2(makefile_lines, make_dependency = "phony_hadd_stage1")
    self.addToMakefile_prep_dcard(makefile_lines)
    self.addToMakefile_postFit(makefile_lines)
    self.createMakefile(makefile_lines)

    logging.info("Done.")

    return self.num_jobs
Esempio n. 17
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])
  
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))  
    
      is_mc = (sample_info["type"] == "mc")
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      apply_genWeight = sample_info["apply_genWeight"] if (is_mc and "apply_genWeight" in sample_info.keys()) else False
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        hadTau_selection = lepton_and_hadTau_selection
        if self.applyFakeRateWeights == "2lepton":
          hadTau_selection = "Tight"
        hadTau_selection = "|".join([ hadTau_selection, self.hadTau_selection_part2 ])        
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          
          for lepton_charge_selection in self.lepton_charge_selections:
            for central_or_shift in self.central_or_shifts:

              inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
              for jobId in inputFileList.keys():
                if central_or_shift != "central" and not (lepton_and_hadTau_selection.startswith("Tight") and lepton_charge_selection == "SS"):
                  continue
                if central_or_shift != "central" and not is_mc:
                  continue                
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                  continue
  
                key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
                key_file = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection, central_or_shift, jobId)

                self.ntupleFiles[key_file] = inputFileList[jobId]
                if len(self.ntupleFiles[key_file]) == 0:
                  print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, self.ntupleFiles[key_file])
                  continue
                self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId))
                self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % \
                  (process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId))
                self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId))
                self.rleOutputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId)) if self.select_rle_output else ""
                
                applyFakeRateWeights = self.applyFakeRateWeights
                if lepton_and_hadTau_frWeight == "disabled":
                  applyFakeRateWeights = "disabled"
                self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
                  lepton_selection, self.apply_leptonGenMatching, lepton_charge_selection, hadTau_selection, self.apply_hadTauGenMatching,
                  applyFakeRateWeights, is_mc, central_or_shift, lumi_scale, apply_genWeight, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file],
                  self.rleOutputFiles[key_file])
                
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()

    for key in self.histogramFiles.keys():
      self.inputFiles_hadd_stage1.append(self.histogramFiles[key])

    logging.info("Creating configuration files for executing 'addBackgrounds'")  
    process_names = []
    process_names.extend(self.nonfake_backgrounds)
    process_names.extend([ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ])
    # sum non-fake contributions for each MC sample separately
    # input processes: TT2l0j,...
    # output processes: TT; ...
    for process_name in process_names:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:
            key = getKey(process_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_%s_%s_%s.root" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))        
            self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_%s_%s_%s_cfg.py" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))
            histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            processes_input = [ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ]
            # CV: treat fakes in ttH signal events as "signal", not as "background"
            ##if process_name in [ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ]:
            ##  processes_input.extend([ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
            self.process_output_addBackgrounds[key] = process_name
            self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
              [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
    # sum fake contributions for each MC sample separately
    # input processes: TT1l1j,TT0l2j,...
    # output processes: fakes_TT; ...
    for process_name in process_names:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:
            key = getKey("fakes_%s" % process_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_%s_%s_%s.root" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))        
            self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_%s_%s_%s_cfg.py" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))
            histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            processes_input = [ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
            self.process_output_addBackgrounds[key] = "fakes_%s" % process_name
            self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
              [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
    # sum fake contributions for the total of all MC samples
    # input processes: TT1l1j,TT0l2j,...
    # output process: fakes_mc
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
        for lepton_charge_selection in self.lepton_charge_selections:
          key = getKey(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
          self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
            (self.channel, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))
          self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
            (self.channel, lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection))
          histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
          processes_input = []
          for process_name in self.nonfake_backgrounds:
            for genMatch in self.lepton_and_hadTau_genMatches_fakes:
              processes_input.append("%s%s" % (process_name, genMatch))
          self.process_output_addBackgrounds[key] = "fakes_mc"
          self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
            [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
        
    logging.info("Creating configuration files for executing 'addBackgroundLeptonFakes'")
    for lepton_charge_selection in self.lepton_charge_selections:
      key = getKey("fakes_data", lepton_charge_selection) 
      self.histogramFile_addFakes[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundLeptonFakes_%s_%s.root" % \
        (self.channel, lepton_charge_selection))
      self.cfgFile_addFakes_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundLeptonFakes_%s_%s_cfg.py" % \
        (self.channel, lepton_charge_selection))
      category_signal = "2lss_1tau_%s_Tight" % lepton_charge_selection
      category_sideband = "2lss_1tau_%s_Fakeable_wFakeRateWeights" % lepton_charge_selection
      self.createCfg_addFakes(self.histogramFile_hadd_stage1_5, self.histogramFile_addFakes[key], self.cfgFile_addFakes_modified[key],
        category_signal, category_sideband)

    logging.info("Creating configuration files for executing 'addBackgroundLeptonFlips'")
    self.createCfg_addFlips(self.histogramFile_hadd_stage1_5, self.histogramFile_addFlips, self.cfgFile_addFlips_modified)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)

    logging.info("Creating configuration files for executing 'makePlots'")
    self.createCfg_makePlots()
    if "OS" in self.lepton_charge_selections:
      make_plots_backgrounds = self.make_plots_backgrounds
      if "flips_data" in make_plots_backgrounds:
        make_plots_backgrounds.remove("flips_data")
      self.createCfg_makePlots(self.histogramDir_prep_dcard_OS, "OS", make_plots_backgrounds)
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
      self.createCfg_makePlots_mcClosure()

    self.inputFiles_hadd_stage2 = [ self.histogramFile_hadd_stage1_5 ] + self.histogramFile_addFakes.values() + [ self.histogramFile_addFlips ]
    
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_make_plots_mcClosure(lines_makefile)
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
Esempio n. 18
0
    def create(self):
        """Prepare the directory tree, the per-job analysis configs, the sbatch script and the Makefile

        The method registers all directories in self.dirs, creates them on disk,
        builds one job-option dictionary per (sample, job, systematic) combination
        and finally writes the submission script and the Makefile.

        Returns:
            self.num_jobs
        """
        dummy_steps = ["hadd", "copyHistograms", "addBackgrounds"]

        # --- register per-sample directories -------------------------------------------
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            shifts_and_dummies = [""]
            shifts_and_dummies.extend(self.central_or_shifts)
            shifts_and_dummies.extend(dummy_steps)
            for shift_or_dummy in shifts_and_dummies:
                for process_or_dummy in (process_name, "hadd"):
                    # the "hadd" pseudo-process is only registered together with the empty
                    # shift label; this also covers the combination of "hadd" with any of
                    # the dummy step names, since none of them is the empty string
                    if process_or_dummy == "hadd" and shift_or_dummy != "":
                        continue
                    key_dir = getKey(process_or_dummy, shift_or_dummy)
                    for dir_type in (DKEY_CFGS, DKEY_LOGS, DKEY_RLES, DKEY_SYNC):
                        initDict(self.dirs, [key_dir, dir_type])
                        # config and log files live below configDir, everything else below outputDir
                        if dir_type in (DKEY_CFGS, DKEY_LOGS):
                            base_dir = self.configDir
                        else:
                            base_dir = self.outputDir
                        self.dirs[key_dir][dir_type] = os.path.join(
                            base_dir, dir_type, self.channel, process_or_dummy, shift_or_dummy)

        # --- register directories of the post-processing steps -------------------------
        step_names = (
            "addBackgrounds", "addBackgroundLeptonFakes",
            "prepareDatacards", "addSystFakeRates", "makePlots",
        )
        for subdirectory in step_names:
            key_dir = getKey(subdirectory)
            for dir_type in (DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT):
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in (DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT):
                    base_dir = self.configDir
                else:
                    base_dir = self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, self.channel, subdirectory)

        # --- register the channel-wide top-level directories ---------------------------
        for dir_type in (DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_SYNC, DKEY_HADD_RT):
            initDict(self.dirs, [dir_type])
            if dir_type in (DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT):
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

        # --- create everything on disk, reporting progress in percent ------------------
        # an entry of self.dirs is either a single path or a dictionary of paths
        numDirectories = 0
        for entry in self.dirs.values():
            if type(entry) is dict:
                numDirectories += len(entry)
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for entry in self.dirs.values():
            if type(entry) is dict:
                for dir_path in entry.values():
                    create_if_not_exists(dir_path)
                numDirectories_created += len(entry)
            else:
                create_if_not_exists(entry)
                numDirectories_created += 1
            # emit one log line for every percent that has been reached by now
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac += 1
        logging.info("Done.")

        # --- collect the input files of all enabled samples ----------------------------
        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        # --- build the job options and the config file of every analysis job -----------
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))
            jobs_of_sample = inputFileLists[sample_name]

            for jobId in jobs_of_sample.keys():
                for central_or_shift in self.central_or_shifts:
                    logging.info(" ... for systematic uncertainty %s" %
                                 central_or_shift)

                    key_analyze_dir = getKey(process_name, central_or_shift)
                    job_tag = (process_name, central_or_shift, jobId)
                    key_analyze_job = getKey(*job_tag)
                    ntupleFiles = jobs_of_sample[jobId]
                    if len(ntupleFiles) == 0:
                        print("Warning: no ntupleFiles --> skipping job !!")
                        continue

                    job_dirs = self.dirs[key_analyze_dir]
                    syncOutput = os.path.join(
                        job_dirs[DKEY_SYNC],
                        '%s_%s.root' % (self.channel, central_or_shift))
                    # the central tree sits at top level, shifted trees below a
                    # sub-path named after the systematic
                    if central_or_shift == "central":
                        syncOutputTree = self.output_tree
                    else:
                        syncOutputTree = os.path.join(central_or_shift, self.output_tree)
                    self.inputFiles_sync['sync'].append(syncOutput)

                    cfgFile_modified_path = os.path.join(
                        job_dirs[DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % job_tag)
                    logFile_path = os.path.join(
                        job_dirs[DKEY_LOGS], "analyze_%s_%s_%i.log" % job_tag)

                    # rle_select is only forwarded when it is set and is not a
                    # template still containing a '%s' placeholder
                    if self.rle_select and '%s' not in self.rle_select:
                        syncRLE = self.rle_select
                    else:
                        syncRLE = ''

                    job_options = {
                        'ntupleFiles': ntupleFiles,
                        'cfgFile_modified': cfgFile_modified_path,
                        'histogramFile': '',
                        'logFile': logFile_path,
                        'syncTree': syncOutputTree,
                        'syncOutput': syncOutput,
                        'syncRLE': syncRLE,
                        'useNonNominal': self.use_nonnominal,
                    }
                    self.jobOptions_analyze[key_analyze_job] = job_options
                    self.createCfg_analyze(job_options, sample_info)

        # --- submission script and Makefile --------------------------------------------
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze,
                                            self.sbatchFile_analyze,
                                            self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        self.outputFile_sync['sync'] = os.path.join(
            self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.addToMakefile_hadd_sync(lines_makefile)
        self.targets.extend(self.phoniesToAdd)
        self.createMakefile(lines_makefile)
        logging.info("Done")
        return self.num_jobs
Esempio n. 19
0
    def create(self):
        """Create the config files, the optional sbatch script and the Makefile for the projection jobs

        Only samples that are flagged 'use_it' and whose type is "mc" are processed.
        A sample whose output file already exists and passes tools_is_file_ok is
        registered in self.outputFiles but gets no jobs.

        Returns:
            self.num_jobs; when running via sbatch its 'project' entry is increased
            by the value returned from createScript_sbatch.

        Raises:
            RuntimeError: if the projection module is not 'puHist' and the process
                has no entry in self.ref_genWeights.
        """

        # an entry of self.dirs is either a single path or a dictionary of paths
        for dir_entry in self.dirs.values():
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            if not is_mc:
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            inputFileList = generateInputFileList(sample_info,
                                                  self.max_files_per_job)
            key_dir = getKey(process_name)
            sample_dirs = self.dirs[key_dir]

            outputFile = os.path.join(sample_dirs[DKEY_HISTO],
                                      "%s.root" % process_name)
            self.outputFiles[process_name] = {
                'inputFiles': [],
                'outputFile': outputFile,
            }
            if os.path.isfile(outputFile) and tools_is_file_ok(
                    outputFile, min_file_size=2000):
                logging.info('File {} already exists --> skipping job'.format(
                    outputFile))
                continue

            for jobId in inputFileList.keys():

                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if len(self.inputFiles[key_file]) == 0:
                    logging.warning("'%s' = %s --> skipping job !!" %
                                    (key_file, self.inputFiles[key_file]))
                    continue

                self.cfgFiles_projection[key_file] = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "project_%s_%i_cfg.txt" % (process_name, jobId))
                self.outputFiles_tmp[key_file] = os.path.join(
                    sample_dirs[DKEY_HISTO_TMP],
                    "histogram_%i.root" % jobId)
                self.logFiles_projection[key_file] = os.path.join(
                    sample_dirs[DKEY_LOGS],
                    "project_%s_%i.log" % (process_name, jobId))
                self.scriptFiles_projection[key_file] = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "project_%s_%i_cfg.sh" % (process_name, jobId))

                # the generic "count" module is replaced by a specialised variant;
                # the first matching criterion below decides which suffix is appended
                projection_module = self.projection_module
                if projection_module == "count":
                    projection_module = "countHistogramAll"
                    if sample_name.startswith('/TTTo'):
                        projection_module += "CompTopRwgt"
                    elif sample_info['sample_category'].startswith('ttH'):
                        projection_module += "CompHTXS"
                    elif isSplitByNlheJet(process_name):
                        projection_module += "SplitByLHENjet"
                    elif isSplitByNlheHT(process_name):
                        projection_module += "SplitByLHEHT"
                    elif isSplitByNlheJetHT(process_name, sample_name):
                        projection_module += "SplitByLHENjetHT"

                job_options = {
                    'histName': process_name,
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_path': self.cfgFiles_projection[key_file],
                    'outputFile': self.outputFiles_tmp[key_file],
                    'logFile': self.logFiles_projection[key_file],
                    'scriptFile': self.scriptFiles_projection[key_file],
                    'projection_module': projection_module,
                }
                if self.projection_module != 'puHist':
                    # validate BEFORE the lookup, so that a missing process is reported
                    # with the descriptive RuntimeError instead of a bare KeyError
                    if process_name not in self.ref_genWeights:
                        raise RuntimeError(
                            "Unable to find reference LHE weight for process %s"
                            % process_name)
                    job_options['ref_genWeight'] = self.ref_genWeights[process_name]
                self.jobOptions_sbatch[key_file] = job_options

                self.createCfg_project(job_options)
                self.outputFiles[process_name]['inputFiles'].append(
                    self.outputFiles_tmp[key_file])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            self.num_jobs['project'] += self.createScript_sbatch(
                self.executable, self.sbatchFile_projection,
                self.jobOptions_sbatch)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_project(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        if self.plot:
            self.addToMakefile_plot(lines_makefile)
        self.addToMakefile_finalHadd(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")

        return self.num_jobs
Esempio n. 20
0
    def submitJob(self, inputFiles, executable, cfgFile, outputFilePath, outputFiles, logFile=None, skipIfOutputFileExists=False):
        """Writes the wrapper script for one job, submits it and records the id of the submitted job

        Args:
            inputFiles: names of the input files; joined with spaces when passed to the job template
            executable: name of the executable, passed to the job template
            cfgFile: path of the config file; the path of the wrapper script is derived
                from it by replacing ".py" with ".sh" and removing "_cfg"
            outputFilePath: directory that is checked for already existing output files
            outputFiles: names of the output files; joined with spaces when passed to the job template
            logFile: path of the log file; if not given, it is placed into self.logFileDir
                and named after the wrapper script
            skipIfOutputFileExists: if True, nothing is submitted as soon as ANY of the
                output files already exists in outputFilePath

        Raises:
            ValueError: if no logFile is given and self.logFileDir is not set,
                or if self.workingDir is not set
        """

        # the path of the wrapper script must be known first, because the default
        # name of the log file is derived from it
        script_file = cfgFile.replace(".py", ".sh")
        script_file = script_file.replace("_cfg", "")

        # raise if logfile missing
        if not logFile:
            if not self.logFileDir:
                raise ValueError(
                    "Please call 'setLogFileDir' before calling 'submitJob' !!")
            logFile = os.path.join(self.logFileDir, os.path.basename(
                script_file).replace(".sh", ".log"))

        # if any of the output files exists, returns (Margus: BUG? Because only
        # that file should be skipped, not all?)
        if skipIfOutputFileExists:
            for outputFile in outputFiles:
                outputFile_full = os.path.join(outputFilePath, outputFile)
                if os.path.exists(outputFile_full):
                    print("output file = '%s' exists --> skipping !!" % outputFile_full)
                    return

        if not self.workingDir:
            raise ValueError(
                "Please call 'setWorkingDir' before calling 'submitJob' !!")

        # create scratch dir; the job-specific directory carries today's date in its name
        scratchDir = "/scratch/%s" % getpass.getuser()
        if not os.path.exists(scratchDir):
            print("Directory '%s' does not yet exist, creating it !!" % scratchDir)
            run_cmd(command_create_scratchDir)
        scratchDir = os.path.join(
            scratchDir, "tthAnalysis" + "_" + date.today().isoformat())
        create_if_not_exists(scratchDir)

        wrapper_log_file = logFile.replace('.log', '_wrapper.log')
        executable_log_file = logFile.replace('.log', '_executable.log')

        command = "%s --partition=%s --output=%s %s" % (
            self.command_submit, self.queue, wrapper_log_file, script_file)

        # create script for executing jobs
        script = jinja2.Template(job_template).render(
            working_dir = self.workingDir,
            scratch_dir = scratchDir,
            exec_name = executable,
            cfg_file = cfgFile,
            inputFiles = " ".join(inputFiles),
            outputDir = outputFilePath,
            outputFiles = " ".join(outputFiles),
            wrapper_log_file = wrapper_log_file,
            executable_log_file = executable_log_file,
            RUNNING_COMMAND = command
        )
        print("writing sbatch script file = '%s'" % script_file)
        with codecs.open(script_file, "w", "utf-8") as f:
            f.write(script)

        print("<submitJob>: command = %s" % command)
        run_cmd_output = run_cmd(command)
        print("run_cmd_output: %s" % run_cmd_output)
        # the job id is taken to be the last whitespace-separated token of the output
        ret_val = run_cmd_output.split()[-1]
        print("ret_val: %s" % ret_val)
        self.jobIds.append(ret_val)
  def create(self):
    """Creates the config files of all analysis and datacard jobs, the optional sbatch script and the Makefile

    Samples that are not flagged 'use_it', or whose sample category is
    "additional_signal_overlap" or "background_data_estimate", are ignored.
    Systematic shifts (anything but "central") are only processed for MC samples
    with the "Tight" lepton selection, and the CMS_ttHl_thu_shape_ttH/ttW/ttZ
    shifts only for the sample categories "signal"/"TTW"/"TTZ", respectively.
    """

    for dirs_by_type in self.dirs.values():
      for dir_path in dirs_by_type.values():
        create_if_not_exists(dir_path)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

      is_mc = (sample_info["type"] == "mc")
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      # the list of input files depends on the sample only, so it is built once per
      # sample instead of once per (lepton selection, systematic) combination
      inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

      for lepton_selection in self.lepton_selections:
        key_dir = getKey(sample_name, lepton_selection)
        sample_dirs = self.dirs[key_dir]

        for central_or_shift in self.central_or_shifts:
          # none of the conditions below depends on the job, so whole
          # combinations are rejected before looping over the jobs
          if central_or_shift != "central" and not (lepton_selection == "Tight"):
            continue
          if central_or_shift != "central" and not is_mc:
            continue
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
            continue
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
            continue
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
            continue

          for jobId, ntupleFiles in inputFileList.items():
            key_file = getKey(sample_name, lepton_selection, central_or_shift, jobId)

            self.ntupleFiles[key_file] = ntupleFiles
            self.cfgFiles_analyze_modified[key_file] = os.path.join(sample_dirs[DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId))
            self.histogramFiles[key_file] = os.path.join(sample_dirs[DKEY_HIST], "%s_%s_%s_%i.root" % \
              (process_name, lepton_selection, central_or_shift, jobId))
            self.logFiles_analyze[key_file] = os.path.join(sample_dirs[DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId))
            # an empty string is stored when no RLE output has been requested
            self.rleOutputFiles[key_file] = os.path.join(sample_dirs[DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else ""
            self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
              lepton_selection,
              is_mc, central_or_shift, lumi_scale, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file], self.rleOutputFiles[key_file])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()
      print(self.sbatchFile_analyze)
    #logging.info("Creating configuration files for executing 'addBackgroundLeptonFakes'")
    #self.createCfg_addFakes(self.histogramFile_hadd_stage1, self.histogramFile_addFakes, self.cfgFile_addFakes_modified)

    #logging.info("Creating configuration files for executing 'addBackgroundLeptonFlips'")
    #self.createCfg_addFlips(self.histogramFile_hadd_stage1, self.histogramFile_addFlips, self.cfgFile_addFlips_modified)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)

    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
  def create(self):
    """Sets up the config files of the analysis and datacard jobs, the optional sbatch script and the Makefile

    Samples that are not flagged 'use_it', or whose sample category is
    "additional_signal_overlap" or "background_data_estimate", are ignored;
    systematic shifts (anything but "central") are only processed for MC samples.
    """

    for dirs_by_type in self.dirs.values():
      for dir_path in dirs_by_type.values():
        create_if_not_exists(dir_path)

    self.inputFileIds = {}
    ignored_categories = [ "additional_signal_overlap", "background_data_estimate" ]
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in ignored_categories:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

      ( secondary_files, primary_store, secondary_store ) = self.initializeInputFileIds(sample_name, sample_info)

      is_mc = (sample_info["type"] == "mc")
      # only MC is scaled to the luminosity, and only if this has been requested
      if self.use_lumi and is_mc:
        lumi_scale = sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      else:
        lumi_scale = 1.
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]

      for central_or_shift in self.central_or_shifts:
        is_shifted = central_or_shift != "central"
        for jobId in range(len(self.inputFileIds[sample_name])):
          # systematic shifts are evaluated for MC only
          if is_shifted and not is_mc:
            continue

          inputFiles = generate_input_list(self.inputFileIds[sample_name][jobId], secondary_files, primary_store, secondary_store, self.debug)

          key_dir = getKey(sample_name)
          key_file = getKey(sample_name, central_or_shift, jobId)
          sample_dirs = self.dirs[key_dir]

          # (channel, process, systematic, job) -- the histogram file omits the channel
          job_tag = (self.channel, process_name, central_or_shift, jobId)
          cfgFile_modified = os.path.join(sample_dirs[DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % job_tag)
          histogramFile = os.path.join(sample_dirs[DKEY_HIST], "%s_%s_%i.root" % job_tag[1:])
          logFile = os.path.join(sample_dirs[DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % job_tag)

          self.cfgFiles_analyze_modified[key_file] = cfgFile_modified
          self.histogramFiles[key_file] = histogramFile
          self.logFiles_analyze[key_file] = logFile

          self.createCfg_analyze(inputFiles, self.histogramFiles[key_file], sample_category, triggers,
            self.lepton_selection, self.hadTau_selection,
            is_mc, central_or_shift, lumi_scale, self.cfgFiles_analyze_modified[key_file])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)

    # the Makefile rules are appended in the order of the processing steps
    lines_makefile = []
    for addToMakefile in (
      self.addToMakefile_analyze,
      self.addToMakefile_hadd_stage1,
      self.addToMakefile_backgrounds_from_data,
      self.addToMakefile_hadd_stage2,
      self.addToMakefile_prep_dcard,
      self.addToMakefile_clean,
    ):
      addToMakefile(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
Esempio n. 23
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
        """

        for key in self.dirs.keys():
            for dir_type in self.dirs[key].keys():
                create_if_not_exists(self.dirs[key][dir_type])

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in ["additional_signal_overlap", "background_data_estimate"]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" % (
                self.executable_analyze, process_name))

            (secondary_files, primary_store, secondary_store) = self.initializeInputFileIds(
                sample_name, sample_info)

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]

            for lepton_selection in self.lepton_selections:

                key_dir = getKey(sample_name, lepton_selection)

                for central_or_shift in self.central_or_shifts:

                    if self.select_root_output:
                        rootOutputSingleFile = os.path.join(self.dirs[key_dir][DKEY_ROOT], "out_%s_%s_%s_%s.root" %
                                                            (self.channel, process_name, lepton_selection, central_or_shift))
                        self.rootOutputAux[rootOutputSingleFile] = os.path.join(self.dirs[key_dir][DKEY_ROOT], "out_%s_%s_%s_%s_*.root" %
                                                                                (self.channel, process_name, lepton_selection, central_or_shift))

                    for jobId in range(len(self.inputFileIds[sample_name])):
                        if central_or_shift != "central" and not is_mc:
                            continue


                        key_dir = getKey(sample_name, lepton_selection)
                        key_file = getKey(sample_name, lepton_selection, central_or_shift, jobId)

                        inputFiles = generate_input_list(self.inputFileIds[sample_name][jobId], secondary_files, primary_store, secondary_store, self.debug)
                        self.ntupleFiles[key_file] = inputFiles

                        self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" %
                                                                                (self.channel, process_name, lepton_selection, central_or_shift, jobId))
                        self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" %
                                                                     (process_name, lepton_selection, central_or_shift, jobId))
                        self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" %
                                                                       (self.channel, process_name, lepton_selection, central_or_shift, jobId))
                        self.rleOutputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" %
                                                                     (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else ""
                        self.rootOutputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_ROOT], "out_%s_%s_%s_%s_%i.root" %
                                                                      (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_root_output else ""

                        self.createCfg_analyze(inputFiles,
                                                self.histogramFiles[key_file],
                                                sample_category,
                                                self.era,
                                                triggers,
                                                lepton_selection,
                                                is_mc,
                                                central_or_shift,
                                                lumi_scale,
                                                self.cfgFiles_analyze_modified[key_file],
                                                self.rleOutputFiles[key_file],
                                                self.rootOutputFiles[key_file]
                                                )

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
            self.createScript_sbatch()

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        # # TODO hackfix (Margus)
        self.datacardFiles['this_value_is_useless_and_not_used_but_somehow_it_is_important_should_be_fixed'] = 'hadd_stage1'

        self.createMakefile(lines_makefile)

        logging.info("Done")
Esempio n. 24
0
    def create(self):
        """Creates all necessary config files for the complete analysis workflow.

        In order, this method:
          1. registers and creates the per-sample and per-channel directories,
          2. collects the input file lists of all enabled samples,
          3. builds the job options and cfg file of every 'analyze' job and the
             hadd stage1 / stage2 bookkeeping,
          4. builds the cfg files of the 'prepareDatacards' and 'makePlots' jobs,
          5. creates the sbatch submission script (only if ``self.is_sbatch``),
          6. writes the Makefile.
        """

        def skip_sample(info):
            # samples that are switched off or belong to these categories are never processed
            return not info["use_it"] or info["sample_category"] in [
                "additional_signal_overlap", "background_data_estimate"
            ]

        # register the per-sample directories
        for sample_name, sample_info in self.samples.items():
            if skip_sample(sample_info):
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                # cfg and log files go below configDir, everything else below outputDir
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, "",
                        process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, "",
                        process_name)

        # register the per-channel directories
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        # self.dirs mixes plain paths (per channel) and dicts of paths (per sample)
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if skip_sample(sample_info):
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if skip_sample(sample_info):
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            inputFileList = inputFileLists[sample_name]
            key_dir = getKey(process_name)
            key_hadd_stage1 = getKey(process_name)

            for central_or_shift in self.central_or_shifts:
                # The filters below do not depend on the job, so they are checked
                # once per systematic instead of once per job.
                # Systematic shifts are only evaluated for MC samples.
                if central_or_shift != "central" and not is_mc:
                    continue
                # Theory shape uncertainties only apply to their own process.
                if central_or_shift.startswith(
                        "CMS_ttHl_thu_shape_ttH"
                ) and sample_category != "signal":
                    continue
                if central_or_shift.startswith(
                        "CMS_ttHl_thu_shape_ttW"
                ) and sample_category != "TTW":
                    continue
                if central_or_shift.startswith(
                        "CMS_ttHl_thu_shape_ttZ"
                ) and sample_category != "TTZ":
                    continue

                for jobId in inputFileList.keys():
                    # build config files for executing analysis code
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # the original message referenced an undefined name (key_job),
                        # which raised NameError instead of skipping the job
                        print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_analyze_job, ntupleFiles))
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                         (process_name, central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % \
                         (self.channel, process_name, central_or_shift, jobId)) if self.select_rle_output else "",
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                      'hadTau_selection' : self.hadTau_selection_part2,
                      # hard-coded; the per-sample value sample_info["use_HIP_mitigation_mediumMuonId"] is not used
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

            # initialize input and output file names for hadd_stage2
            # NOTE(review): if no job was created for this sample (all file lists empty),
            # outputFile_hadd_stage1 has no entry for it and the lookup below raises
            # KeyError -- confirm whether such samples can occur.
            key_hadd_stage2 = getKey("all")
            if key_hadd_stage2 not in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

        logging.info("Creating configuration files to run 'prepareDatacards'")
        for evtSelection in self.evtSelections:
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(evtSelection, histogramToFit)
                key_hadd_stage2 = getKey("all")
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_%s_%s_%s_cfg.py" %
                        (self.channel, evtSelection, histogramToFit)),
                    'datacardFile':
                    os.path.join(
                        self.dirs[DKEY_DCRD],
                        "prepareDatacards_%s_%s_%s.root" %
                        (self.channel, evtSelection, histogramToFit)),
                    'histogramDir':
                    "_".join([self.histogramDir_prep_dcard, evtSelection]),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                self.createCfg_prep_dcard(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        logging.info("Creating configuration files to run 'makePlots'")
        for evtSelection in self.evtSelections:
            key_makePlots_job = getKey(evtSelection)
            key_hadd_stage2 = getKey("all")
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    "makePlots_%s_%s_cfg.py" % (self.channel, evtSelection)),
                'outputFile':
                os.path.join(
                    self.dirs[DKEY_PLOT],
                    "makePlots_%s_%s.png" % (self.channel, evtSelection)),
                'histogramDir':
                "_".join([self.histogramDir_prep_dcard, evtSelection]),
                'label':
                evtSelection,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
Esempio n. 25
0
    def __init__(self,
                 treeName,
                 outputDir,
                 cfgDir,
                 executable_addMEM,
                 samples,
                 era,
                 debug,
                 running_method,
                 max_files_per_job,
                 mem_integrations_per_job,
                 max_mem_integrations,
                 num_parallel_jobs,
                 leptonSelection,
                 hadTauSelection,
                 isForBDTtraining,
                 channel,
                 pool_id=''):
        """Store the job settings and lay out the directory structure for the addMEM jobs.

        The constructor creates ``outputDir`` and ``cfgDir``, opens the channel-specific
        stdout/stderr log files below ``cfgDir`` and registers (without creating them)
        the per-sample directories in ``self.dirs``.

        ``hadTauSelection`` is split at '|' into a definition and a working point.
        ``running_method`` must be "sbatch" or "makefile" (case-insensitive), otherwise
        ValueError is raised. An empty ``pool_id`` is replaced by a random UUID.
        """
        # plain copies of the constructor arguments
        self.treeName = treeName
        self.outputDir = outputDir
        self.cfgDir = cfgDir
        self.executable_addMEM = executable_addMEM
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.samples = samples
        self.era = era
        self.debug = debug
        self.channel = channel
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection

        # "<definition>|<working point>"
        hadTau_parts = self.hadTauSelection.split('|')
        self.hadTauDefinition = hadTau_parts[0]
        self.hadTauWorkingPoint = hadTau_parts[1]
        self.maxPermutations_branchName = "maxPermutations_addMEM_%s_lep%s_tau%s_%s" % (
            self.channel,
            self.leptonSelection,
            self.hadTauDefinition,
            self.hadTauWorkingPoint,
        )
        self.isForBDTtraining = isForBDTtraining

        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)
        self.running_method = running_method
        # exactly one of the two flags is set
        self.is_sbatch = (self.running_method.lower() == "sbatch")
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.cfgDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id or uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: {workingDir}".format(
            workingDir=self.workingDir))

        create_if_not_exists(self.outputDir)
        create_if_not_exists(self.cfgDir)
        stdout_path = os.path.join(self.cfgDir, "stdout_%s.log" % self.channel)
        stderr_path = os.path.join(self.cfgDir, "stderr_%s.log" % self.channel)
        self.stdout_file = codecs.open(stdout_path, 'w', 'utf-8')
        self.stderr_file = codecs.open(stderr_path, 'w', 'utf-8')

        # bookkeeping containers filled later on
        self.dirs = {}
        self.cfgFiles_addMEM_modified = {}
        self.shFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(
            self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        excluded_categories = ["additional_signal_overlap", "background_data_estimate"]
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            if sample_info["sample_category"] in excluded_categories:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            # Ntuples are placed below outputDir, all auxiliary files below cfgDir
            dir_layout = (
                (self.outputDir, [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]),
                (self.cfgDir, [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]),
            )
            for base_dir, dir_types in dir_layout:
                for dir_type in dir_types:
                    initDict(self.dirs, [key_dir, dir_type])
                    self.dirs[key_dir][dir_type] = os.path.join(
                        base_dir, dir_type, self.channel, process_name)

        self.cvmfs_error_log = {}
Esempio n. 26
0
    def create(self):
        """Prepare the PU profile production: job configs, sbatch script and Makefile.

        For every enabled MC sample whose merged histogram file does not exist yet
        (or fails the file check), one job per input file chunk is configured.
        If ``self.is_sbatch`` is set, the sbatch submission script is created and the
        value it returns is added to ``self.num_jobs['puProfile']``.

        Returns ``self.num_jobs``.
        """
        # entries of self.dirs are either a single path or a dictionary of paths
        for dir_entry in self.dirs.values():
            if type(dir_entry) is dict:
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue

            process_name = sample_info["process_name_specific"]
            # only MC samples are processed
            if sample_info["type"] != "mc":
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            job_inputs = generateInputFileList(sample_info,
                                               self.max_files_per_job)
            key_dir = getKey(process_name)
            sample_dirs = self.dirs[key_dir]

            merged_histogram = os.path.join(sample_dirs[DKEY_HISTO],
                                            "%s.root" % process_name)
            already_done = os.path.isfile(merged_histogram) and tools_is_file_ok(
                merged_histogram, min_file_size=2000)
            if already_done:
                logging.info('File {} already exists --> skipping job'.format(
                    merged_histogram))
                continue

            # per-job histograms are collected here and merged into merged_histogram later
            tmp_histograms = []
            self.outputFiles[process_name] = {
                'inputFiles': tmp_histograms,
                'outputFile': merged_histogram
            }

            for jobId, job_files in job_inputs.items():
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = job_files
                if len(job_files) == 0:
                    logging.warning(
                        "ntupleFiles['%s'] = %s --> skipping job !!" %
                        (key_file, job_files))
                    continue

                cfg_path = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "puProfile_%s_%i_cfg.txt" % (process_name, jobId))
                tmp_histogram = os.path.join(
                    sample_dirs[DKEY_HISTO_TMP],
                    "histogram_%i.root" % jobId)
                log_path = os.path.join(
                    sample_dirs[DKEY_LOGS],
                    "puProfile_%s_%i.log" % (process_name, jobId))
                script_path = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "puProfile_%s_%i_cfg.sh" % (process_name, jobId))

                self.cfgFiles_puProfile[key_file] = cfg_path
                self.outputFiles_tmp[key_file] = tmp_histogram
                self.logFiles_puProfile[key_file] = log_path
                self.scriptFiles_puProfile[key_file] = script_path

                job_options = {
                    'histName': process_name,
                    'inputFiles': job_files,
                    'cfgFile_path': cfg_path,
                    'outputFile': tmp_histogram,
                    'logFile': log_path,
                    'scriptFile': script_path,
                }
                self.jobOptions_sbatch[key_file] = job_options
                self.createCfg_puProfile(job_options)
                tmp_histograms.append(tmp_histogram)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            nof_submitted = self.createScript_sbatch(
                self.executable, self.sbatchFile_puProfile,
                self.jobOptions_sbatch)
            self.num_jobs['puProfile'] += nof_submitted

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_puProfile(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.addToMakefile_plot(lines_makefile)
        self.addToMakefile_finalHadd(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")

        return self.num_jobs
Esempio n. 27
0
    def create(self):
        """Creates all config files needed to run the MEM jobs and prints a summary.

        For every enabled sample whose first local path exists, the input files are
        split into event ranges (``self.memJobList``) and one cfg file is created per
        range. The outputs are grouped per fileset id in ``self.hadd_records``.
        Afterwards the sbatch script (if ``self.is_sbatch``) and the Makefile are
        written and per-sample as well as total integration statistics are printed.

        Returns:
            False if ``self.max_mem_integrations`` is positive and the total number of
            integrations exceeds it; True otherwise.
        """
        statistics = {}

        # entries of self.dirs are either a single path or a dictionary of paths
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        # read the file in, sample-by-sample
        # build the dictionary recursively
        # add rle file also to generated cfg files
        # print integrations per job as well!
        # consider more than 1 file per jobs -- the jobs are splitted by MEM integration anyways

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or \
              sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue

            if not os.path.exists(sample_info['local_paths'][0]['path']):
                logging.warning("Skipping sample {sample_name}".format(
                    sample_name=sample_name))
                continue

            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_addMEM, process_name))
            is_mc = (sample_info["type"] == "mc")

            inputFileList = generateInputFileList(sample_name, sample_info,
                                                  self.max_files_per_job,
                                                  self.debug)
            # typically, the analysis ends here and starts looping b/c the smallest unit of work processes
            # at least one file; we need, however, to split the file into event ranges in such a way that
            # each job performs mem_integrations_per_job MEM integrations

            # so what we are going to do is to open each set of files in inputFileList, read the variable
            # requestMEM_*l_*tau and try to gather the event ranges such that each event range
            # performs up to mem_integrations_per_job integrations per job
            memEvtRangeDict = self.memJobList(inputFileList)

            key_dir = getKey(sample_name)
            for jobId in memEvtRangeDict.keys():

                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = memEvtRangeDict[jobId][
                    'input_fileset']

                # there should always be at least one input file per job
                # (the original compared the list itself with 0, which never checks emptiness)
                assert len(self.inputFiles[key_file]) > 0, \
                    "No input files for job %s" % key_file

                self.cfgFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" %
                    (self.channel, process_name, jobId))
                self.shFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "addMEM_%s_%s_%i.sh" % (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES],
                    "%s_%i.root" % (process_name, jobId))
                self.logFiles_addMEM[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" %
                    (self.channel, process_name, jobId))
                self.createCfg_addMEM(
                    self.inputFiles[key_file],
                    memEvtRangeDict[jobId]['event_range'][0],
                    memEvtRangeDict[jobId]['event_range'][1],
                    self.outputFiles[key_file],
                    self.era,
                    is_mc,
                    self.cfgFiles_addMEM_modified[key_file],
                )

                # associate the output file with the fileset_id (one hadd output per fileset)
                fileset_id = memEvtRangeDict[jobId]['fileset_id']
                # '%' and '//' have equal precedence and associate left-to-right, so the
                # integer division must be parenthesized before formatting
                hadd_output_dir = os.path.join(
                    self.dirs[key_dir][DKEY_FINAL_NTUPLES],
                    '%04d' % (fileset_id // 1000))
                if not os.path.exists(hadd_output_dir):
                    os.makedirs(hadd_output_dir)
                hadd_output = os.path.join(
                    hadd_output_dir,
                    '%s_%i.root' % ('tree', fileset_id))
                if hadd_output not in self.hadd_records:
                    self.hadd_records[hadd_output] = {}
                    self.hadd_records[hadd_output]['output_files'] = []
                self.hadd_records[hadd_output]['fileset_id'] = fileset_id
                self.hadd_records[hadd_output]['output_files'].append(
                    self.outputFiles[key_file])
                self.hadd_records[hadd_output]['process_name'] = process_name

            # let's sum the number of integration per sample;
            # nof_entries is counted only once per fileset, not once per job
            nofEntriesMap = {}
            for v in memEvtRangeDict.values():
                if v['fileset_id'] not in nofEntriesMap:
                    nofEntriesMap[v['fileset_id']] = {
                        'nof_entries': v['nof_entries'],
                    }
            statistics[process_name] = {
                'nof_int':
                sum([entry['nof_int'] for entry in memEvtRangeDict.values()]),
                'nof_entries':
                sum([entry['nof_entries']
                     for entry in nofEntriesMap.values()]),
                'nof_events_pass':
                sum([
                    entry['nof_events_pass']
                    for entry in memEvtRangeDict.values()
                ]),
                'nof_int_pass':
                sum([
                    entry['nof_int_pass']
                    for entry in memEvtRangeDict.values()
                ]),
                'nof_zero':
                sum([entry['nof_zero'] for entry in memEvtRangeDict.values()]),
                'nof_jobs':
                len(memEvtRangeDict),
            }

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_addMEM)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_addMEM(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.createMakefile(lines_makefile)

        def _ratio(numerator, denominator):
            # the totals may be zero (no samples, or nothing passing) -- report 0 instead of crashing
            if denominator == 0:
                return 0.
            return float(numerator) / denominator

        # column width of the label; fall back to the 'total' label if there is no sample
        ws_len = max([len(kk) + 1 for kk in statistics.keys()] or [len('total') + 1])
        total_nof_integrations_sum = sum(x['nof_int']
                                         for x in statistics.values())
        total_nof_entires = sum(x['nof_entries'] for x in statistics.values())
        total_nof_zero_int = sum(x['nof_zero'] for x in statistics.values())
        total_nof_jobs = sum(x['nof_jobs'] for x in statistics.values())
        total_nof_pass = sum(x['nof_events_pass'] for x in statistics.values())
        total_nof_int_pass_avg = _ratio(
            sum(x['nof_int_pass'] for x in statistics.values()),
            total_nof_pass)
        total_nof_integrations_avg = _ratio(total_nof_integrations_sum,
                                            total_nof_entires)
        total_nof_int_per_job = _ratio(total_nof_integrations_sum,
                                       total_nof_jobs)
        for k, v in statistics.items():
            if v['nof_entries'] == 0:
                int_per_event = 0.
                evt_pass = 0.
            else:
                int_per_event = float(v['nof_int']) / v['nof_entries']
                evt_pass = (100 * float(v['nof_events_pass']) /
                            v['nof_entries'])
            if v['nof_events_pass'] == 0:
                nof_int_pass = 0.
            else:
                nof_int_pass = float(v['nof_int_pass']) / v['nof_events_pass']
            print(
                '%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass; %d evt 0int)'
                % (
                    k,
                    ' ' * (ws_len - len(k)),
                    v['nof_int'],
                    v['nof_entries'],
                    v['nof_jobs'],
                    int_per_event,
                    v['nof_events_pass'],
                    evt_pass,
                    nof_int_pass,
                    v['nof_zero'],
                ))
        print(
            '%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass; '
            '%.2f int/job pass; %d evt 0int)' % (
                'total',
                ' ' * (ws_len - len('total')),
                total_nof_integrations_sum,
                total_nof_entires,
                total_nof_jobs,
                total_nof_integrations_avg,
                total_nof_pass,
                total_nof_int_pass_avg,
                total_nof_int_per_job,
                total_nof_zero_int,
            ))

        if self.max_mem_integrations > 0 and total_nof_integrations_sum > self.max_mem_integrations:
            logging.error(
                "Will not start the jobs (max nof integrations exceeded)!")
            return False
        else:
            logging.info("Done")
            return True
Esempio n. 28
0
    def create(self):
        """Create directories, config files and the Makefile for the Ntuple production.

        For every sample flagged with ``use_it`` the input files are split into
        jobs by ``generateInputFileList``. For each job with at least one input
        file, the config, output and log file paths are registered on ``self``
        and a config file is written through ``createCfg_prodNtuple``. If
        ``self.is_sbatch`` is set, the batch submission script is created as
        well; the Makefile is always created at the end.

        The visible code only prepares files; it does not itself launch any job.
        """

        # self.dirs maps a key either to a single path or to a dict of
        # {dir_type: path}; make sure every listed directory exists.
        for dir_entry in self.dirs.values():
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_prodNtuple, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info,
                                                  self.max_files_per_job,
                                                  self.debug)
            key_dir = getKey(sample_name)

            # Output trees are grouped into sub-directories of 1000 job ids
            # each ('0000', '0001', ...); create the ones this sample needs.
            for subdir_idx in {job_id // 1000 for job_id in inputFileList.keys()}:
                create_if_not_exists(
                    os.path.join(self.dirs[key_dir][DKEY_NTUPLES],
                                 '%04d' % subdir_idx))

            for jobId, job_input_files in inputFileList.items():
                key_file = getKey(sample_name, jobId)

                # The file list is recorded before the emptiness check, so a
                # skipped job still has an (empty) entry in self.inputFiles.
                self.inputFiles[key_file] = job_input_files
                if not job_input_files:
                    # Single-argument print(...) call: valid in Python 3 and
                    # prints the same text under Python 2.
                    print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                        key_file, job_input_files))
                    continue

                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES], "%04d" % (jobId // 1000),
                    "tree_%i.root" % jobId)
                self.logFiles_prodNtuple[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "produceNtuple_%s_%i.log" % (process_name, jobId))

                jobOptions = {
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_modified':
                    self.cfgFiles_prodNtuple_modified[key_file],
                    'outputFile': self.outputFiles[key_file],
                    # NOTE: hard-coded to True; the per-sample value
                    # sample_info["use_HIP_mitigation_mediumMuonId"] is
                    # deliberately not read here.
                    'use_HIP_mitigation_mediumMuonId': True,
                    'is_mc': is_mc,
                    # The job id doubles as the random seed of the job.
                    'random_seed': jobId
                }
                self.createCfg_prodNtuple(jobOptions)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_prodNtuple)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        # The 'clean' target (addToMakefile_clean) is intentionally not added.
        self.createMakefile(lines_makefile)

        logging.info("Done")
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in ["Tight", "forBDTtraining"]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for leptonChargeSelection in self.leptonChargeSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                ):
                  continue
                  
                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")   

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:

      hadTau_selection = "Tight"
      hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])

      electron_selection = lepton_selection
      muon_selection = lepton_selection

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      if "forBDTtraining" in lepton_selection:
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for leptonChargeSelection in self.leptonChargeSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if self.isBDTtraining or lepton_selection.find("Tight") == -1 \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'leptonChargeSelection'  : leptonChargeSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'hadTauSelection'          : hadTau_selection,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'fillGenEvtHistograms'     : True,
		          'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'selectBDT'                : self.isBDTtraining,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for leptonChargeSelection in self.leptonChargeSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", leptonChargeSelection)
      category_sideband = "hh_4l_%s_Fakeable_wFakeRateWeights" % leptonChargeSelection
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % leptonChargeSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % leptonChargeSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % leptonChargeSelection),
        'category_signal' : "hh_4l_%s_Tight" % leptonChargeSelection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : '4l',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      if "SS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : '4l SS',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      for leptonChargeSelection in self.leptonChargeSelections:
        key_prep_dcard_job = getKey(leptonChargeSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, leptonChargeSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(leptonChargeSelection, histogramToFit)        
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if leptonChargeSelection == "OS":
          histogramDir_nominal = self.histogramDir_prep_dcard
        elif leptonChargeSelection == "SS":
          histogramDir_nominal = self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'leptonChargeSelection' = %s !!" % leptonChargeSelection)
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, leptonChargeSelection)]
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : '4l',
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")                  
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "4l SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "OS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Esempio n. 30
0
    'label'    : inclusive_sample,
  })

# Every non-data sample that is not already part of a summed group is registered
# as its own single-sample entry, labelled by its process name.
for sample_entry in samples.values():
  sample_name = sample_entry['process_name_specific']
  if sample_name in valid_samples_to_sum_flat or sample_entry['type'] == 'data':
    continue
  valid_samples_to_sum.append({
    'xs'       : sample_entry['xsection'],
    'samples'  : [ sample_name ],
    'nentries' : { sample_name : sample_entry['nof_tree_events'] },
    'label'    : sample_name,
  })

# A missing output directory is only created on explicit request (-f/--force).
output_dir_missing = not os.path.isdir(args.output)
if output_dir_missing and not args.force:
  raise ValueError('Use -f/--force to create output directory %s' % args.output)
if output_dir_missing:
  create_if_not_exists(args.output)

# One plot() call per group: inputs are keyed by sample name, and one output
# file is produced per requested extension.
for valid_samples in valid_samples_to_sum:
  input_files = {}
  for member in valid_samples['samples']:
    input_files[member] = {
      'input'    : pattern.format(sample_name = member),
      'nentries' : valid_samples['nentries'][member],
    }
  output_files = []
  for ext in args.extension:
    output_files.append(os.path.join(args.output, '%s.%s' % (valid_samples['label'], ext)))
  expected_neff = lumi * valid_samples['xs']
  plot(input_files, output_files, valid_samples['label'], expected_neff, args.mode)
Esempio n. 31
0
    def create(self):
        """Create the directories, config files, batch script and Makefile for the analysis workflow.

        Steps, in order:
          1. register the per-sample and the channel-wide directories in `self.dirs`
             and create them on disk via `create_if_not_exists`;
          2. build the input file list of every selected sample with `generateInputFileList`;
          3. fill `self.jobOptions_analyze` and write one analyze config per (sample, jobId),
             registering each job's histogram file as an input of the sample's hadd_stage1 step;
          4. if `self.is_sbatch` is set, create the script that submits the analyze jobs;
          5. assemble the Makefile from the analyze and hadd_stage1 steps.

        Samples whose "use_it" flag is false, or whose "sample_category" is
        "additional_signal_overlap" or "background_data_estimate", are ignored in every step.

        Returns:
            `self.num_jobs` (this method does not assign it itself).
        """
        ignored_categories = ["additional_signal_overlap", "background_data_estimate"]

        def is_skipped(sample_info):
            # Single definition of the sample filter shared by all sample loops below.
            return not sample_info["use_it"] or sample_info["sample_category"] in ignored_categories

        # Per-sample directories: configs and logs go below configDir, everything else below outputDir.
        for sample_name, sample_info in self.samples.items():
            if is_skipped(sample_info):
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                base_dir = self.configDir if dir_type in [DKEY_CFGS, DKEY_LOGS] else self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, self.channel, process_name)

        # Channel-wide directories: only the histogram directory goes below outputDir.
        dir_types_in_configDir = [
            DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT
        ]
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            base_dir = self.configDir if dir_type in dir_types_in_configDir else self.outputDir
            self.dirs[dir_type] = os.path.join(base_dir, dir_type, self.channel)

        # self.dirs mixes plain paths (channel-wide) and dicts of paths (per sample).
        for dir_entry in self.dirs.values():
            if isinstance(dir_entry, dict):
                for dir_path in dir_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dir_entry)

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if is_skipped(sample_info):
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if is_skipped(sample_info):
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            # Both keys depend on the sample only, not on the job.
            key_dir = getKey(process_name)
            key_hadd_stage1 = getKey(process_name)

            for jobId, ntupleFiles in inputFileLists[sample_name].items():
                # build config files for executing analysis code
                key_analyze_job = getKey(process_name, jobId)
                if not ntupleFiles:
                    # The original message referenced the undefined name `key_file`,
                    # which raised a NameError instead of skipping the job.
                    logging.warning(
                        "ntupleFiles['%s'] = %s --> skipping job !!" %
                        (key_analyze_job, ntupleFiles))
                    continue

                if self.use_lumi and is_mc:
                    lumi_scale = sample_info["xsection"] * self.lumi / sample_info["nof_events"]
                else:
                    lumi_scale = 1.

                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles' : ntupleFiles,
                    'cfgFile_modified' : os.path.join(
                        self.dirs[key_dir][DKEY_CFGS],
                        "analyze_%s_%s_%i_cfg.py" % (self.channel, process_name, jobId)),
                    'histogramFile' : os.path.join(
                        self.dirs[key_dir][DKEY_HIST],
                        "%s_%i.root" % (process_name, jobId)),
                    'logFile' : os.path.join(
                        self.dirs[key_dir][DKEY_LOGS],
                        "analyze_%s_%s_%i.log" % (self.channel, process_name, jobId)),
                    'sample_category' : sample_category,
                    'hadTau_selection' : self.hadTau_selection,
                    'use_HIP_mitigation_mediumMuonId' : True,
                    'is_mc' : is_mc,
                    'lumi_scale' : lumi_scale,
                    'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                    'selectBDT' : True,
                    'changeBranchNames' : self.changeBranchNames
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(
                    self.dirs[DKEY_HIST],
                    "histograms_harvested_stage1_%s_%s.root" % (self.channel, process_name))
                # NOTE(review): appended once per job, so the same hadd_stage1 output can
                # appear several times in self.targets -- confirm this is intended.
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
Esempio n. 32
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          central_or_shifts_extended = [ "" ]
          central_or_shifts_extended.extend(self.central_or_shifts)
          central_or_shifts_extended.extend([ "hadd", "addBackgrounds" ])
          for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
              key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
              for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                  self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
                else:
                  self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
          process_name = sample_info["process_name_specific"]
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          is_signal = (sample_category == "signal")

          for central_or_shift in self.central_or_shifts:

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
              if central_or_shift != "central":
                isFR_shape_shift = (central_or_shift in systematics.FR_all)
                if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                  continue
                if not is_mc and not isFR_shape_shift:
                  continue

              if central_or_shift in systematics.LHE().ttH and sample_category != "signal":
                continue
              if central_or_shift in systematics.LHE().ttW and sample_category != "TTW":
                continue
              if central_or_shift in systematics.LHE().ttZ and sample_category != "TTZ":
                continue
              if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                continue

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
              analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
              key_analyze_job = getKey(*analyze_job_tuple)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue

              syncOutput = ''
              syncTree = ''
              syncRequireGenMatching = True
              if self.do_sync:
                mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                if lepton_selection_and_frWeight == 'Tight':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_SR' % self.channel
                  syncRequireGenMatching = True
                elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_Fake' % self.channel
                elif mcClosure_match:
                  mcClosure_type = mcClosure_match.group('type')
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                  syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                else:
                  continue
              if syncTree and central_or_shift != "central":
                syncTree = os.path.join(central_or_shift, syncTree)
              syncRLE = ''
              if self.do_sync and self.rle_select:
                syncRLE = self.rle_select % syncTree
                if not os.path.isfile(syncRLE):
                  logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                  continue
              if syncOutput:
                self.inputFiles_sync['sync'].append(syncOutput)

              cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
              logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
              rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                   if self.select_rle_output else ""
              histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles'              : ntupleFiles,
                'cfgFile_modified'         : cfgFile_modified_path,
                'histogramFile'            : histogramFile_path,
                'logFile'                  : logFile_path,
                'selEventsFileName_output' : rleOutputFile_path,
                'electronSelection'        : electron_selection,
                'muonSelection'            : muon_selection,
                'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                'hadTauSelection_veto'     : hadTauVeto_selection,
                'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                'central_or_shift'         : central_or_shift,
                'syncOutput'               : syncOutput,
                'syncTree'                 : syncTree,
                'syncRLE'                  : syncRLE,
                'syncRequireGenMatching'   : syncRequireGenMatching,
                'useNonNominal'            : self.use_nonnominal,
                'apply_hlt_filter'         : self.hlt_filter,
              }
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

              # initialize input and output file names for hadd_stage1
              key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
              hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
              key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
              if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                              "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

          if self.do_sync: continue

          if is_mc:
            logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

            sample_categories = [ sample_category ]
            if is_signal:
              sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg" ]
            for sample_category in sample_categories:
              # sum non-fake and fake contributions for each MC sample separately
              genMatch_categories = [ "nonfake", "conversions", "fake" ]
              for genMatch_category in genMatch_categories:
                key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                addBackgrounds_job_tuple = None
                processes_input = None
                process_output = None
                if genMatch_category == "nonfake":
                  # sum non-fake contributions for each MC sample separately
                  # input processes: TT3l0g0j,...
                  # output processes: TT; ...
                  if sample_category in [ "signal" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_genMatches ]
                  elif sample_category in [ "ttH" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                  process_output = sample_category
                  addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "conversions":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                  # output processes: TT_conversion; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  process_output = "%s_conversion" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_conversion" % sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "fake":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                  # output processes: TT_fake; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  process_output = "%s_fake" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight)
                if processes_input:
                  logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                  key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                  cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                  outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                  self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                    'cfgFile_modified' : cfgFile_modified,
                    'outputFile' : outputFile,
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                    'processes_input' : processes_input,
                    'process_output' : process_output
                  }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                  # initialize input and output file names for hadd_stage1_5
                  key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)                  
                  key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                  if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                  self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                  self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                                      "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

          # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
          if not is_mc:
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

        if self.do_sync: continue

        # sum fake background contributions for the total of all MC sample
        # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
        # output process: fakes_mc
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_addBackgrounds_dir = getKey("addBackgrounds")
        addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight)
        key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_fake" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_fakes_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_fakes_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "fakes_mc"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

        # sum conversion background contributions for the total of all MC sample
        # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
        # output process: conversions
        addBackgrounds_job_conversions_tuple = ("conversions", lepton_selection_and_frWeight)
        key_addBackgrounds_job_conversions = getKey(*addBackgrounds_job_conversions_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_conversion" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_conversions_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_conversions_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_conversions_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "conversions"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions])

        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)        
        key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        if lepton_selection == "Tight":
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions]['outputFile'])        
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                        "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    if self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.targets.append(outputFile_sync_path)
      self.addToMakefile_hadd_sync(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
      'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
      'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
      'category_signal' : "ttZctrl_Tight",
      'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)      
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")                                    
      add_syst_fakerate_job_tuple = (self.channel, histogramToFit) 
      key_add_syst_fakerate_job = getKey(histogramToFit)      
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = self.histogramDir_prep_dcard
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
        histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")                                       
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : "t#bar{t}Z control region",
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Esempio n. 33
0
# Output file names are optional; when they are given, there must be exactly one
# per input base directory.
if output_filenames and len(basedirs) != len(output_filenames):
    raise ValueError(
      "The number of output file names (%d) does not coincide w/ the number of "
      "input base directories (%d); either drop the list of output filenames or "
      "provide the exact same number of output file names as input base directories" % \
      (len(output_filenames), len(basedirs))
    )

# Make sure the directory of every requested output file exists; missing
# directories are only created when -f/--force was given.
for output_filename in output_filenames:
    # os.path.dirname() returns '' for a bare file name (e.g. "out.txt"), and
    # os.path.isdir('') is False; treat such names as living in the current
    # directory instead of asking the user to force-create an empty path.
    output_dir = os.path.dirname(output_filename) or os.curdir
    if not os.path.isdir(output_dir):
        logging.debug('Directory %s does not exist' % output_dir)
        if args.force:
            create_if_not_exists(output_dir)
        else:
            raise ValueError(
                'Use -f/--force to create the output directory %s' %
                output_dir)

# Process every input base directory in turn.
for basedir_idx, basedir in enumerate(basedirs):
    logging.info('Finding sum of weights for %s' % basedir)
    filenames = get_filelist(basedir)

    # A fresh ordered map is started for each base directory.
    weight_map = collections.OrderedDict()
    for filename in filenames:
        # Every file reported by get_filelist() is expected to exist.
        assert (exists(filename))
        # A fresh ordered map is started for each file.
        neg_weights = collections.OrderedDict()
        logging.debug('Processing %s ...' % filename)
        events = Events(filename)
  def create(self):
    """Creates the directories, configuration files and Makefile for this analysis.

    Steps, in order:
      - creates every directory registered in self.dirs;
      - for each used sample, calls createCfg_analyze once per
        (charge selection, systematic shift, job) combination and records the
        ntuple, config, histogram and log file paths of that job;
      - if self.is_sbatch is set, creates the batch submission script;
      - calls createCfg_comp_jetToTauFakeRate once per charge selection;
      - assembles the Makefile lines and calls createMakefile.
    """

    for key in self.dirs:
      for dir_type in self.dirs[key]:
        create_if_not_exists(self.dirs[key][dir_type])

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

      is_mc = (sample_info["type"] == "mc")
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      apply_genWeight = sample_info["apply_genWeight"] if (is_mc and "apply_genWeight" in sample_info) else False
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      # The job splitting depends only on the sample, so it is computed once per
      # sample rather than once per (charge selection, systematic shift) pair.
      # The per-job file lists are shared (not copied) between the resulting jobs.
      inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

      for charge_selection in self.charge_selections:
        key_dir = getKey(sample_name, charge_selection)

        for central_or_shift in self.central_or_shifts:
          # None of the checks below depends on the job id, so a shift that does
          # not apply to this sample is skipped before iterating over the jobs.
          # Shifts other than "central" are only run for MC samples.
          if central_or_shift != "central" and not is_mc:
            continue
          # The thu_shape shifts are restricted to their own sample category.
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
            continue
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
            continue
          if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
            continue

          for jobId in inputFileList.keys():
            key_file = getKey(sample_name, charge_selection, central_or_shift, jobId)

            self.ntupleFiles[key_file] = inputFileList[jobId]
            self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
              (self.channel, process_name, charge_selection, central_or_shift, jobId))
            self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
              (process_name, charge_selection, central_or_shift, jobId))
            self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
              (self.channel, process_name, charge_selection, central_or_shift, jobId))

            self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
              charge_selection, self.jet_minPt, self.jet_maxPt, self.jet_minAbsEta, self.jet_maxAbsEta, self.hadTau_selections, self.absEtaBins,
              is_mc, central_or_shift, lumi_scale, apply_genWeight, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
    for charge_selection in self.charge_selections:
      self.histogramFile_comp_jetToTauFakeRate[charge_selection] = os.path.join(
        self.outputDir, DKEY_HIST, "comp_jetToTauFakeRate_%s.root" % charge_selection)
      self.histogramDir_numerator[charge_selection] = "jetToTauFakeRate_%s/numerator/" % charge_selection
      self.histogramDir_denominator[charge_selection] = "jetToTauFakeRate_%s/denominator/" % charge_selection
      self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection] = os.path.join(
        self.outputDir, DKEY_CFGS, "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection)
      # Note the argument order: the denominator directory is passed before the numerator one.
      self.createCfg_comp_jetToTauFakeRate(self.histogramFile_hadd_stage1, self.histogramFile_comp_jetToTauFakeRate[charge_selection],
        self.histogramDir_denominator[charge_selection], self.histogramDir_numerator[charge_selection], self.absEtaBins, self.ptBins,
        self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection])

    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")
Esempio n. 35
0
    def create(self):
        """Creates the directories, 'analyze' configuration files and Makefile.

        Steps, in order:
          - registers the per-process and the channel-wide directories in
            self.dirs and creates them, logging the progress;
          - builds the list of input files per job for every used sample;
          - for each job with at least one input ntuple, fills
            self.jobOptions_analyze, calls createCfg_analyze and registers the
            job's histogram file as an input of the per-process hadd_stage1 step;
          - if self.is_sbatch is set, creates the batch submission script;
          - assembles the Makefile lines and calls createMakefile.

        Returns:
          self.num_jobs
        """

        # Per-process directories: configs and logs go below self.configDir,
        # everything else below self.outputDir.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    base_dir = self.configDir
                else:
                    base_dir = self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, self.channel, process_name)

        # Channel-wide directories, keyed directly by directory type.
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                base_dir = self.configDir
            else:
                base_dir = self.outputDir
            self.dirs[dir_type] = os.path.join(base_dir, dir_type,
                                               self.channel)

        # self.dirs mixes nested entries (dict of directory type -> path) with
        # flat entries (a single path); count both kinds.
        numDirectories = sum(
            len(entry) if isinstance(entry, dict) else 1
            for entry in self.dirs.values())
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for entry in self.dirs.values():
            if isinstance(entry, dict):
                for dir_path in entry.values():
                    create_if_not_exists(dir_path)
                numDirectories_created += len(entry)
            else:
                create_if_not_exists(entry)
                numDirectories_created += 1
            # Emit one log line for every whole percent reached so far.
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac += 1
        logging.info("Done.")

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            # Both keys depend only on the process, not on the job.
            key_analyze_dir = getKey(process_name)
            key_hadd_stage1_job = getKey(process_name)

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
                # build config files for executing analysis code
                analyze_job_tuple = (process_name, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    logging.warning(
                        "No input ntuples for %s --> skipping job !!" %
                        (key_analyze_job))
                    continue

                cfgFile_modified_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_CFGS],
                    "analyze_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_LOGS],
                    "analyze_%s_%i.log" % analyze_job_tuple)
                histogramFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_HIST],
                    "analyze_%s_%i.root" % analyze_job_tuple)

                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': cfgFile_modified_path,
                    'histogramFile': histogramFile_path,
                    'histogramDir': 'analyze_hadTopTagger',
                    'logFile': logFile_path,
                    'hadTauSelection': self.hadTau_selection,
                    'lumiScale': 1.,
                    'selectBDT': True,
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job], sample_info)

                # initialize input and output file names for hadd_stage1
                if key_hadd_stage1_job not in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                # The hadd output goes into the per-process histogram directory,
                # the only per-process directory key this method registers in
                # self.dirs. (The previous lookup used a name that is not defined
                # anywhere in this method.)
                self.outputFile_hadd_stage1[
                    key_hadd_stage1_job] = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_HIST],
                        "hadd_stage1_%s.root" % process_name)
                # NOTE(review): this runs once per job, so the same hadd_stage1
                # target is appended to self.targets once for every job of the
                # process -- confirm that repeated targets are intended.
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])

        # The script path is recorded even when no sbatch script is generated.
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        return self.num_jobs
    def create(self):
        """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            #      for charge_selection in self.charge_selections:                                                          ## NO CHARGE SELECTION NEEDED HERE
            #        key_dir = getKey(process_name, charge_selection)                                                       ## NO CHARGE SELECTION NEEDED HERE
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT
            ]:  ## DKEY_PLOT TO BE ADDED LATER
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            apply_genWeight = sample_info["apply_genWeight"] if (
                is_mc and "apply_genWeight" in sample_info.keys()) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            #      for charge_selection in self.charge_selections:                                        ## NO CHARGE SELECTION NEEDED HERE
            for central_or_shift in self.central_or_shifts:
                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue
                    # build config files for executing analysis code
#            key_dir = getKey(process_name, charge_selection)                                  ## NO CHARGE SELECTION NEEDED HERE
                    key_dir = getKey(process_name)
                    #            key_analyze_job = getKey(process_name, charge_selection, central_or_shift, jobId) ## NO CHARGE SELECTION NEEDED HERE
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)

                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_job, ntupleFiles)
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                    #              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                    #              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                    #                (process_name, charge_selection, central_or_shift, jobId)),                                        ## NO CHARGE SELECTION NEEDED HERE
                    #              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                        (process_name,  central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                    #              'charge_selection' : charge_selection,                                         ## NO CHARGE SELECTION NEEDED HERE
                    #              'jet_minPt' : self.jet_minPt,
                    #              'jet_maxPt' : self.jet_maxPt,
                    #              'jet_minAbsEta' : self.jet_minAbsEta,
                    #              'jet_maxAbsEta' : self.jet_maxAbsEta,
                    #              'hadTau_selections' : self.hadTau_selections,
                      'absEtaBins_e' : self.absEtaBins_e,
                      'absEtaBins_mu' : self.absEtaBins_mu,
                      'absPtBins_e' : self.absPtBins_e,
                      'absPtBins_mu' : self.absPtBins_mu,
                      ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    #            key_hadd_stage1 = getKey(process_name, charge_selection)
                    key_hadd_stage1 = getKey(
                        process_name)  ## NO CHARGE SELECTION NEEDED HERE
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    #            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                    #              (self.channel, process_name, charge_selection))                                                                                 ## NO CHARGE SELECTION NEEDED HERE
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

        # initialize input and output file names for hadd_stage1_5
        key_hadd_stage1_5 = getKey('')
        if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
        for key_hadd_stage1 in self.outputFile_hadd_stage1.keys():
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage1_5.root")

        ## Creating configuration files to run 'addBackgrounds_LeptonFakeRate' [stage 1.5]
        key_addBackgrounds_job = getKey('')
        self.jobOptions_addBackgrounds_LeptonFakeRate[
            key_addBackgrounds_job] = {
                'inputFile':
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate)),
                'outputFile':
                os.path.join(self.dirs[DKEY_HIST],
                             "addBackground_LeptonFakeRate.root"),
                'logFile':
                os.path.join(
                    self.dirs[DKEY_LOGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate.replace(
                            "_cfg.py", ".log"))),
            }
        self.createCfg_addBackgrounds_LeptonFakeRate(
            self.
            jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job])

        # initialize input and output file names for hadd_stage2
        #        key_hadd_stage2 = getKey(charge_selection)                                                                                            ## NO CHARGE SELECTION NEEDED HERE
        #        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:                                                                                ## NO CHARGE SELECTION NEEDED HERE
        #          self.inputFiles_hadd_stage2[key_hadd_stage2] = []                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        #        self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])                                     ## NO CHARGE SELECTION NEEDED HERE
        #        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \        ## NO CHARGE SELECTION NEEDED HERE
        #          (self.channel, charge_selection))                                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        key_hadd_stage2 = getKey('')
        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
        for key_hadd_stage1_5 in self.outputFile_hadd_stage1_5.keys():
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(
            self.jobOptions_addBackgrounds_LeptonFakeRate[
                key_addBackgrounds_job]['outputFile'])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage2.root")

        if self.prep_dcard:
            processesToCopy = []
            signals = []
            logging.info(
                "Creating configuration files to run 'prepareDatacards_LeptonFakeRate'"
            )
            for process in self.prep_dcard_signals:
                signals.append(process)
            self.prep_dcard_signals = signals
            for process in self.prep_dcard_processesToCopy:
                processesToCopy.append(process)
            self.prep_dcard_processesToCopy = processesToCopy
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(histogramToFit)
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_LeptonFakeRate_%s_cfg.py" %
                        (histogramToFit)),
                    'datacardFile':
                    os.path.join(self.dirs[DKEY_DCRD],
                                 "prepareDatacards_%s.root" %
                                 (histogramToFit)),
                    'histogramDir': (self.histogramDir_prep_dcard),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                #        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])       ## DEF LINE
                self.createCfg_prep_dcard_LeptonFakeRate(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            #      self.createScript_sbatch()

            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS],
                "sbatch_addBackgrounds_LeptonFakeRate_%s.py" % self.channel)
            self.createScript_sbatch(
                self.executable_addBackgrounds_LeptonFakeRate,
                self.sbatchFile_addBackgrounds_LeptonFakeRate,
                self.jobOptions_addBackgrounds_LeptonFakeRate)


#      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_jetToTauFakeRate)
#      self.sbatchFile_comp_jetToTauFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
#      self.createScript_sbatch(self.executable_comp_jetToTauFakeRate, self.sbatchFile_comp_jetToTauFakeRate, self.jobOptions_comp_jetToTauFakeRate)

#### FAKE RATE COMP BLOCK COMMENTED OUT ########################

#    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
#    for charge_selection in self.charge_selections:
#      key_comp_jetToTauFakeRate_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job] = {
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_HIST], "comp_jetToTauFakeRate_%s.root" % charge_selection),
#        'looseRegion' : "jetToTauFakeRate_%s/denominator/" % charge_selection,
#        'tightRegion' : "jetToTauFakeRate_%s/numerator/" % charge_selection,
#        'absEtaBins' : self.absEtaBins,
#        'ptBins' : self.ptBins
#      }
#      self.createCfg_comp_jetToTauFakeRate(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job])
#      self.targets.append(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job]['outputFile'])

#    logging.info("Creating configuration files to run 'makePlots'")
#    for charge_selection in self.charge_selections:
#      key_makePlots_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_make_plots[key_makePlots_job] = {
#        'executable' : self.executable_make_plots,
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
#        'histogramDir' : "jetToTauFakeRate_%s" % charge_selection,
#        'label' : None,
#        'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#      }
#      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#      self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
#      for absEtaBin in [ "absEtaLt1_5", "absEta1_5to9_9" ]:
#        key_makePlots_job = getKey(charge_selection, absEtaBin, "denominator")
#        key_hadd_stage2 = getKey(charge_selection)
#        self.jobOptions_make_plots[key_makePlots_job] = {
#          'executable' : self.executable_make_plots,
#          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#          'cfgFile_modified' : os.path.join(
#            self.dirs[DKEY_CFGS], "makePlots_%s_%s_denominator_%s_cfg.py" % (self.channel, charge_selection, absEtaBin)),
#          'outputFile' : os.path.join(
#            self.dirs[DKEY_PLOT], "makePlots_%s_%s_denominator_%s.png" % (self.channel, charge_selection, absEtaBin)),
#          'histogramDir' : "jetToTauFakeRate_%s/denominator/%s" % (charge_selection, absEtaBin),
#          'label' : None,
#          'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#        }
#        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#        for hadTau_selection in self.hadTau_selections:
#          key_makePlots_job = getKey(charge_selection, absEtaBin, "numerator", hadTau_selection)
#          key_hadd_stage2 = getKey(charge_selection)
#          self.jobOptions_make_plots[key_makePlots_job] = {
#            'executable' : self.executable_make_plots,
#            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#            'cfgFile_modified' : os.path.join(
#              self.dirs[DKEY_CFGS], "makePlots_%s_%s_numerator_%s_%s_cfg.py" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'outputFile' : os.path.join(
#              self.dirs[DKEY_PLOT], "makePlots_%s_%s_numerator_%s_%s.png" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'histogramDir' : "jetToTauFakeRate_%s/numerator/%s/%s" % (charge_selection, hadTau_selection, absEtaBin),
#            'label' : None,
#            'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#          }
#          self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#########################################################

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        #    self.addToMakefile_hadd_stage1_5(lines_makefile)
        self.addToMakefile_backgrounds_from_data(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        #    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)   ## TO BE IMPLEMENTED LATER
        #    self.addToMakefile_make_plots(lines_makefile)              ## TO BE IMPLEMENTED LATER
        self.targets = [
            outputFile for outputFile in self.outputFile_hadd_stage2.values()
        ]
        self.createMakefile(lines_makefile)

        logging.info("Done")
Esempio n. 37
0
    root_file.Close()
    return True


def hadd(input_files, output_file):
    """Run ``hadd -f`` to merge ``input_files`` into ``output_file``.

    The command line is assembled as a single string and handed to
    ``run_cmd`` with logging disabled and stderr capture enabled.

    Args:
        input_files: Iterable of input file paths (strings); they are joined
            with single spaces on the command line.
        output_file: Path of the merged file; ``-f`` is passed to ``hadd``
            (presumably "overwrite existing output" -- confirm against the
            tool's documentation).

    Raises:
        RuntimeError: If the command produced no stdout or produced any
            stderr output; the message carries the captured stderr.
    """
    joined_inputs = ' '.join(input_files)
    command = 'hadd -f %s %s' % (output_file, joined_inputs)
    out, err = run_cmd(command, do_not_log=True, return_stderr=True)

    # Success requires some stdout and a clean (empty) stderr.
    succeeded = out and not err
    if not succeeded:
        raise RuntimeError('Error: %s' % err)


# Output locations for the stitching study, placed under the current user's
# home directory ('~' is expanded by os.path.expanduser).
output_root_dir = os.path.expanduser('~/sandbox/stitch_samples/root_files')
output_plot_dir = os.path.expanduser('~/sandbox/stitch_samples/plots')

# Hand both output directories to create_if_not_exists before anything is
# written into them.
for output_dir in [output_root_dir, output_plot_dir]:
    create_if_not_exists(output_dir)

# Each entry of samples_to_stitch is a dict with an 'inclusive' key plus any
# number of other keys, one per binning variable.
for sample_set_to_stich in samples_to_stitch:
    # All keys except 'inclusive', i.e. the binning variables.
    # NOTE(review): under Python 3 filter() returns a one-shot iterator rather
    # than a list -- confirm how binning_keys is consumed further down.
    binning_keys = filter(lambda key: key != 'inclusive',
                          sample_set_to_stich.keys())

    # Gather every sample name referenced by this set: the 'inclusive' entry
    # holds one dict with a 'samples' list, every other entry holds a list of
    # such dicts (one per bin).
    sample_list = []
    for key in sample_set_to_stich:
        if key == 'inclusive':
            sample_list.extend(sample_set_to_stich[key]['samples'])
        else:
            for binned_sample in sample_set_to_stich[key]:
                sample_list.extend(binned_sample['samples'])
    # Sanity check: no sample may be listed more than once within a set.
    assert (len(sample_list) == len(set(sample_list)))

    binning = {}
Esempio n. 38
0
    def __init__(
        self,
        configDir,
        outputDir,
        cfgFile_prodNtuple,
        samples,
        max_files_per_job,
        era,
        preselection_cuts,
        leptonSelection,
        hadTauWP,
        check_output_files,
        running_method,
        version,
        num_parallel_jobs,
        pileup,
        golden_json,
        dry_run,
        isDebug,
        gen_matching_by_index,
        use_nonnominal,
        use_home,
        skip_tools_step,
        verbose=False,
        pool_id='',
    ):
        """Record the job settings and lay out the directory bookkeeping.

        Validates the running method and the pileup / golden-JSON paths,
        stores the arguments on the instance, resolves the log and version
        file paths through ``get_log_version`` and fills ``self.dirs`` with
        one config, ntuple and log directory per enabled sample plus the
        top-level config and log directories.

        Args:
            configDir: Base directory for config files, logs, the Makefile
                and the sbatch script; passed to ``create_if_not_exists``.
            outputDir: Base directory for the per-sample ntuple directories;
                passed to ``create_if_not_exists``.
            cfgFile_prodNtuple: File name of the config template, resolved
                relative to the templates directory under ``$CMSSW_BASE``.
            samples: Mapping of sample name to sample info; each info must
                provide ``"use_it"`` and ``"process_name_specific"``.
            running_method: ``"sbatch"`` or ``"makefile"`` (case-insensitive).
            pileup: Path of an existing file, handed to
                ``get_pileup_histograms``.
            golden_json: Path of an existing file.
            verbose: Stored as ``self.verbose``.
            pool_id: Stored as ``self.pool_id``; when empty a fresh
                ``uuid.uuid4()`` is used instead.

        All remaining arguments (``max_files_per_job``, ``era``,
        ``preselection_cuts``, ``leptonSelection``, ``hadTauWP``,
        ``check_output_files``, ``version``, ``num_parallel_jobs``,
        ``dry_run``, ``isDebug``, ``gen_matching_by_index``,
        ``use_nonnominal``, ``use_home``, ``skip_tools_step``) are stored
        unchanged under attributes of the same name.

        Raises:
            ValueError: If ``running_method`` is not recognised, if
                ``pileup`` or ``golden_json`` is not an existing file, or if
                the ``CMSSW_BASE`` environment variable is not set.
        """
        self.configDir = configDir
        self.outputDir = outputDir
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauWP = hadTauWP
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.isDebug = isDebug
        self.gen_matching_by_index = gen_matching_by_index
        self.use_nonnominal = use_nonnominal
        self.use_home = use_home
        self.pileup = pileup
        self.golden_json = golden_json

        # Validate the inputs before any directory or log file is touched.
        running_method_lowercase = running_method.lower()
        if running_method_lowercase not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        if not os.path.isfile(self.pileup):
            raise ValueError('No such file: %s' % self.pileup)
        self.pileup_histograms = get_pileup_histograms(self.pileup)

        if not os.path.isfile(self.golden_json):
            raise ValueError('No such file: %s' % self.golden_json)

        self.running_method = running_method
        self.is_sbatch = running_method_lowercase == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        self.skip_tools_step = skip_tools_step
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)

        # os.getenv returns None for an unset variable, which would make
        # os.path.join fail with an unhelpful type error; report it clearly.
        cmssw_base = os.getenv('CMSSW_BASE')
        if cmssw_base is None:
            raise ValueError(
                "Environment variable CMSSW_BASE is not set")
        self.template_dir = os.path.join(cmssw_base, 'src', 'tthAnalysis',
                                         'HiggsToTauTau', 'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        self.version = version

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.stdout_file_path = os.path.join(self.configDir,
                                             "stdout_prodNtuple.log")
        self.stderr_file_path = os.path.join(self.configDir,
                                             "stderr_prodNtuple.log")
        self.sw_ver_file_cfg = os.path.join(self.configDir,
                                            "VERSION_prodNtuple.log")
        self.sw_ver_file_out = os.path.join(self.outputDir,
                                            "VERSION_prodNtuple.log")
        # get_log_version returns the four paths to actually use, in the
        # same order as they were passed in.
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out))

        self.cfgFile_prodNtuple_original = os.path.join(
            self.template_dir, cfgFile_prodNtuple)
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}

        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []
        self.dirs = {}

        # Per-sample directories: configs and logs live under configDir,
        # ntuples under outputDir.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    base_dir = self.configDir
                else:
                    base_dir = self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, process_name)

        # Top-level config and log directories; both belong to configDir.
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
        self.executable = "produceNtuple.sh"
Esempio n. 39
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            logging.info("Building dictionaries for sample %s..." %
                         process_name)
            for charge_selection in self.charge_selections:
                central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
                central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    process_name_extended = [process_name, "hadd"]
                    for process_name_or_dummy in process_name_extended:
                        if central_or_shift_or_dummy in [
                                "hadd"
                        ] and process_name_or_dummy in ["hadd"]:
                            continue
                        if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
                            if not is_mc:
                                continue
                            if not self.accept_central_or_shift(
                                    central_or_shift_or_dummy, sample_info):
                                continue

                        key_dir = getKey(process_name_or_dummy,
                                         charge_selection,
                                         central_or_shift_or_dummy)
                        for dir_type in [
                                DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES
                        ]:
                            initDict(self.dirs, [key_dir, dir_type])
                            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.configDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy,
                                    central_or_shift_or_dummy)
                            else:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.outputDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy)
        for subdirectory in ["comp_jetToTauFakeRate", "makePlots"]:
            key_dir = getKey(subdirectory)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, subdirectory)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, subdirectory)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            inputFileList = inputFileLists[sample_name]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            sample_category = sample_info["sample_category"]

            for charge_selection in self.charge_selections:
                for central_or_shift in self.central_or_shifts:

                    if central_or_shift != "central" and not is_mc:
                        continue
                    if not self.accept_central_or_shift(
                            central_or_shift, sample_info):
                        continue

                    # build config files for executing analysis code
                    key_analyze_dir = getKey(process_name, charge_selection,
                                             central_or_shift)

                    for jobId in inputFileList.keys():

                        analyze_job_tuple = (process_name, charge_selection,
                                             central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        histogramFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                          if self.select_rle_output else ""

                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'chargeSelection': charge_selection,
                            'jet_minPt': self.jet_minPt,
                            'jet_maxPt': self.jet_maxPt,
                            'jet_minAbsEta': self.jet_minAbsEta,
                            'jet_maxAbsEta': self.jet_maxAbsEta,
                            'hadTau_selection_tight':
                            self.hadTau_selection_tight,
                            'hadTauSelection_denominator':
                            self.hadTau_selection_denominator,
                            'hadTauSelections_numerator':
                            self.hadTau_selections_numerator,
                            'trigMatchingOptions': self.trigMatchingOptions,
                            'selEventsFileName_output': rleOutputFile_path,
                            'absEtaBins': self.absEtaBins,
                            'decayModes': self.decayModes,
                            'central_or_shift': central_or_shift,
                            'central_or_shifts_local': [],
                            'apply_hlt_filter': self.hlt_filter,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name,
                                                     charge_selection)
                        hadd_stage1_job_tuple = (process_name,
                                                 charge_selection)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[
                                key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[
                            key_hadd_stage1_job].append(
                                self.jobOptions_analyze[key_analyze_job]
                                ['histogramFile'])
                        self.outputFile_hadd_stage1[
                            key_hadd_stage1_job] = os.path.join(
                                self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                "hadd_stage1_%s_%s.root" %
                                hadd_stage1_job_tuple)

                # initialize input and output file names for hadd_stage2
                key_hadd_stage1_job = getKey(process_name, charge_selection)
                key_hadd_stage2_dir = getKey("hadd", charge_selection)
                key_hadd_stage2_job = getKey(charge_selection)
                if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])
                self.outputFile_hadd_stage2[
                    key_hadd_stage2_job] = os.path.join(
                        self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                        "hadd_stage2_%s.root" % charge_selection)

        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            charge_key = "comp_%s" % charge_selection
            self.comp_input_files[charge_key] = []
            for trigMatchingOption in self.trigMatchingOptions:
                key_hadd_stage2_job = getKey(charge_selection)
                key_comp_jetToTauFakeRate_dir = getKey("comp_jetToTauFakeRate")
                key_comp_jetToTauFakeRate_job = getKey(charge_selection,
                                                       trigMatchingOption)
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job] = {
                        'inputFile':
                        self.outputFile_hadd_stage2[key_hadd_stage2_job],
                        'cfgFile_modified':
                        os.path.join(
                            self.dirs[DKEY_CFGS],
                            "comp_jetToTauFakeRate_%s_%s_cfg.py" %
                            (charge_selection, trigMatchingOption)),
                        'outputFile':
                        os.path.join(
                            self.dirs[DKEY_HIST],
                            "comp_jetToTauFakeRate_%s_%s.root" %
                            (charge_selection, trigMatchingOption)),
                        'logFile':
                        os.path.join(
                            self.dirs[DKEY_LOGS],
                            "comp_jetToTauFakeRate_%s_%s.log" %
                            (charge_selection, trigMatchingOption)),
                        'looseRegion':
                        "jetToTauFakeRate_%s_%s/denominator/" %
                        (charge_selection, trigMatchingOption),
                        'tightRegion':
                        "jetToTauFakeRate_%s_%s/numerator/" %
                        (charge_selection, trigMatchingOption),
                        'absEtaBins':
                        self.absEtaBins,
                        'ptBins':
                        self.ptBins,
                        'decayModes':
                        self.decayModes,
                        'hadTauSelections':
                        self.hadTau_selections_numerator,
                        'trigMatchingOption':
                        trigMatchingOption,
                        'plots_outputFileName':
                        os.path.join(
                            self.dirs[key_comp_jetToTauFakeRate_dir]
                            [DKEY_PLOT], "comp_jetToTauFakeRate_%s.png" %
                            trigMatchingOption)
                    }
                self.createCfg_comp_jetToTauFakeRate(
                    self.jobOptions_comp_jetToTauFakeRate[
                        key_comp_jetToTauFakeRate_job])
                comp_output = self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job]['outputFile']
                self.targets.append(comp_output)
                self.comp_input_files[charge_key].append(comp_output)
            self.comp_output_files[charge_key] = os.path.join(
                self.dirs[DKEY_HIST],
                "comp_jetToTauFakeRate_%s.root" % charge_selection)

        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_hadd_stage2_job = getKey(charge_selection)
            key_makePlots_dir = getKey("makePlots")
            key_makePlots_job = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS],
                             "makePlots_%s_cfg.py" % self.channel),
                'outputFile':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT],
                             "makePlots_%s.png" % self.channel),
                'histogramDir':
                "jetToTauFakeRate_%s" % charge_selection,
                'label':
                None,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])
            for trigMatchingOption in self.trigMatchingOptions:
                self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
                for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                    key_hadd_stage2_job = getKey(charge_selection)
                    key_makePlots_job = getKey(charge_selection,
                                               trigMatchingOption, absEtaBin,
                                               "denominator")
                    self.jobOptions_make_plots[key_makePlots_job] = {
                      'executable' : self.executable_make_plots,
                      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                      'cfgFile_modified' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_denominator_%s_cfg.py" % \
                          (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'outputFile' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_denominator_%s.png" % (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'histogramDir' : "jetToTauFakeRate_%s_%s/denominator/%s" % (charge_selection, trigMatchingOption, absEtaBin),
                      'label' : None,
                      'make_plots_backgrounds' : self.make_plots_backgrounds
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])
                    for hadTau_selection_numerator in self.hadTau_selections_numerator:
                        key_hadd_stage2_job = getKey(charge_selection)
                        key_makePlots_job = getKey(charge_selection,
                                                   trigMatchingOption,
                                                   absEtaBin, "numerator",
                                                   hadTau_selection_numerator)
                        self.jobOptions_make_plots[key_makePlots_job] = {
                          'executable' : self.executable_make_plots,
                          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                          'cfgFile_modified' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_numerator_%s_%s_cfg.py" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'outputFile' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_numerator_%s_%s.png" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'histogramDir' : "jetToTauFakeRate_%s_%s/numerator/%s/%s" % (charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin),
                          'label' : None,
                          'make_plots_backgrounds' : self.make_plots_backgrounds
                        }
                        self.createCfg_makePlots(
                            self.jobOptions_make_plots[key_makePlots_job])

        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile,
                                       make_dependency="phony_hadd_stage1",
                                       max_mem='4096M')
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_comp_hadd(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        return self.num_jobs
Esempio n. 40
0
    def __init__(
        self,
        configDir,
        outputDir,
        output_file,
        executable,
        projection_module,
        samples,
        max_files_per_job,
        era,
        plot,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=False,
        submission_cmd=None,
    ):
        """Store the job settings and derive all file and directory paths.

        Args:
            configDir: Directory holding the Makefile, the stdout/stderr and
                version logs, the sbatch script and every directory type
                listed in ``cfg_dirs``. Passed to ``create_if_not_exists``.
            outputDir: Directory holding ``output_file``, a second version
                log and every directory type not listed in ``cfg_dirs``.
                Passed to ``create_if_not_exists``.
            output_file: File name that is joined onto ``outputDir``.
            executable: Stored as-is on the instance.
            projection_module: Name embedded in the Makefile, log and sbatch
                file names. When it equals ``'puHist'`` the reference
                gen-weights file is not loaded and an empty dict is used.
            samples: Mapping whose values are dicts providing at least the
                keys ``'use_it'`` and ``'process_name_specific'``.
            max_files_per_job: Stored as-is on the instance.
            era: Stored, and embedded in the reference gen-weights file name.
            plot: Stored as-is on the instance.
            check_output_files: Stored as-is on the instance.
            running_method: Either ``"sbatch"`` or ``"makefile"``
                (compared case-insensitively).
            num_parallel_jobs: Stored as-is on the instance.
            pool_id: Stored when truthy; otherwise a fresh ``uuid.uuid4()``
                is used instead.
            verbose: Stored as-is on the instance.
            dry_run: Stored as-is on the instance.
            use_home: Stored as-is on the instance.
            submission_cmd: Forwarded to ``check_submission_cmd`` together
                with the submission log path.

        Raises:
            ValueError: If ``running_method`` is neither ``"sbatch"`` nor
                ``"makefile"``.
            KeyError: If the ``CMSSW_BASE`` environment variable is not set.
        """
        self.configDir = configDir
        self.outputDir = outputDir
        self.executable = executable
        self.projection_module = projection_module
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.plot = plot
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.use_home = use_home
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(
            self.configDir, "Makefile_{}".format(self.projection_module))
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s", self.workingDir)
        # Read CMSSW_BASE exactly once and through os.environ, so that an
        # unset variable fails here with a KeyError naming the variable
        # rather than with an opaque type error inside os.path.join.
        cmssw_base = os.environ['CMSSW_BASE']
        self.template_dir = os.path.join(cmssw_base, 'src', 'tthAnalysis',
                                         'HiggsToTauTau', 'test', 'templates')
        logging.info("Templates directory is: %s", self.template_dir)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.output_file = os.path.join(self.outputDir, output_file)
        self.stdout_file_path = os.path.join(
            self.configDir, "stdout_{}.log".format(self.projection_module))
        self.stderr_file_path = os.path.join(
            self.configDir, "stderr_{}.log".format(self.projection_module))
        self.sw_ver_file_cfg = os.path.join(
            self.configDir, "VERSION_{}.log".format(self.projection_module))
        self.sw_ver_file_out = os.path.join(
            self.outputDir, "VERSION_{}.log".format(self.projection_module))
        self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
        # get_log_version receives the five candidate paths and returns five
        # paths in the same order, which replace the candidates.
        (
            self.stdout_file_path,
            self.stderr_file_path,
            self.sw_ver_file_cfg,
            self.sw_ver_file_out,
            self.submission_out,
        ) = get_log_version((
            self.stdout_file_path,
            self.stderr_file_path,
            self.sw_ver_file_cfg,
            self.sw_ver_file_out,
            self.submission_out,
        ))
        check_submission_cmd(self.submission_out, submission_cmd)

        self.sbatchFile_projection = os.path.join(
            self.configDir, "sbatch_{}.py".format(self.projection_module))
        self.cfgFiles_projection = {}
        self.logFiles_projection = {}
        self.scriptFiles_projection = {}
        self.jobOptions_sbatch = {}

        self.inputFiles = {}
        self.outputFiles_tmp = {}
        self.outputFiles = {}

        self.phoniesToAdd = []
        self.filesToClean = []
        self.targets = []

        self.makefile_target = "sbatch_{}".format(self.projection_module)

        self.dirs = {}
        all_dirs = [
            DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
            DKEY_SCRIPTS, DKEY_HADD_RT
        ]
        # Directory types rooted in configDir; all others go to outputDir.
        cfg_dirs = [
            DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
        ]

        ref_genWeightsFile = os.path.join(
            cmssw_base, 'src', 'tthAnalysis', 'HiggsToTauTau', 'data',
            'refGenWeight_{}.txt'.format(self.era))
        self.ref_genWeights = load_refGenWeightsFromFile(
            ref_genWeightsFile) if projection_module != 'puHist' else {}

        # Per-process directories; only the sample info is needed here, the
        # sample name itself is not used.
        for sample_info in self.samples.values():
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in all_dirs:
                # No per-process plots directory is registered.
                if dir_type == DKEY_PLOTS:
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                base_dir = (self.configDir
                            if dir_type in cfg_dirs else self.outputDir)
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, process_name)
        # Top-level (process-independent) directories, all under configDir.
        for dir_type in cfg_dirs:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
        self.num_jobs = {
            'hadd': 0,
            'project': 0,
            'plot': 0,
        }
Esempio n. 41
0
    def __init__(self, outputDir, executable_analyze, channel, central_or_shifts,
                 max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
                 histograms_to_fit, executable_prep_dcard="prepareDatacards", executable_make_plots="makePlots"):

        self.outputDir = outputDir
        self.executable_analyze = executable_analyze
        self.channel = channel
        self.central_or_shifts = central_or_shifts
        self.max_files_per_job = max_files_per_job
        self.max_num_jobs = 20000
        self.era = era
        self.use_lumi = use_lumi
        self.lumi = lumi
        self.debug = debug
        assert(running_method.lower() in [
               "sbatch", "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(
            self.outputDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.histograms_to_fit = histograms_to_fit
        self.executable_prep_dcard = executable_prep_dcard
        self.prep_dcard_processesToCopy = [
            "data_obs", "TT", "TTW", "TTZ", "EWK", "Rares"]
        self.prep_dcard_signals = ["ttH_hww", "ttH_hzz", "ttH_htt"]
        self.executable_make_plots = executable_make_plots

        self.workingDir = os.getcwd()
        print "Working directory is: " + self.workingDir

        create_if_not_exists(self.outputDir)
        self.stdout_file = codecs.open(os.path.join(
            self.outputDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
        self.stderr_file = codecs.open(os.path.join(
            self.outputDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
        self.dirs = {}
        self.samples = {}
        self.cfgFiles_analyze_modified = {}
        self.logFiles_analyze = {}
        self.sbatchFile_analyze = os.path.join(
            self.outputDir, "sbatch_analyze_%s.py" % self.channel)
        self.ntupleFiles = {}
        self.histogramFiles = {}
        self.inputFiles_hadd_stage1 = []
        self.histogramFile_hadd_stage1 = os.path.join(
            self.outputDir, DKEY_HIST, "histograms_harvested_stage1_%s.root" % self.channel)
        self.inputFiles_hadd_stage1_5 = []
        self.histogramFile_hadd_stage1_5 = os.path.join(
            self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
        self.inputFiles_hadd_stage2 = []
        self.histogramFile_hadd_stage2 = os.path.join(
            self.outputDir, DKEY_HIST, "histograms_harvested_stage2_%s.root" % self.channel)
        self.datacardFiles = {}
        self.cfgFile_prep_dcard_original = os.path.join(
            self.workingDir, "prepareDatacards_cfg.py")
        self.cfgFile_prep_dcard_modified = {}
        self.histogramDir_prep_dcard = None
        self.make_plots_backgrounds = ["TT", "TTW", "TTZ", "EWK", "Rares"]
        self.make_plots_signal = "signal"
        self.cfgFile_make_plots_original = os.path.join(
            self.workingDir, "makePlots_cfg.py")
        self.cfgFiles_make_plots_modified = []
        self.filesToClean = []
        self.rleOutputFiles = {}
        self.rootOutputFiles = {}
        self.rootOutputAux = {}

        if era == '2015':
            self.triggers_1e = ['HLT_BIT_HLT_Ele23_WPLoose_Gsf_v']
            self.triggers_2e = [
                'HLT_BIT_HLT_Ele17_Ele12_CaloIdL_TrackIdL_IsoVL_DZ_v']
            self.triggers_1mu = [
                'HLT_BIT_HLT_IsoMu20_v', 'HLT_BIT_HLT_IsoTkMu20_v']
            self.triggers_2mu = ['HLT_BIT_HLT_Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_v',
                                 'HLT_BIT_HLT_Mu17_TrkIsoVVL_TkMu8_TrkIsoVVL_DZ_v']
            self.triggers_1e1mu = ['HLT_BIT_HLT_Mu17_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_v',
                                   'HLT_BIT_HLT_Mu8_TrkIsoVVL_Ele17_CaloIdL_TrackIdL_IsoVL_v']
        elif era == '2016':
            # CV: HLT_Ele25_WPTight_Gsf_v* was prescaled during part of 2016
            # Runs B-D, so use HLT_Ele27_eta2p1_WPLoose_Gsf_v in addition
            self.triggers_1e = ['HLT_BIT_HLT_Ele25_WPTight_Gsf_v',
                                'HLT_BIT_HLT_Ele27_eta2p1_WPLoose_Gsf_v']
            self.triggers_2e = [
                'HLT_BIT_HLT_Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ_v']
            self.triggers_1mu = [
                'HLT_BIT_HLT_IsoMu22_v', 'HLT_BIT_HLT_IsoTkMu22_v']
            self.triggers_2mu = ['HLT_BIT_HLT_Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_DZ_v',
                                 'HLT_BIT_HLT_Mu17_TrkIsoVVL_TkMu8_TrkIsoVVL_DZ_v']
            self.triggers_1e1mu = ['HLT_BIT_HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_v',
                                   'HLT_BIT_HLT_Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_v']
            self.triggers_2tau = [
                'HLT_BIT_HLT_DoubleMediumIsoPFTau35_Trk1_eta2p1_Reg_v*']
        else:
            raise ValueError(
                "Invalid Configuration parameter 'era' = %s !!" % era)

        self.cvmfs_error_log = {}