Ejemplo n.º 1
0
def createScript_sbatch(sbatch_script_file_name,
                        executable,
                        command_line_parameters,
                        input_file_names,
                        output_file_names,
                        script_file_names,
                        log_file_names=None,
                        working_dir=None,
                        max_num_jobs=100000,
                        cvmfs_error_log=None,
                        pool_id='',
                        cmssw_base_dir=None,
                        verbose=False):
    """Write the python script that submits analysis and/or Ntuple production jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines`` and written to
    ``sbatch_script_file_name`` with ``createFile``.

    Args:
      sbatch_script_file_name: path of the script file to create
      working_dir: replaced by the current working directory when falsy
      cmssw_base_dir: replaced by the ``CMSSW_BASE`` environment variable when falsy
      pool_id: must be non-empty
      All other arguments are forwarded positionally, unchanged, to
      ``generate_sbatch_lines``.

    Returns:
      The job count reported by ``generate_sbatch_lines``.

    Raises:
      KeyError: ``cmssw_base_dir`` is falsy and ``CMSSW_BASE`` is not set
      ValueError: ``pool_id`` is empty
    """
    working_dir = working_dir or os.getcwd()
    cmssw_base_dir = cmssw_base_dir or os.environ['CMSSW_BASE']
    if not pool_id:
        raise ValueError('pool_id is empty')

    # The callee is invoked positionally, so the order of this tuple matters.
    forwarded_args = (
        executable,
        command_line_parameters,
        input_file_names,
        output_file_names,
        script_file_names,
        log_file_names,
        working_dir,
        max_num_jobs,
        cvmfs_error_log,
        pool_id,
        cmssw_base_dir,
        verbose,
    )
    script_lines, job_count = generate_sbatch_lines(*forwarded_args)
    createFile(sbatch_script_file_name, script_lines)
    return job_count
Ejemplo n.º 2
0
def createScript_sbatch_hadd(sbatch_script_file_name,
                             input_file_names,
                             output_file_name,
                             script_file_name,
                             log_file_name=None,
                             working_dir=None,
                             waitForJobs=True,
                             auxDirName='',
                             pool_id='',
                             verbose=False):
    """Write the python script that submits 'hadd' jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines_hadd`` and
    written to ``sbatch_script_file_name`` with ``createFile``.

    Args:
      sbatch_script_file_name: path of the script file to create
      working_dir: replaced by the current working directory when falsy
      pool_id: must be non-empty
      All other arguments are forwarded by keyword, unchanged, to
      ``generate_sbatch_lines_hadd``.

    Returns:
      The job count reported by ``generate_sbatch_lines_hadd``.

    Raises:
      ValueError: ``pool_id`` is empty
    """
    working_dir = working_dir or os.getcwd()
    if not pool_id:
        raise ValueError('pool_id is empty')

    hadd_options = dict(
        input_file_names=input_file_names,
        output_file_name=output_file_name,
        script_file_name=script_file_name,
        log_file_name=log_file_name,
        working_dir=working_dir,
        waitForJobs=waitForJobs,
        auxDirName=auxDirName,
        pool_id=pool_id,
        verbose=verbose,
    )
    script_lines, job_count = generate_sbatch_lines_hadd(**hadd_options)
    createFile(sbatch_script_file_name, script_lines)
    return job_count
Ejemplo n.º 3
0
    def createCfg(self, jobOptions):
        """Write the configuration file for the refGenWeight script

        The file content is exactly the list stored under 'inputFiles'.

        Args:
          jobOptions: dictionary providing the keys
            'inputFiles'   -- list of lines to write (input Ntuples)
            'cfgFile_path' -- path of the configuration file to create
        """
        input_lines = jobOptions['inputFiles']
        cfg_path = jobOptions['cfgFile_path']
        createFile(cfg_path, input_lines, nofNewLines=1)
Ejemplo n.º 4
0
def createScript_sbatch(
    sbatch_script_file_name,
    executable,
    command_line_parameters,
    input_file_names,
    output_file_names,
    script_file_names,
    log_file_names=None,
    working_dir=None,
    max_num_jobs=-1,
    cvmfs_error_log=None,
    pool_id='',
    cmssw_base_dir=None,
    verbose=False,
    job_template_file='sbatch-node.sh.template',
    dry_run=False,
    validate_outputs=True,
    min_file_size=20000,
    max_num_submittedJobs=5000,
    use_home=False,
    copy_output_file=True,
):
    """Write the python script that submits analysis and/or Ntuple production jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines`` and written to
    ``sbatch_script_file_name`` with ``createFile``.

    Args:
      sbatch_script_file_name: path of the script file to create
      working_dir: replaced by the current working directory when falsy
      cmssw_base_dir: replaced by the ``CMSSW_BASE`` environment variable when falsy
      pool_id: must be non-empty
      All other arguments are forwarded by keyword, unchanged, to
      ``generate_sbatch_lines``.

    Returns:
      The job count reported by ``generate_sbatch_lines``.

    Raises:
      KeyError: ``cmssw_base_dir`` is falsy and ``CMSSW_BASE`` is not set
      ValueError: ``pool_id`` is empty
    """
    working_dir = working_dir or os.getcwd()
    cmssw_base_dir = cmssw_base_dir or os.environ['CMSSW_BASE']
    if not pool_id:
        raise ValueError('pool_id is empty')

    generator_options = dict(
        executable=executable,
        command_line_parameters=command_line_parameters,
        input_file_names=input_file_names,
        output_file_names=output_file_names,
        script_file_names=script_file_names,
        log_file_names=log_file_names,
        working_dir=working_dir,
        max_num_jobs=max_num_jobs,
        cvmfs_error_log=cvmfs_error_log,
        pool_id=pool_id,
        cmssw_base_dir=cmssw_base_dir,
        verbose=verbose,
        job_template_file=job_template_file,
        dry_run=dry_run,
        validate_outputs=validate_outputs,
        min_file_size=min_file_size,
        max_num_submittedJobs=max_num_submittedJobs,
        use_home=use_home,
        copy_output_file=copy_output_file,
    )
    script_lines, job_count = generate_sbatch_lines(**generator_options)
    createFile(sbatch_script_file_name, script_lines)
    return job_count
Ejemplo n.º 5
0
    def createCfg_puProfile(self, jobOptions):
        """Write the configuration file for the puProfile.sh script

        The written lines are the entries of 'inputFiles', followed by an empty
        entry and a final entry of the form ``<era> <histName> <outputFile>``.

        Args:
          jobOptions: dictionary providing the keys
            'inputFiles'   -- list of input files (Ntuples)
            'histName'     -- second field of the final entry
            'outputFile'   -- third field of the final entry
            'cfgFile_path' -- path of the configuration file to create
        """
        input_files = jobOptions['inputFiles']
        summary_line = '%s %s %s' % (self.era, jobOptions['histName'], jobOptions['outputFile'])
        cfg_lines = input_files + ['', summary_line]
        # Two entries are always appended, so this requires 'inputFiles' to be non-empty.
        assert (len(cfg_lines) >= 3)
        createFile(jobOptions['cfgFile_path'], cfg_lines, nofNewLines=1)
    def createCfg_nonResDenom(self, jobOptions):
        """Write the configuration file for the denomHistogramProducer.sh script

        The written lines are the entries of 'inputFiles', followed by an empty
        entry and a final entry of the form
        ``<processName> <categoryName> <outputFile>``.

        Args:
          jobOptions: dictionary providing the keys
            'inputFiles'   -- list of input files (Ntuples)
            'processName'  -- first field of the final entry
            'categoryName' -- second field of the final entry
            'outputFile'   -- third field of the final entry
            'cfgFile_path' -- path of the configuration file to create
        """
        input_files = jobOptions['inputFiles']
        summary_fields = (jobOptions['processName'], jobOptions['categoryName'], jobOptions['outputFile'])
        cfg_lines = input_files + ['', '%s %s %s' % summary_fields]
        # Two entries are always appended, so this requires 'inputFiles' to be non-empty.
        assert (len(cfg_lines) >= 3)
        createFile(jobOptions['cfgFile_path'], cfg_lines, nofNewLines=1)
Ejemplo n.º 7
0
def createScript_sbatch(sbatch_script_file_name,
                        executable,
                        cfg_file_names,
                        input_file_names,
                        output_file_names,
                        log_file_names = None,
                        working_dir = None,
                        max_num_jobs = 100000,
                        cvmfs_error_log = None):
    """Write the python script that submits analysis and/or Ntuple production jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines`` and written to
    ``sbatch_script_file_name`` with ``createFile``. Nothing is returned.

    Args:
      sbatch_script_file_name: path of the script file to create
      working_dir: replaced by the current working directory when falsy
      All other arguments are forwarded positionally, unchanged, to
      ``generate_sbatch_lines``.
    """
    working_dir = working_dir or os.getcwd()

    # The callee is invoked positionally, so the order of this tuple matters.
    forwarded_args = (
        executable,
        cfg_file_names,
        input_file_names,
        output_file_names,
        log_file_names,
        working_dir,
        max_num_jobs,
        cvmfs_error_log,
    )
    script_lines = generate_sbatch_lines(*forwarded_args)
    createFile(sbatch_script_file_name, script_lines)
Ejemplo n.º 8
0
def createScript_sbatch_hadd(sbatch_script_file_name,
                             input_file_names,
                             output_file_name,
                             hadd_stage_name,
                             working_dir = None,
                             waitForJobs = True):
    """Write the python script that submits 'hadd' jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines_hadd`` and
    written to ``sbatch_script_file_name`` with ``createFile``. Nothing is
    returned.

    Args:
      sbatch_script_file_name: path of the script file to create
      input_file_names: forwarded to ``generate_sbatch_lines_hadd``
      output_file_name: forwarded to ``generate_sbatch_lines_hadd``
      hadd_stage_name: accepted but not used by this function
      working_dir: replaced by the current working directory when falsy
      waitForJobs: forwarded to ``generate_sbatch_lines_hadd``
    """
    working_dir = working_dir or os.getcwd()

    hadd_options = dict(
        input_file_names = input_file_names,
        output_file_name = output_file_name,
        working_dir = working_dir,
        waitForJobs = waitForJobs,
    )
    script_lines = generate_sbatch_lines_hadd(**hadd_options)
    createFile(sbatch_script_file_name, script_lines)
Ejemplo n.º 9
0
    def createCfg_project(self, jobOptions):
        """Write the configuration file for the projection script

        The written lines are the entries of 'inputFiles', followed by an empty
        entry and a final entry of the form
        ``<projection_module> <era> <histName> <outputFile>``. Unless
        ``self.projection_module`` equals 'puHist', the value of 'ref_genWeight'
        formatted with ``%.6e`` is appended to that final entry.

        Args:
          jobOptions: dictionary providing the keys
            'projection_module' -- first field of the final entry
            'inputFiles'        -- list of input files (Ntuples)
            'histName'          -- third field of the final entry
            'outputFile'        -- fourth field of the final entry
            'ref_genWeight'     -- read only when self.projection_module != 'puHist'
            'cfgFile_path'      -- path of the configuration file to create
        """
        summary_fields = (
            jobOptions['projection_module'],
            self.era,
            jobOptions['histName'],
            jobOptions['outputFile'],
        )
        summary_line = '%s %s %s %s' % summary_fields
        # NOTE(review): the module name written above comes from jobOptions, while
        # this check uses self.projection_module -- confirm both always agree.
        if self.projection_module != 'puHist':
            summary_line = summary_line + ' %.6e' % jobOptions['ref_genWeight']
        cfg_lines = jobOptions['inputFiles'] + ['', summary_line]
        # Two entries are always appended, so this requires 'inputFiles' to be non-empty.
        assert (len(cfg_lines) >= 3)
        createFile(jobOptions['cfgFile_path'], cfg_lines, nofNewLines=1)
Ejemplo n.º 10
0
def createScript_sbatch_hadd(
    sbatch_script_file_name,
    input_file_names,
    output_file_name,
    script_file_name,
    log_file_name=None,
    working_dir=None,
    waitForJobs=True,
    auxDirName='',
    pool_id='',
    verbose=False,
    dry_run=False,
    max_input_files_per_job=10,
    use_home=False,
    min_file_size=20000,
    max_num_submittedJobs=5000,
):
    """Write the python script that submits 'hadd' jobs to the batch system

    The script lines are obtained from ``generate_sbatch_lines_hadd`` and
    written to ``sbatch_script_file_name`` with ``createFile``.

    Args:
      sbatch_script_file_name: path of the script file to create
      working_dir: replaced by the current working directory when falsy
      pool_id: must be non-empty
      All other arguments are forwarded by keyword, unchanged, to
      ``generate_sbatch_lines_hadd``.

    Returns:
      The job count reported by ``generate_sbatch_lines_hadd``.

    Raises:
      ValueError: ``pool_id`` is empty
    """
    working_dir = working_dir or os.getcwd()
    if not pool_id:
        raise ValueError('pool_id is empty')

    hadd_options = dict(
        input_file_names=input_file_names,
        output_file_name=output_file_name,
        script_file_name=script_file_name,
        log_file_name=log_file_name,
        working_dir=working_dir,
        waitForJobs=waitForJobs,
        auxDirName=auxDirName,
        pool_id=pool_id,
        verbose=verbose,
        dry_run=dry_run,
        max_input_files_per_job=max_input_files_per_job,
        use_home=use_home,
        min_file_size=min_file_size,
        max_num_submittedJobs=max_num_submittedJobs,
    )
    script_lines, job_count = generate_sbatch_lines_hadd(**hadd_options)
    createFile(sbatch_script_file_name, script_lines)
    return job_count
Ejemplo n.º 11
0
            "_cfg.py", "_with2016_cfg.py")
        lines_prepareDatacards = []
        lines_prepareDatacards.append(
            "process.prepareDatacards.signals = cms.vstring(")
        for signal in signals:
            lines_prepareDatacards.append("    '%s'," % signal)
        lines_prepareDatacards.append(")")
        filename_prepareDatacards_output = os.path.join(
            channels[channel_name]['directory_datacards'],
            "prepareDatacards_%s_%s_with2016.root" %
            (channel_name, histogram_to_fit))
        lines_prepareDatacards.append(
            "process.fwliteInput.fileNames = cms.vstring('%s')" %
            filenames_hadd_output[channel_name])
        lines_prepareDatacards.append(
            "process.fwliteOutput.fileName = cms.string('%s')" %
            filename_prepareDatacards_output)
        create_cfg(filename_prepareDatacards_cfg_original,
                   filename_prepareDatacards_cfg_modified,
                   lines_prepareDatacards)
        command_prepareDatacards = '%s %s' % (
            executable_prepareDatacards,
            filename_prepareDatacards_cfg_modified)
        lines_shell_script.append(command_prepareDatacards)

from tthAnalysis.HiggsToTauTau.analysisTools import createFile
# Write the accumulated shell commands (lines_shell_script) to the script file.
createFile(filename_shell_script, lines_shell_script)

# Tell the user that generation is complete and how to run the generated script.
print "Finished creating config files."
print "Now execute: 'source %s'" % filename_shell_script
Ejemplo n.º 12
0
def createScript_sbatch_hadd_nonBlocking(
    sbatch_script_file_name,
    inputFiles,
    outputFiles,
    script_file_name,
    log_file_name=None,
    working_dir=None,
    auxDirName='',
    verbose=False,
    dry_run=False,
    max_input_files_per_job=10,
    use_home=False,
    min_file_size=20000,
    max_num_submittedJobs=5000,
    max_mem='',
):
    """Write a python script that runs several 'hadd' aggregations without blocking on each one

    The generated script consists of three parts:
      * a header importing ``sbatchManager`` and
        ``ClusterHistogramAggregatorNonBlocking``;
      * one section per key of ``outputFiles`` that creates an ``sbatchManager``
        (with a freshly generated UUID as pool id) and a
        ``ClusterHistogramAggregatorNonBlocking``, and calls ``create_jobs()``;
      * a footer that polls ``is_done()`` of every aggregator, sleeping 60
        seconds between rounds, until all of them report completion.

    Args:
      sbatch_script_file_name: path of the script file to create
      inputFiles: mapping from key to the list of input files for that key
      outputFiles: mapping from key to the output file name for that key; its
        keys define which sections are generated
      script_file_name: must contain 'ClusterHistogramAggregator'; the key is
        inserted in front of that marker to obtain a per-key name
      log_file_name: same requirement and treatment as ``script_file_name``;
        despite the ``None`` default it must be a string whenever
        ``outputFiles`` is non-empty
      working_dir: replaced by the current working directory when falsy
      auxDirName, verbose, dry_run, max_input_files_per_job, use_home,
      min_file_size, max_num_submittedJobs: rendered into every section
      max_mem: rendered as the manager's ``max_mem`` only when non-empty

    Returns:
      The sum over all keys of
      ``get_num_jobs(len(inputFiles[key]), max_input_files_per_job)``.

    Raises:
      RuntimeError: the input files of some key contain duplicates
      AssertionError: ``script_file_name`` or ``log_file_name`` lacks the
        'ClusterHistogramAggregator' marker
    """
    import os

    # Without this fallback the default None would be rendered verbatim as
    # setWorkingDir('None') in the generated script.
    if not working_dir:
        working_dir = os.getcwd()

    header = """
from tthAnalysis.HiggsToTauTau.sbatchManager import sbatchManager
from tthAnalysis.HiggsToTauTau.ClusterHistogramAggregatorNonBlocking import ClusterHistogramAggregatorNonBlocking

import time

sbatch_managers = []
cluster_histogram_aggregators = []

    """
    job_template = """
m_{{idx}} = sbatchManager(
  '{{pool_id}}', 
  verbose               = {{verbose}}, 
  dry_run               = {{dry_run}}, 
  use_home              = {{use_home}}, 
  min_file_size         = {{min_file_size}},
  max_num_submittedJobs = {{max_num_submittedJobs}},
)
m_{{idx}}.setWorkingDir('{{working_dir}}')
m_{{idx}}.log_completion = {{verbose}}
{% if max_mem|length %}m_{{idx}}.max_mem = '{{max_mem}}'{% endif %}
sbatch_managers.append(m_{{idx}})

cluster_histogram_aggregator_{{ idx }} = ClusterHistogramAggregatorNonBlocking(
  input_files             = {{input_file_names}},
  final_output_file       = '{{output_file_name}}',
  max_input_files_per_job = {{max_input_files_per_job}},
  sbatch_manager          = m_{{ idx }},
  auxDirName              = '{{auxDirName}}',
  script_file_name        = '{{script_file_name}}',
  log_file_name           = '{{log_file_name}}',
)
cluster_histogram_aggregator_{{idx}}.create_jobs()
cluster_histogram_aggregators.append(cluster_histogram_aggregator_{{idx}})
"""

    footer = """
aggregator_status = { jobIdx : False for jobIdx in range(len(cluster_histogram_aggregators)) }
while True:
  are_all_finished = True
  for jobIdx in aggregator_status:
    if aggregator_status[jobIdx]:
      continue
    else:
      aggregator_status[jobIdx] = cluster_histogram_aggregators[jobIdx].is_done()
      if not aggregator_status[jobIdx]:
        are_all_finished = False
  if are_all_finished:
    break
  else:
    time.sleep(60)
"""
    script_str = "ClusterHistogramAggregator"

    # The template text does not depend on the key, so compile it only once.
    compiled_job_template = jinja2.Template(job_template)

    content = []
    nof_jobs = 0
    for idxKey, key in enumerate(outputFiles):
        input_file_names_duplicates = find_duplicates(inputFiles[key])
        if input_file_names_duplicates:
            raise RuntimeError(
              "Found duplicate input files to produce output file %s: %s" % \
              (outputFiles[key], ", ".join(input_file_names_duplicates))
            )
        # Give every key its own script and log file by prefixing the marker
        # with the key; a missing marker would leave the name unchanged.
        assert (script_file_name.find(script_str) != -1)
        script_file_name_key = script_file_name.replace(
            script_str, "{}_{}".format(key, script_str))
        assert (log_file_name.find(script_str) != -1)
        log_file_name_key = log_file_name.replace(
            script_str, "{}_{}".format(key, script_str))
        template_vars = {
            'working_dir': working_dir,
            'input_file_names': inputFiles[key],
            'output_file_name': outputFiles[key],
            'auxDirName': auxDirName,
            # every section gets its own freshly generated pool id
            'pool_id': uuid.uuid4(),
            'max_input_files_per_job': max_input_files_per_job,
            'script_file_name': script_file_name_key,
            'log_file_name': log_file_name_key,
            'verbose': verbose,
            'dry_run': dry_run,
            'use_home': use_home,
            'min_file_size': min_file_size,
            'max_num_submittedJobs': max_num_submittedJobs,
            'idx': idxKey,
            'max_mem': max_mem,
        }
        content.append(compiled_job_template.render(**template_vars))
        nof_jobs += get_num_jobs(len(inputFiles[key]), max_input_files_per_job)

    script_content = header + '\n'.join(content) + footer
    createFile(sbatch_script_file_name, script_content.splitlines())

    return nof_jobs