def createScript_sbatch(sbatch_script_file_name, executable, command_line_parameters,
                        input_file_names, output_file_names, script_file_names,
                        log_file_names=None, working_dir=None, max_num_jobs=100000,
                        cvmfs_error_log=None, pool_id='', cmssw_base_dir=None,
                        verbose=False):
    """Creates the python script necessary to submit analysis and/or Ntuple production jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    if not cmssw_base_dir:
        cmssw_base_dir = os.environ['CMSSW_BASE']
    if not pool_id:
        raise ValueError('pool_id is empty')
    sbatch_analyze_lines, num_jobs = generate_sbatch_lines(
        executable, command_line_parameters, input_file_names, output_file_names,
        script_file_names, log_file_names, working_dir, max_num_jobs,
        cvmfs_error_log, pool_id, cmssw_base_dir, verbose
    )
    createFile(sbatch_script_file_name, sbatch_analyze_lines)
    return num_jobs
def createScript_sbatch_hadd(sbatch_script_file_name, input_file_names, output_file_name,
                             script_file_name, log_file_name=None, working_dir=None,
                             waitForJobs=True, auxDirName='', pool_id='', verbose=False):
    """Creates the python script necessary to submit 'hadd' jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    if not pool_id:
        raise ValueError('pool_id is empty')
    sbatch_hadd_lines, num_jobs = generate_sbatch_lines_hadd(
        input_file_names=input_file_names,
        output_file_name=output_file_name,
        script_file_name=script_file_name,
        log_file_name=log_file_name,
        working_dir=working_dir,
        waitForJobs=waitForJobs,
        auxDirName=auxDirName,
        pool_id=pool_id,
        verbose=verbose,
    )
    createFile(sbatch_script_file_name, sbatch_hadd_lines)
    return num_jobs
def createCfg(self, jobOptions):
    """Create python configuration file for the refGenWeight script

    Args:
      inputFiles: list of input files (Ntuples)
      outputFile: output file of the job -- a ROOT file containing the histogram
    """
    lines = jobOptions['inputFiles']
    createFile(jobOptions['cfgFile_path'], lines, nofNewLines=1)
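# The "configuration" file written by createCfg is plain text rather than
# Python: it is just the list of input Ntuples, one path per line, which the
# refGenWeight script reads back. For example (hypothetical paths):
#
#   /local/ntuples/tree_1.root
#   /local/ntuples/tree_2.root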
def createScript_sbatch(
        sbatch_script_file_name,
        executable,
        command_line_parameters,
        input_file_names,
        output_file_names,
        script_file_names,
        log_file_names=None,
        working_dir=None,
        max_num_jobs=-1,
        cvmfs_error_log=None,
        pool_id='',
        cmssw_base_dir=None,
        verbose=False,
        job_template_file='sbatch-node.sh.template',
        dry_run=False,
        validate_outputs=True,
        min_file_size=20000,
        max_num_submittedJobs=5000,
        use_home=False,
        copy_output_file=True,
    ):
    """Creates the python script necessary to submit analysis and/or Ntuple production jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    if not cmssw_base_dir:
        cmssw_base_dir = os.environ['CMSSW_BASE']
    if not pool_id:
        raise ValueError('pool_id is empty')
    sbatch_analyze_lines, num_jobs = generate_sbatch_lines(
        executable=executable,
        command_line_parameters=command_line_parameters,
        input_file_names=input_file_names,
        output_file_names=output_file_names,
        script_file_names=script_file_names,
        log_file_names=log_file_names,
        working_dir=working_dir,
        max_num_jobs=max_num_jobs,
        cvmfs_error_log=cvmfs_error_log,
        pool_id=pool_id,
        cmssw_base_dir=cmssw_base_dir,
        verbose=verbose,
        job_template_file=job_template_file,
        dry_run=dry_run,
        validate_outputs=validate_outputs,
        min_file_size=min_file_size,
        max_num_submittedJobs=max_num_submittedJobs,
        use_home=use_home,
        copy_output_file=copy_output_file,
    )
    createFile(sbatch_script_file_name, sbatch_analyze_lines)
    return num_jobs
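# Usage sketch (a minimal sketch; all file names and the executable below are
# hypothetical, and the per-job arguments are assumed here to be parallel
# collections with one entry per job). pool_id must be non-empty, e.g. a fresh
# UUID, or the function raises ValueError:
#
#   import uuid
#   num_jobs = createScript_sbatch(
#       sbatch_script_file_name='sbatch_analyze_jobs.py',
#       executable='analyze_hh_bb1l',
#       command_line_parameters={'job1': 'analyze_job1_cfg.py'},
#       input_file_names={'job1': ['ntuple_1.root']},
#       output_file_names={'job1': 'histograms_job1.root'},
#       script_file_names={'job1': 'analyze_job1.sh'},
#       pool_id=str(uuid.uuid4()),
#   )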
def createCfg_puProfile(self, jobOptions):
    """Create python configuration file for the puProfile.sh script

    Args:
      inputFiles: list of input files (Ntuples)
      outputFile: output file of the job -- a ROOT file containing the histogram
    """
    lines = jobOptions['inputFiles'] + [
        '',
        '%s %s %s' % (self.era, jobOptions['histName'], jobOptions['outputFile']),
    ]
    assert len(lines) >= 3
    createFile(jobOptions['cfgFile_path'], lines, nofNewLines=1)
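# The generated file lists the input Ntuples, a blank separator line, and a
# final line with the era, histogram name and output file, e.g. (hypothetical
# values):
#
#   /local/ntuples/tree_1.root
#   /local/ntuples/tree_2.root
#
#   2017 histName_pu puProfile.root
#
# createCfg_nonResDenom below writes the same layout, with the process and
# category names in place of the era and histogram name.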
def createCfg_nonResDenom(self, jobOptions):
    """Create python configuration file for the denomHistogramProducer.sh script

    Args:
      inputFiles: list of input files (Ntuples)
      outputFile: output file of the job -- a ROOT file containing the histogram
    """
    lines = jobOptions['inputFiles'] + [
        '',
        '%s %s %s' % (jobOptions['processName'], jobOptions['categoryName'], jobOptions['outputFile']),
    ]
    assert len(lines) >= 3
    createFile(jobOptions['cfgFile_path'], lines, nofNewLines=1)
def createScript_sbatch(sbatch_script_file_name, executable, cfg_file_names,
                        input_file_names, output_file_names, log_file_names = None,
                        working_dir = None, max_num_jobs = 100000, cvmfs_error_log = None):
    """Creates the python script necessary to submit analysis and/or Ntuple production jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    sbatch_analyze_lines = generate_sbatch_lines(
        executable, cfg_file_names, input_file_names, output_file_names,
        log_file_names, working_dir, max_num_jobs, cvmfs_error_log
    )
    createFile(sbatch_script_file_name, sbatch_analyze_lines)
def createScript_sbatch_hadd(sbatch_script_file_name, input_file_names, output_file_name,
                             hadd_stage_name, working_dir = None, waitForJobs = True):
    """Creates the python script necessary to submit 'hadd' jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    sbatch_hadd_lines = generate_sbatch_lines_hadd(
        input_file_names = input_file_names,
        output_file_name = output_file_name,
        working_dir      = working_dir,
        waitForJobs      = waitForJobs,
    )
    createFile(sbatch_script_file_name, sbatch_hadd_lines)
def createCfg_project(self, jobOptions):
    """Create python configuration file for the projection script

    Args:
      inputFiles: list of input files (Ntuples)
      outputFile: output file of the job -- a ROOT file containing the histogram
    """
    last_line = '%s %s %s %s' % (
        jobOptions['projection_module'], self.era, jobOptions['histName'], jobOptions['outputFile']
    )
    if self.projection_module != 'puHist':
        last_line += ' %.6e' % jobOptions['ref_genWeight']
    lines = jobOptions['inputFiles'] + ['', last_line]
    assert len(lines) >= 3
    createFile(jobOptions['cfgFile_path'], lines, nofNewLines=1)
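# For every projection module other than 'puHist', the reference genWeight is
# appended in scientific notation: '%.6e' % 123.4 yields '1.234000e+02'. A
# (hypothetical) last line of the generated file then reads:
#
#   genWeight 2017 histName_genWeight project.root 1.234000e+02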
def createScript_sbatch_hadd(
        sbatch_script_file_name,
        input_file_names,
        output_file_name,
        script_file_name,
        log_file_name=None,
        working_dir=None,
        waitForJobs=True,
        auxDirName='',
        pool_id='',
        verbose=False,
        dry_run=False,
        max_input_files_per_job=10,
        use_home=False,
        min_file_size=20000,
        max_num_submittedJobs=5000,
    ):
    """Creates the python script necessary to submit 'hadd' jobs to the batch system
    """
    if not working_dir:
        working_dir = os.getcwd()
    if not pool_id:
        raise ValueError('pool_id is empty')
    sbatch_hadd_lines, num_jobs = generate_sbatch_lines_hadd(
        input_file_names=input_file_names,
        output_file_name=output_file_name,
        script_file_name=script_file_name,
        log_file_name=log_file_name,
        working_dir=working_dir,
        waitForJobs=waitForJobs,
        auxDirName=auxDirName,
        pool_id=pool_id,
        verbose=verbose,
        dry_run=dry_run,
        max_input_files_per_job=max_input_files_per_job,
        use_home=use_home,
        min_file_size=min_file_size,
        max_num_submittedJobs=max_num_submittedJobs,
    )
    createFile(sbatch_script_file_name, sbatch_hadd_lines)
    return num_jobs
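# Usage sketch (a minimal sketch; all file names below are hypothetical, and
# pool_id must again be non-empty):
#
#   import uuid
#   num_jobs = createScript_sbatch_hadd(
#       sbatch_script_file_name='sbatch_hadd_stage1.py',
#       input_file_names=['histograms_job1.root', 'histograms_job2.root'],
#       output_file_name='histograms_stage1.root',
#       script_file_name='hadd_stage1.sh',
#       pool_id=str(uuid.uuid4()),
#   )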
"_cfg.py", "_with2016_cfg.py") lines_prepareDatacards = [] lines_prepareDatacards.append( "process.prepareDatacards.signals = cms.vstring(") for signal in signals: lines_prepareDatacards.append(" '%s'," % signal) lines_prepareDatacards.append(")") filename_prepareDatacards_output = os.path.join( channels[channel_name]['directory_datacards'], "prepareDatacards_%s_%s_with2016.root" % (channel_name, histogram_to_fit)) lines_prepareDatacards.append( "process.fwliteInput.fileNames = cms.vstring('%s')" % filenames_hadd_output[channel_name]) lines_prepareDatacards.append( "process.fwliteOutput.fileName = cms.string('%s')" % filename_prepareDatacards_output) create_cfg(filename_prepareDatacards_cfg_original, filename_prepareDatacards_cfg_modified, lines_prepareDatacards) command_prepareDatacards = '%s %s' % ( executable_prepareDatacards, filename_prepareDatacards_cfg_modified) lines_shell_script.append(command_prepareDatacards) from tthAnalysis.HiggsToTauTau.analysisTools import createFile createFile(filename_shell_script, lines_shell_script) print "Finished creating config files." print "Now execute: 'source %s'" % filename_shell_script
def createScript_sbatch_hadd_nonBlocking(
        sbatch_script_file_name,
        inputFiles,
        outputFiles,
        script_file_name,
        log_file_name=None,
        working_dir=None,
        auxDirName='',
        verbose=False,
        dry_run=False,
        max_input_files_per_job=10,
        use_home=False,
        min_file_size=20000,
        max_num_submittedJobs=5000,
        max_mem='',
    ):
    header = """
from tthAnalysis.HiggsToTauTau.sbatchManager import sbatchManager
from tthAnalysis.HiggsToTauTau.ClusterHistogramAggregatorNonBlocking import ClusterHistogramAggregatorNonBlocking
import time

sbatch_managers = []
cluster_histogram_aggregators = []
"""
    job_template = """
m_{{idx}} = sbatchManager(
  '{{pool_id}}',
  verbose = {{verbose}},
  dry_run = {{dry_run}},
  use_home = {{use_home}},
  min_file_size = {{min_file_size}},
  max_num_submittedJobs = {{max_num_submittedJobs}},
)
m_{{idx}}.setWorkingDir('{{working_dir}}')
m_{{idx}}.log_completion = {{verbose}}
{% if max_mem|length %}m_{{idx}}.max_mem = '{{max_mem}}'{% endif %}
sbatch_managers.append(m_{{idx}})

cluster_histogram_aggregator_{{idx}} = ClusterHistogramAggregatorNonBlocking(
  input_files = {{input_file_names}},
  final_output_file = '{{output_file_name}}',
  max_input_files_per_job = {{max_input_files_per_job}},
  sbatch_manager = m_{{idx}},
  auxDirName = '{{auxDirName}}',
  script_file_name = '{{script_file_name}}',
  log_file_name = '{{log_file_name}}',
)
cluster_histogram_aggregator_{{idx}}.create_jobs()
cluster_histogram_aggregators.append(cluster_histogram_aggregator_{{idx}})
"""
    footer = """
aggregator_status = { jobIdx : False for jobIdx in range(len(cluster_histogram_aggregators)) }
while True:
  are_all_finished = True
  for jobIdx in aggregator_status:
    if aggregator_status[jobIdx]:
      continue
    else:
      aggregator_status[jobIdx] = cluster_histogram_aggregators[jobIdx].is_done()
      if not aggregator_status[jobIdx]:
        are_all_finished = False
  if are_all_finished:
    break
  else:
    time.sleep(60)
"""
    script_str = "ClusterHistogramAggregator"
    content = []
    nof_jobs = 0
    for idxKey, key in enumerate(outputFiles.keys()):
        input_file_names_duplicates = find_duplicates(inputFiles[key])
        if input_file_names_duplicates:
            raise RuntimeError(
                "Found duplicate input files to produce output file %s: %s" % \
                (outputFiles[key], ", ".join(input_file_names_duplicates))
            )
        assert script_file_name.find(script_str) != -1
        script_file_name_key = script_file_name.replace(script_str, "{}_{}".format(key, script_str))
        assert log_file_name.find(script_str) != -1
        log_file_name_key = log_file_name.replace(script_str, "{}_{}".format(key, script_str))
        template_vars = {
            'working_dir': working_dir,
            'input_file_names': inputFiles[key],
            'output_file_name': outputFiles[key],
            'auxDirName': auxDirName,
            'pool_id': uuid.uuid4(),
            'max_input_files_per_job': max_input_files_per_job,
            'script_file_name': script_file_name_key,
            'log_file_name': log_file_name_key,
            'verbose': verbose,
            'dry_run': dry_run,
            'use_home': use_home,
            'min_file_size': min_file_size,
            'max_num_submittedJobs': max_num_submittedJobs,
            'idx': idxKey,
            'max_mem': max_mem,
        }
        job_code = jinja2.Template(job_template).render(**template_vars)
        content.append(job_code)
        nof_jobs += get_num_jobs(len(inputFiles[key]), max_input_files_per_job)
    script_content = header + '\n'.join(content) + footer
    createFile(sbatch_script_file_name, script_content.splitlines())
    return nof_jobs
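# Usage sketch (file names hypothetical). inputFiles and outputFiles are
# parallel dicts keyed by the same job keys, and script_file_name /
# log_file_name must contain the substring 'ClusterHistogramAggregator',
# which is replaced by a per-key variant (the asserts above enforce this):
#
#   num_jobs = createScript_sbatch_hadd_nonBlocking(
#       sbatch_script_file_name='sbatch_hadd_nonBlocking.py',
#       inputFiles={'ttH': ['ttH_1.root', 'ttH_2.root']},
#       outputFiles={'ttH': 'ttH_hadded.root'},
#       script_file_name='ClusterHistogramAggregator.sh',
#       log_file_name='ClusterHistogramAggregator.log',
#       working_dir=os.getcwd(),
#   )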