def configure_1st_event_selection(): path_cfg = dict(Any=( dict(All=('ev : ev.GenSusyMSbottom[0] == 1000', 'ev : ev.GenSusyMNeutralino[0] == 300')), dict(All=('ev : ev.GenSusyMSbottom[0] == 800', 'ev : ev.GenSusyMNeutralino[0] == 50')), dict(All=('ev : ev.GenSusyMSbottom[0] == 375', 'ev : ev.GenSusyMNeutralino[0] == 300')), )) # eventSelection = alphatwirl.selection.build_selection( path_cfg=path_cfg, AllClass=alphatwirl.selection.modules.AllwCount, AnyClass=alphatwirl.selection.modules.AnywCount, NotClass=alphatwirl.selection.modules.NotwCount) eventselection_path = os.path.join(args.outdir, 'eventselection_01.txt') if args.force or not os.path.exists(eventselection_path): alphatwirl.mkdir_p(os.path.dirname(eventselection_path)) with open(eventselection_path, 'w') as f: pprint.pprint(path_cfg, stream=f) tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow_01.txt') resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn( summaryColumnNames=('depth', 'class', 'name', 'pass', 'total')) deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path) collector = alphatwirl.loop.Collector(resultsCombinationMethod, deliveryMethod) ret = [(eventSelection, collector)] return ret
def test_raise(mock_makedirs, mock_isdir, caplog):
    """mkdir_p propagates OSError when the directory is absent and makedirs fails."""
    mock_isdir.return_value = False
    mock_makedirs.side_effect = OSError
    with pytest.raises(OSError):
        mkdir_p('a/b')
    expected_calls = [mock.call('a/b')]
    assert mock_makedirs.call_args_list == expected_calls
def build_parallel_dropbox(parallel_mode, quiet, user_modules,
                           htcondor_job_desc_extra=None):
    """Build a Parallel object communicating through a task-package dropbox.

    Parameters
    ----------
    parallel_mode : str
        'htcondor' dispatches through HTCondor; anything else runs
        subprocesses locally.
    quiet : bool
        Use a null progress monitor when True.
    user_modules : iterable of str
        Python modules to ship to workers; 'alphatwirl_interface' and
        'alphatwirl' are always added.
    htcondor_job_desc_extra : list of str, optional
        Extra lines for the HTCondor job description (default: none).

    Returns
    -------
    Parallel
    """
    # None sentinel instead of a mutable default argument, which would be
    # shared across calls
    if htcondor_job_desc_extra is None:
        htcondor_job_desc_extra = []

    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('alphatwirl_interface')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)

    if quiet:
        progressMonitor = alphatwirl.progressbar.NullProgressMonitor()
    else:
        # interactive bar only when stdout is a terminal
        if sys.stdout.isatty():
            progressBar = alphatwirl.progressbar.ProgressBar()
        else:
            progressBar = alphatwirl.progressbar.ProgressPrint()
        progressMonitor = alphatwirl.progressbar.BProgressMonitor(
            presentation=progressBar)

    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()

    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir,
        python_modules=list(user_modules),
        exclusions=["*{}*".format(tmpdir)])
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)
    return Parallel(progressMonitor, communicationChannel)
def put_package(self, package):
    """Put a package

    Pickles *package* into the working area and creates the directory
    where the corresponding result will later be collected.

    Parameters
    ----------
    package : a task package

    Returns
    -------
    int
        A package index
    """
    self.last_package_index += 1
    package_index = self.last_package_index
    package_fullpath = self.package_fullpath(package_index)
    # e.g., '{path}/tpd_20161129_122841_HnpcmF/task_00009.p.gz'
    with gzip.open(package_fullpath, 'wb') as f:
        pickle.dump(package, f, protocol=pickle.HIGHEST_PROTOCOL)
        # note: the explicit f.close() that used to follow was redundant;
        # the with statement closes the file
    result_fullpath = self.result_fullpath(package_index)
    # e.g., '{path}/tpd_20161129_122841_HnpcmF/results/task_00009/result.p.gz'
    result_dir = os.path.dirname(result_fullpath)
    # e.g., '{path}/tpd_20161129_122841_HnpcmF/results/task_00009'
    alphatwirl.mkdir_p(result_dir)
    return package_index
def test_raise(mock_makedirs, mock_isdir, caplog):
    """mkdir_p re-raises OSError from makedirs when the path does not exist."""
    mock_isdir.return_value = False
    mock_makedirs.side_effect = OSError
    with pytest.raises(OSError):
        mkdir_p('a/b')
    assert mock_makedirs.call_args_list == [mock.call('a/b')]
def configure_datasets():
    """Return the hard-coded list of input datasets.

    Also records the list in '{args.outdir}/datasets.txt' (skipped if the
    file exists, unless args.force is set).
    """
    # ret = atnanoaod.query.build_datasets_from_tbl_paths(
    #     tbl_cmsdataset_paths=args.tbl_cmsdatasets,
    #     datasets=args.datasets if args.datasets else None
    #     # give None to datasets if args.datasets is an empty list
    #     # so that build_datasets() returns all datasets rather than
    #     # an empty list.
    # )
    ret = [
        atnanoaod.dataset.Dataset(
            name='ZJetsToNuNu_HT400To600',
            files=[
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/FA22B8D3-A046-E611-AEB6-00259073E4C8.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/F661992C-AE46-E611-9B2D-0CC47A1E0476.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/CCEE61DF-FA48-E611-AEF2-44A842CF05A5.root',
            ]),
        # TODO(review): name says ZJetsToNuNu_HT600To800 but the files below
        # are WJetsToLNu_HT-600To800 samples -- confirm which is intended
        atnanoaod.dataset.Dataset(
            name='ZJetsToNuNu_HT600To800',
            files=[
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F614500E-8A24-E611-AB01-B083FECFEF7D.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F431EB87-7D24-E611-ACD5-B083FECFF2BF.root',
                # fixed: the path below had a stray trailing space, which would
                # make the file unopenable
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F408470E-8D24-E611-A912-D4AE526DF2E3.root',
            ]),
    ]
    path = os.path.join(args.outdir, 'datasets.txt')
    if args.force or not os.path.exists(path):
        alphatwirl.mkdir_p(os.path.dirname(path))
        with open(path, 'w') as f:
            pprint.pprint(ret, stream=f)
    return ret
def test_already_exist(mock_makedirs, mock_isdir, caplog):
    """EEXIST for a directory that already exists is swallowed and logged below DEBUG."""
    mock_isdir.return_value = True
    mock_makedirs.side_effect = OSError(errno.EEXIST, 'already exist')
    with caplog.at_level(logging.DEBUG - 1):
        mkdir_p('a/b')
    assert mock_makedirs.call_args_list == [mock.call('a/b')]
    assert len(caplog.records) == 1
    record = caplog.records[0]
    assert record.levelno == logging.DEBUG - 1
    assert 'tried' in record.msg
def test_already_exist(mock_makedirs, mock_isdir, caplog):
    """mkdir_p tolerates EEXIST when the path is a directory, emitting one low-level log record."""
    mock_isdir.return_value = True
    mock_makedirs.side_effect = OSError(errno.EEXIST, 'already exist')
    below_debug = logging.DEBUG - 1
    with caplog.at_level(below_debug):
        mkdir_p('a/b')
    assert mock_makedirs.call_args_list == [mock.call('a/b')]
    assert len(caplog.records) == 1
    assert caplog.records[0].levelno == below_debug
    assert 'tried' in caplog.records[0].msg
def _prepare_dir(self, dir):
    """Ensure *dir* exists and return a fresh timestamped temporary sub-directory in it."""
    alphatwirl.mkdir_p(dir)
    # e.g., 'tpd_20161129_122841_'
    prefix = 'tpd_{:%Y%m%d_%H%M%S}_'.format(datetime.datetime.now())
    # e.g., '{path}/tpd_20161129_122841_HnpcmF'
    return tempfile.mkdtemp(prefix=prefix, dir=dir)
def configure_2nd_event_selection(): path_cfg = dict(All=( 'ev : ev.nElectronsVeto[0] == 0', 'ev : ev.nMuonsVeto[0] == 0', 'ev : ev.nIsoTracksVeto[0] <= 0', 'ev : ev.nPhotonsVeto[0] == 0', 'ev : ev.nJet40Fwd[0] == 0', 'ev : ev.nJet40[0] >= 2', 'ev : ev.jet_pt[0] > 100', 'ev : -2.5 < ev.jet_eta[0] < 2.5', 'ev : ev.ht40[0] > 200', 'ev : ev.mht40_pt[0] > 130', 'ev : ev.MhtOverMet[0] < 1.25', dict(Any=(dict(All=('ev : 200 <= ev.ht40[0] < 250', 'ev : 0.65 <= ev.alphaT[0]')), dict(All=('ev : 250 <= ev.ht40[0] < 300', 'ev : 0.60 <= ev.alphaT[0]')), dict(All=('ev : 300 <= ev.ht40[0] < 350', 'ev : 0.55 <= ev.alphaT[0]')), dict(All=('ev : 350 <= ev.ht40[0] < 400', 'ev : 0.53 <= ev.alphaT[0]')), dict(All=('ev : 400 <= ev.ht40[0] < 600', 'ev : 0.52 <= ev.alphaT[0]')), dict(All=('ev : 600 <= ev.ht40[0] < 800', 'ev : 0.52 <= ev.alphaT[0]')), dict(All=('ev : 800 <= ev.ht40[0]', )))), 'ev : ev.biasedDPhi[0] > 0.5', )) # eventSelection = alphatwirl.selection.build_selection( path_cfg=path_cfg, AllClass=alphatwirl.selection.modules.AllwCount, AnyClass=alphatwirl.selection.modules.AnywCount, NotClass=alphatwirl.selection.modules.NotwCount) eventselection_path = os.path.join(args.outdir, 'eventselection.txt') if args.force or not os.path.exists(eventselection_path): alphatwirl.mkdir_p(os.path.dirname(eventselection_path)) with open(eventselection_path, 'w') as f: pprint.pprint(path_cfg, stream=f) tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow_02.txt') resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn( summaryColumnNames=('depth', 'class', 'name', 'pass', 'total')) deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path) collector = alphatwirl.loop.Collector(resultsCombinationMethod, deliveryMethod) ret = [(eventSelection, collector)] return ret
def configure_reader_collector_pairs():
    """Assemble all (reader, collector) pairs and record them in a text file."""
    pairs = []
    for part in (configure_scribblers_before_event_selection(),
                 configure_tables_after_1st_event_selection()):
        pairs.extend(part)
    path = os.path.join(args.outdir, 'reader_collector_pairs.txt')
    alphatwirl.mkdir_p(os.path.dirname(path))
    with open(path, 'w') as f:
        pprint.pprint(pairs, stream=f)
    return pairs
def test_collect_result(obj):
    """collect_result returns the unpickled result stored for the package index."""
    obj.open()
    result = MockResult(name='result1')
    package_index = 9
    result_fullpath = obj.result_fullpath(package_index)
    mkdir_p(os.path.dirname(result_fullpath))
    with gzip.open(result_fullpath, 'wb') as f:
        pickle.dump(result, f)
        # removed redundant f.close(); the with statement closes the file
    assert result == obj.collect_result(package_index=package_index)
def test_collect_result_eoferror(obj):
    """An empty 'result.p.gz' makes pickle.load raise EOFError; collect_result returns None."""
    obj.open()
    package_index = 9
    result_dir = os.path.join(obj.path, 'results',
                              'task_{:05d}'.format(package_index))
    mkdir_p(result_dir)
    result_path = os.path.join(result_dir, 'result.p.gz')
    # create an empty result file
    open(result_path, 'wb').close()
    assert obj.collect_result(package_index=package_index) is None
def test_collect_result_eoferror(obj):
    """collect_result yields None when the result file is empty (pickle raises EOFError)."""
    obj.open()
    package_index = 9
    dirname = 'task_{:05d}'.format(package_index)
    result_dir = os.path.join(obj.path, 'results', dirname)
    mkdir_p(result_dir)
    # touch an empty 'result.p.gz'
    empty_path = os.path.join(result_dir, 'result.p.gz')
    open(empty_path, 'wb').close()
    assert obj.collect_result(package_index=package_index) is None
def test_collect_result(obj):
    """collect_result returns the result pickled under results/task_NNNNN/."""
    obj.open()
    result = MockResult(name='result1')
    package_index = 9
    dirname = 'task_{:05d}'.format(package_index)
    result_dir = os.path.join(obj.path, 'results', dirname)
    mkdir_p(result_dir)
    result_path = os.path.join(result_dir, 'result.p.gz')
    with gzip.open(result_path, 'wb') as f:
        pickle.dump(result, f)
        # removed redundant f.close(); the with statement closes the file
    assert result == obj.collect_result(package_index=package_index)
def test_collect_result(obj):
    """collect_result round-trips a pickled result stored for the package index."""
    obj.open()
    result = MockResult(name='result1')
    package_index = 9
    dirname = 'task_{:05d}'.format(package_index)
    result_dir = os.path.join(obj.path, 'results', dirname)
    mkdir_p(result_dir)
    result_path = os.path.join(result_dir, 'result.p.gz')
    with gzip.open(result_path, 'wb') as f:
        pickle.dump(result, f)
        # removed redundant f.close(); the with statement closes the file
    assert result == obj.collect_result(package_index=package_index)
def configure_1st_event_selection():
    """Build the 1st event selection and its cut-flow collector.

    Returns a list with one (eventSelection, collector) pair.  The
    selection applies the signal cut-flow id, track veto, and jet/HT/MHT
    cuts.  The path_cfg is dumped to '{args.outdir}/eventselection.txt'
    and the cut flow is written to '{args.outdir}/tbl_cutflow.txt'.
    """
    path_cfg = dict(All = (
        'ev : ev.cutflowId[0] == 1 # Signal',
        'ev : ev.nIsoTracksVeto[0] <= 0',
        'ev : ev.nJet40Fwd[0] == 0',
        'ev : ev.nJet40failedId[0] == 0',
        'ev : ev.nJet40[0] >= 2',
        'ev : -2.5 < ev.jet_eta[0] < 2.5',
        'ev : 0.1 <= ev.jet_chHEF[0] < 0.95',
        # 'ev : ev.nJet100[0] >= 1',
        'ev : ev.ht40[0] >= 400',
        'ev : ev.mht40_pt[0] >= 200',
        'ev : ev.MhtOverMet[0] < 1.25',
    ))

    #
    eventSelection = alphatwirl.selection.build_selection(
        path_cfg = path_cfg,
        AllClass = alphatwirl.selection.modules.AllwCount,
        AnyClass = alphatwirl.selection.modules.AnywCount,
        NotClass = alphatwirl.selection.modules.NotwCount
    )

    # record the selection configuration (skipped if the file exists,
    # unless args.force is set)
    eventselection_path = os.path.join(args.outdir, 'eventselection.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream = f)

    # collector that writes the cut-flow counts to a text file
    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow.txt')
    resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn(
        summaryColumnNames = ('depth', 'class', 'name', 'pass', 'total')
    )
    deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
    collector = alphatwirl.loop.Collector(resultsCombinationMethod, deliveryMethod)
    ret = [(eventSelection, collector)]
    return ret
def build_parallel_dropbox(parallel_mode, quiet, user_modules,
                           htcondor_job_desc_extra=None):
    """Build a Parallel object communicating through a task-package dropbox.

    This variant always uses a null progress monitor; the *quiet* flag is
    accepted for interface compatibility but not consulted.

    Parameters
    ----------
    parallel_mode : str
        'htcondor' dispatches through HTCondor; anything else runs
        subprocesses locally.
    quiet : bool
        Unused here (a NullProgressMonitor is always used).
    user_modules : iterable of str
        Python modules to ship to workers; 'fwtwirl' and 'alphatwirl'
        are always added.
    htcondor_job_desc_extra : list of str, optional
        Extra lines for the HTCondor job description (default: none).

    Returns
    -------
    Parallel
    """
    # None sentinel instead of a mutable default argument, which would be
    # shared across calls
    if htcondor_job_desc_extra is None:
        htcondor_job_desc_extra = []

    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('fwtwirl')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)

    progressMonitor = alphatwirl.progressbar.NullProgressMonitor()

    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()

    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir, python_modules=list(user_modules))
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)
    return Parallel(progressMonitor, communicationChannel)
def build_parallel_dropbox(parallel_mode, quiet, user_modules,
                           htcondor_job_desc_extra=None, **kwargs):
    """Build a Parallel object communicating through a task-package dropbox.

    Parameters
    ----------
    parallel_mode : str
        'htcondor' dispatches through HTCondor, 'sge' through SGE;
        anything else runs subprocesses locally.
    quiet : bool
        Use a null progress monitor when True.
    user_modules : iterable of str
        Python modules to ship to workers; 'alphatwirl_interface' and
        'alphatwirl' are always added.
    htcondor_job_desc_extra : list of str, optional
        Extra lines for the HTCondor job description (default: none).
    **kwargs
        SGE options: 'queue' (default "hep.q") and 'time' (walltime in
        seconds, default 10800).

    Returns
    -------
    Parallel
    """
    # None sentinel instead of a mutable default argument, which would be
    # shared across calls
    if htcondor_job_desc_extra is None:
        htcondor_job_desc_extra = []

    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('alphatwirl_interface')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)

    if quiet:
        progressMonitor = alphatwirl.progressbar.NullProgressMonitor()
    else:
        # interactive bar only when stdout is a terminal
        if sys.stdout.isatty():
            progressBar = alphatwirl.progressbar.ProgressBar()
        else:
            progressBar = alphatwirl.progressbar.ProgressPrint()
        progressMonitor = alphatwirl.progressbar.BProgressMonitor(
            presentation=progressBar)

    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    elif parallel_mode == 'sge':
        # dict.get is the idiomatic form of the previous
        # "x if key not in kwargs else kwargs[key]" conditionals
        q = kwargs.get("queue", "hep.q")
        t = kwargs.get("time", 10800)
        dispatcher = alphatwirl.concurrently.SGEJobSubmitter(queue=q, walltime=t)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()

    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir, python_modules=list(user_modules))
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)
    return Parallel(progressMonitor, communicationChannel)
def run_multiple(self, workingArea, package_indices):
    """Submit all packages as a single SGE array job.

    Writes 'job_script.sh' in the working area, submits it with the
    command built from self.job_desc_template, and returns the list of
    'clusterid.taskid' strings (also appended to
    self.clusterprocids_outstanding).  Returns [] for no packages.
    """
    if not package_indices:
        return []
    cwd = os.getcwd()
    os.chdir(workingArea.path)

    package_paths = [workingArea.package_path(i) for i in package_indices]
    # strip both extensions, e.g. 'task_00009.p.gz' -> 'task_00009'
    resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
    resultdir_basenames = [
        os.path.splitext(n)[0] for n in resultdir_basenames
    ]
    resultdirs = [os.path.join('results', n) for n in resultdir_basenames]
    for d in resultdirs:
        alphatwirl.mkdir_p(d)

    job_desc = self.job_desc_template.format(
        job_script='job_script.sh',
        njobs=len(package_paths),
        queue=self.queue,
        walltime=self.walltime,
    )

    # one cd command and one run command per array task, indexed by
    # $SGE_TASK_ID (1-based)
    s = "#!/bin/bash\n\nulimit -c 0\n\n"
    for idx, package_path in enumerate(package_paths):
        s += "cmd1[{index}]='cd {path}'\n".format(
            index=idx + 1,
            path=resultdirs[idx],
        )
        s += "cmd2[{index}]='python {job_script} {args}'\n".format(
            index=idx + 1,
            job_script="../../run.py",
            args=package_path,
        )
    s += "\n${{cmd1[$SGE_TASK_ID]}} > {out} 2> {err}\n".format(
        out="stdout.txt",
        err="stderr.txt",
    )
    s += "${{cmd2[$SGE_TASK_ID]}} >> {out} 2>> {err}".format(
        out="stdout.txt",
        err="stderr.txt",
    )
    with open("job_script.sh", 'w') as f:
        f.write(s)

    # universal_newlines=True makes stdout/stderr str, matching the str
    # regex below (as in the other SGE submitter variant); without it the
    # search on bytes fails on Python 3
    proc = subprocess.Popen(
        job_desc.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, stderr = proc.communicate()

    # raw string avoids invalid-escape warnings for \d and \(
    regex = re.compile(
        r"Your job-array (\d+).1-(\d+):1 \(\"job_script.sh\"\) has been submitted"
    )
    match = regex.search(stdout)  # search once instead of twice
    clusterid = match.groups()[0]
    # e.g., '2448770'
    njobs = int(match.groups()[1])
    #change_job_priority([clusterid], 10) ## need to make configurable

    procid = ['{}'.format(i + 1) for i in range(njobs)]
    # e.g., ['1', '2', '3', '4'] (SGE task ids are 1-based)
    clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
    # e.g., ['2448770.1', '2448770.2', '2448770.3', '2448770.4']

    self.clusterprocids_outstanding.extend(clusterprocids)
    os.chdir(cwd)
    return clusterprocids
def configure_1st_event_selection():
    """Build the 1st event selection and its cut-flow collector.

    Uses the common cut set, or — when args.susy_sms is set — the common
    cuts nested inside SUSY SMS mass-point selections.  The path_cfg is
    dumped to '{args.outdir}/eventselection.txt'.  If the cut-flow table
    needs (re)writing, a counting selection and a file-writing collector
    are built; otherwise a plain selection and a NullCollector.

    Returns a list with one (eventSelection, collector) pair.
    """
    path_cfg_common = dict(All=(
        'ev : ev.cutflowId[0] == 1',
        'ev : ev.nIsoTracksVeto[0] <= 0',
        'ev : ev.nJet40[0] >= 2',
        'ev : ev.ht40[0] >= 200',
        'ev : ev.nJet100[0] >= 1',
        'ev : ev.nJet40failedId[0] == 0',
        'ev : ev.nJet40Fwd[0] == 0',
        'ev : -2.5 < ev.jet_eta[0] < 2.5',
        'ev : 0.1 <= ev.jet_chHEF[0] < 0.95',
        'ev : 130 <= ev.mht40_pt[0]',
        'ev : ev.MhtOverMet[0] < 1.25',
    ))
    # the common cuts applied only for selected (smsmass1, smsmass2)
    # points of each SMS sample
    path_cfg_susy_masspoints = dict(Any=(
        dict(All=(
            'ev : ev.componentName[0] == "SMS_T1tttt_madgraphMLM"',
            dict(Any=(
                dict(All=('ev : ev.smsmass1[0] == 1300', 'ev : ev.smsmass2[0] == 1050', path_cfg_common)),
                dict(All=('ev : ev.smsmass1[0] == 1800', 'ev : ev.smsmass2[0] == 500', path_cfg_common)),
            )),
        )),
        dict(All=(
            'ev : ev.componentName[0] == "SMS_T2bb_madgraphMLM"',
            dict(Any=(
                dict(All=('ev : ev.smsmass1[0] == 500', 'ev : ev.smsmass2[0] == 450', path_cfg_common)),
                dict(All=('ev : ev.smsmass1[0] == 1000', 'ev : ev.smsmass2[0] == 300', path_cfg_common)),
            )),
        )),
    ))
    path_cfg = path_cfg_common
    if args.susy_sms:
        path_cfg = path_cfg_susy_masspoints

    # record the selection configuration (skipped if the file exists,
    # unless args.force is set)
    eventselection_path = os.path.join(args.outdir, 'eventselection.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream=f)

    # build a counting selection + file-writing collector only when the
    # cut-flow table is to be (re)written
    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow.txt')
    if args.force or not os.path.exists(tbl_cutflow_path):
        eventSelection = alphatwirl.selection.build_selection(
            path_cfg=path_cfg,
            AllClass=alphatwirl.selection.modules.AllwCount,
            AnyClass=alphatwirl.selection.modules.AnywCount,
            NotClass=alphatwirl.selection.modules.NotwCount)
        resultsCombinationMethod = alphatwirl.collector.CombineIntoList(
            summaryColumnNames=('depth', 'class', 'name', 'pass', 'total'),
            sort=False,
            summarizer_to_tuple_list=summarizer_to_tuple_list)
        deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
        collector = alphatwirl.loop.Collector(resultsCombinationMethod,
                                              deliveryMethod)
    else:
        eventSelection = alphatwirl.selection.build_selection(
            path_cfg=path_cfg)
        collector = alphatwirl.loop.NullCollector()

    #
    ret = [(eventSelection, collector)]
    return ret
def test_emtpy(mock_makedirs):  # NOTE(review): name typo, presumably 'test_empty'
    """mkdir_p('') is a no-op: os.makedirs is never called."""
    mkdir_p('')
    assert mock_makedirs.call_args_list == []
def test_emtpy(mock_makedirs):  # NOTE(review): name typo, presumably 'test_empty'
    """An empty path makes mkdir_p skip the makedirs call entirely."""
    mkdir_p('')
    assert mock_makedirs.call_args_list == []
def test_success(mock_makedirs):
    """mkdir_p delegates to os.makedirs with the given path."""
    mkdir_p('a/b')
    assert mock_makedirs.call_args_list == [mock.call('a/b')]
def run_multiple(self, workingArea, package_indices):
    """Submit all packages through a single condor_submit call.

    Builds the job description from self.job_desc_template, pipes it to
    condor_submit, and returns the list of 'clusterid.procid' strings
    (also appended to self.clusterprocids_outstanding).  Returns [] for
    no packages.
    """
    if not package_indices:
        return []
    cwd = os.getcwd()
    os.chdir(workingArea.path)

    package_paths = [workingArea.package_path(i) for i in package_indices]
    # strip both extensions, e.g. 'task_00009.p.gz' -> 'task_00009'
    resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
    resultdir_basenames = [os.path.splitext(n)[0] for n in resultdir_basenames]
    resultdirs = [os.path.join('results', n) for n in resultdir_basenames]
    for d in resultdirs:
        alphatwirl.mkdir_p(d)

    # only ship extra input files that actually exist
    extra_input_files = ['python_modules.tar.gz']
    extra_input_files = [f for f in extra_input_files if os.path.exists(f)]
    job_desc = self.job_desc_template.format(
        input_files = ', '.join(['$(resultdir).p.gz'] + extra_input_files),
        resultdirs = ', '.join(resultdir_basenames)
    )

    procargs = ['condor_submit']
    logger = logging.getLogger(__name__)
    command_display = compose_shortened_command_for_logging(procargs)
    logger.debug('execute: {!r}'.format(command_display))
    proc = subprocess.Popen(
        procargs,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    # the pipes are binary (stdout/stderr are decoded below), so the job
    # description must be encoded before writing it to stdin; passing a
    # str here raises TypeError on Python 3
    stdout, stderr = proc.communicate(job_desc.encode())
    stdout = stdout.decode()
    stderr = stderr.decode()
    for l in stdout.rstrip().split('\n'):
        logger.debug(l)

    # raw string avoids invalid-escape warnings for \d and \(
    regex = re.compile(r"(\d+) job\(s\) submitted to cluster (\d+)", re.MULTILINE)
    match = regex.search(stdout)  # search once instead of twice
    njobs = int(match.groups()[0])
    clusterid = match.groups()[1]
    # e.g., '3158626'
    change_job_priority([clusterid], 10) ## need to make configurable

    procid = ['{}'.format(i) for i in range(njobs)]
    # e.g., ['0', '1', '2', '3'] (HTCondor proc ids are 0-based)
    clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
    # e.g., ['3158626.0', '3158626.1', '3158626.2', '3158626.3']

    self.clusterprocids_outstanding.extend(clusterprocids)
    os.chdir(cwd)
    return clusterprocids
def run_multiple(self, workingArea, package_indices):
    """Submit all packages as a single SGE array job.

    Writes 'job_script.sh' in the working area, submits it with the
    command built from self.job_desc_template (with per-task-name
    walltime/vmem overrides), and returns the list of 'clusterid.taskid'
    strings (also appended to self.clusterprocids_outstanding).
    Returns [] for no packages.  Raises AttributeError if the submission
    output cannot be parsed.
    """
    if not package_indices:
        return []
    cwd = os.getcwd()
    os.chdir(workingArea.path)

    package_paths = [
        workingArea.package_relpath(i) for i in package_indices
    ]
    # strip both extensions, e.g. 'task_00009.p.gz' -> 'task_00009'
    resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
    resultdir_basenames = [
        os.path.splitext(n)[0] for n in resultdir_basenames
    ]
    resultdirs = [os.path.join('results', n) for n in resultdir_basenames]
    for d in resultdirs:
        alphatwirl.mkdir_p(d)

    # Get list of task names
    # (the last progressbar_label found wins; a warning is logged if the
    # packages carry different labels)
    task_name = None
    for p in package_paths:
        with gzip.open(p, 'rb') as f:
            package = pickle.load(f)
        if hasattr(package.task, 'progressbar_label'):
            if task_name is None:
                task_name = package.task.progressbar_label
            elif package.task.progressbar_label != task_name:
                logger = logging.getLogger(__name__)
                logger.warning("Task name changed somehow")
        else:
            task_name = "task"

    job_desc = self.job_desc_template.format(
        name=task_name,
        job_script='job_script.sh',
        njobs=len(package_paths),
        queue=self.queue,
        # per-task-name overrides fall back to the instance defaults
        walltime=self.walltime_dict[task_name]
        if task_name in self.walltime_dict else self.walltime,
        vmem=self.vmem_dict[task_name]
        if task_name in self.vmem_dict else self.vmem,
    )

    # one cd command and one run command per array task, indexed by
    # $SGE_TASK_ID (1-based)
    s = "#!/bin/bash\n\nulimit -c 0\n\n"
    for idx, package_path in enumerate(package_paths):
        s += "cmd1[{index}]='cd {path}'\n".format(
            index=idx + 1,
            path=resultdirs[idx],
        )
        s += "cmd2[{index}]='python {job_script} {args}'\n".format(
            index=idx + 1,
            job_script="../../run.py",
            args=package_path,
        )
    s += "\n${{cmd1[$SGE_TASK_ID]}} > {out} 2> {err}\n".format(
        out="stdout.txt",
        err="stderr.txt",
    )
    s += "${{cmd2[$SGE_TASK_ID]}} >> {out} 2>> {err}".format(
        out="stdout.txt",
        err="stderr.txt",
    )
    with open("job_script.sh", 'w') as f:
        f.write(s)

    # universal_newlines=True: stdout/stderr are str, matching the regex
    proc = subprocess.Popen(
        job_desc.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, stderr = proc.communicate()

    regex = re.compile(
        "Your job-array (\d+).1-(\d+):1 \(\"{}\"\) has been submitted".
        format(task_name))
    try:
        njobs = int(regex.search(stdout).groups()[1])
        clusterid = regex.search(stdout).groups()[0]
        # e.g., '2448770'
    except Exception as e:
        # submission output did not match; log it for diagnosis
        logger = logging.getLogger(__name__)
        logger.error(stdout)
        logger.error(stderr)
        raise AttributeError(e)
    #change_job_priority([clusterid], 10) ## need to make configurable

    procid = ['{}'.format(i + 1) for i in range(njobs)]
    # e.g., ['1', '2', '3', '4'] (SGE task ids are 1-based)
    clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
    # e.g., ['2448770.1', '2448770.2', '2448770.3', '2448770.4']

    self.clusterprocids_outstanding.extend(clusterprocids)
    os.chdir(cwd)
    return clusterprocids
def _configure(self, components, reader_collector_pairs, analyzerName,
               fileName, treeName):
    """Assemble the dataset loop for a Heppy result directory.

    Builds a DatasetReaderComposite of table writers (each skipped when
    its output file exists, unless self.force) plus the event-loop
    reader, then wraps it in a DatasetLoop (multiprocessing) or a
    ResumableDatasetLoop (other parallel modes) over the Heppy
    components.  Returns the loop.
    """
    dataset_readers = alphatwirl.datasetloop.DatasetReaderComposite()

    # tbl_heppyresult.txt: record the (relative) Heppy result directory
    tbl_heppyresult_path = os.path.join(self.outdir, 'tbl_heppyresult.txt')
    if self.force or not os.path.exists(tbl_heppyresult_path):
        # e.g., '74X/MC/20150810_MC/20150810_SingleMu'
        heppydir_rel = '/'.join(self.heppydir.rstrip('/').split('/')[-4:])
        alphatwirl.mkdir_p(os.path.dirname(tbl_heppyresult_path))
        f = open(tbl_heppyresult_path, 'w')
        f.write('heppyresult\n')
        f.write(heppydir_rel + '\n')
        f.close()

    # tbl_tree.txt
    tbl_tree_path = os.path.join(self.outdir, 'tbl_tree.txt')
    if self.force or not os.path.exists(tbl_tree_path):
        tblTree = heppyresult.TblTree(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
            outPath=tbl_tree_path,
        )
        dataset_readers.add(tblTree)

    # tbl_branch.txt
    tbl_branch_path = os.path.join(self.outdir, 'tbl_branch.txt')
    if self.force or not os.path.exists(tbl_branch_path):
        tblBranch = heppyresult.TblBranch(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
            outPath=tbl_branch_path,
        )
        dataset_readers.add(tblBranch)

    # tbl_branch_size.txt
    tbl_branch_size_path = os.path.join(self.outdir, 'tbl_branch_size.txt')
    if self.force or not os.path.exists(tbl_branch_size_path):
        tblBranchSize = heppyresult.TblBranch(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
            outPath=tbl_branch_size_path,
            addType=False,
            addSize=True,
            sortBySize=True,
        )
        dataset_readers.add(tblBranchSize)

    # tbl_branch_title.txt
    tbl_branch_title_path = os.path.join(self.outdir, 'tbl_branch_title.txt')
    if self.force or not os.path.exists(tbl_branch_title_path):
        tblBranchTitle = heppyresult.TblBranch(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
            outPath=tbl_branch_title_path,
            addType=False,
            addSize=False,
            addTitle=True,
        )
        dataset_readers.add(tblBranchTitle)

    # tbl_dataset.txt
    tbl_dataset_path = os.path.join(self.outdir, 'tbl_dataset.txt')
    if self.force or not os.path.exists(tbl_dataset_path):
        tblDataset = heppyresult.TblComponentConfig(
            outPath=tbl_dataset_path,
            columnNames=('dataset', ),
            keys=('dataset', ),
        )
        dataset_readers.add(tblDataset)

    # tbl_xsec.txt for MC
    if self.datamc == 'mc' and not self.susy_sms:
        tbl_xsec_path = os.path.join(self.outdir, 'tbl_xsec.txt')
        if self.force or not os.path.exists(tbl_xsec_path):
            tblXsec = heppyresult.TblComponentConfig(
                outPath=tbl_xsec_path,
                columnNames=('xsec', ),
                keys=('xSection', ),
            )
            dataset_readers.add(tblXsec)

    # tbl_nevt.txt for MC
    if self.datamc == 'mc' and not self.susy_sms:
        tbl_nevt_path = os.path.join(self.outdir, 'tbl_nevt.txt')
        if self.force or not os.path.exists(tbl_nevt_path):
            tblNevt = heppyresult.TblCounter(
                outPath=tbl_nevt_path,
                columnNames=('nevt', 'nevt_sumw'),
                analyzerName='skimAnalyzerCount',
                fileName='SkimReport.txt',
                levels=('All Events', 'Sum Weights'))
            dataset_readers.add(tblNevt)

    # tbl_nevt_sms.txt for MC SUSY SMS
    if self.datamc == 'mc' and self.susy_sms:
        tbl_nevt_sms_path = os.path.join(self.outdir, 'tbl_nevt_sms.txt')
        if self.force or not os.path.exists(tbl_nevt_sms_path):
            tblSMSNevt = heppyresult.TblSMSNevt(
                analyzerName='susyParameterScanAnalyzer',
                fileName='genEvtsPerMass.root',
                outPath=tbl_nevt_sms_path)
            dataset_readers.add(tblSMSNevt)

    # compose all readers/collectors into single top-level ones
    reader_top = alphatwirl.loop.ReaderComposite()
    collector_top = alphatwirl.loop.CollectorComposite()
    for r, c in reader_collector_pairs:
        reader_top.add(r)
        collector_top.add(c)
    eventLoopRunner = alphatwirl.loop.MPEventLoopRunner(
        self.parallel.communicationChannel)
    eventBuilderConfigMaker = heppyresult.EventBuilderConfigMaker(
        analyzerName=analyzerName,
        fileName=fileName,
        treeName=treeName,
        check_files=True,
        skip_error_files=True)
    datasetIntoEventBuildersSplitter = alphatwirl.loop.DatasetIntoEventBuildersSplitter(
        EventBuilder=alphatwirl.roottree.BuildEvents,
        eventBuilderConfigMaker=eventBuilderConfigMaker,
        maxEvents=self.max_events_per_dataset,
        maxEventsPerRun=self.max_events_per_process,
        maxFiles=self.max_files_per_dataset,
        maxFilesPerRun=self.max_files_per_process)
    eventReader = alphatwirl.loop.EventDatasetReader(
        eventLoopRunner=eventLoopRunner,
        reader=reader_top,
        collector=collector_top,
        split_into_build_events=datasetIntoEventBuildersSplitter)
    dataset_readers.add(eventReader)

    # ['all'] means no component filter
    if components == ['all']:
        components = None
    heppyResult = heppyresult.HeppyResult(
        path=self.heppydir,
        componentNames=components,
        componentHasTheseFiles=[analyzerName])
    if self.parallel_mode in ('multiprocessing', ):
        loop = alphatwirl.datasetloop.DatasetLoop(
            datasets=heppyResult.components(),
            reader=dataset_readers)
    else:
        # resumable variant keeps state in the working area
        loop = alphatwirl.datasetloop.ResumableDatasetLoop(
            datasets=heppyResult.components(),
            reader=dataset_readers,
            workingarea=self.parallel.workingarea)
    return loop
def _configure(self, components, reader_collector_pairs, analyzerName,
               fileName, treeName):
    """Assemble the component loop for a Heppy result directory.

    Builds a ComponentReaderComposite of table writers (each skipped when
    its output file exists, unless self.force) plus the event-loop
    reader, then returns a ComponentLoop over the Heppy components.
    """
    component_readers = alphatwirl.heppyresult.ComponentReaderComposite()

    # tbl_heppyresult.txt: record the (relative) Heppy result directory
    tbl_heppyresult_path = os.path.join(self.outdir, 'tbl_heppyresult.txt')
    if self.force or not os.path.exists(tbl_heppyresult_path):
        # e.g., '74X/MC/20150810_MC/20150810_SingleMu'
        heppydir_rel = '/'.join(self.heppydir.rstrip('/').split('/')[-4:])
        alphatwirl.mkdir_p(os.path.dirname(tbl_heppyresult_path))
        f = open(tbl_heppyresult_path, 'w')
        f.write('heppyresult\n')
        f.write(heppydir_rel + '\n')
        f.close()

    # tbl_dataset.txt
    tbl_dataset_path = os.path.join(self.outdir, 'tbl_dataset.txt')
    if self.force or not os.path.exists(tbl_dataset_path):
        tblDataset = alphatwirl.heppyresult.TblComponentConfig(
            outPath=tbl_dataset_path,
            columnNames=('dataset', ),
            keys=('dataset', ),
        )
        component_readers.add(tblDataset)

    # tbl_xsec.txt for MC
    if not self.isdata:
        tbl_xsec_path = os.path.join(self.outdir, 'tbl_xsec.txt')
        if self.force or not os.path.exists(tbl_xsec_path):
            tblXsec = alphatwirl.heppyresult.TblComponentConfig(
                outPath=tbl_xsec_path,
                columnNames=('xsec', ),
                keys=('xSection', ),
            )
            component_readers.add(tblXsec)

    # tbl_nevt.txt for MC
    if not self.isdata:
        tbl_nevt_path = os.path.join(self.outdir, 'tbl_nevt.txt')
        if self.force or not os.path.exists(tbl_nevt_path):
            tblNevt = alphatwirl.heppyresult.TblCounter(
                outPath=tbl_nevt_path,
                columnNames=('nevt', 'nevt_sumw'),
                analyzerName='skimAnalyzerCount',
                fileName='SkimReport.txt',
                levels=('All Events', 'Sum Weights'))
            component_readers.add(tblNevt)

    # event loop: compose all readers/collectors into single ones
    reader = alphatwirl.loop.ReaderComposite()
    collector = alphatwirl.loop.CollectorComposite(
        self.parallel.progressMonitor.createReporter())
    for r, c in reader_collector_pairs:
        reader.add(r)
        collector.add(c)
    eventLoopRunner = alphatwirl.loop.MPEventLoopRunner(
        self.parallel.communicationChannel)
    eventBuilderConfigMaker = alphatwirl.heppyresult.EventBuilderConfigMaker(
        analyzerName=analyzerName,
        fileName=fileName,
        treeName=treeName,
    )
    datasetIntoEventBuildersSplitter = alphatwirl.loop.DatasetIntoEventBuildersSplitter(
        EventBuilder=alphatwirl.heppyresult.EventBuilder,
        eventBuilderConfigMaker=eventBuilderConfigMaker,
        maxEvents=self.max_events_per_dataset,
        maxEventsPerRun=self.max_events_per_process)
    eventReader = alphatwirl.loop.EventsInDatasetReader(
        eventLoopRunner=eventLoopRunner,
        reader=reader,
        collector=collector,
        split_into_build_events=datasetIntoEventBuildersSplitter)
    component_readers.add(eventReader)

    # ['all'] means no component filter
    if components == ['all']:
        components = None
    heppyResult = alphatwirl.heppyresult.HeppyResult(
        path=self.heppydir,
        componentNames=components,
        componentHasTheseFiles=[analyzerName])
    componentLoop = alphatwirl.heppyresult.ComponentLoop(
        heppyResult, component_readers)
    return componentLoop
def test_success(mock_makedirs):
    """A successful mkdir_p call passes the path straight to os.makedirs."""
    mkdir_p('a/b')
    assert mock_makedirs.call_args_list == [mock.call('a/b')]