Example #1

def configure_1st_event_selection():

    path_cfg = dict(Any=(
        dict(All=('ev : ev.GenSusyMSbottom[0] == 1000',
                  'ev : ev.GenSusyMNeutralino[0] == 300')),
        dict(All=('ev : ev.GenSusyMSbottom[0] == 800',
                  'ev : ev.GenSusyMNeutralino[0] == 50')),
        dict(All=('ev : ev.GenSusyMSbottom[0] == 375',
                  'ev : ev.GenSusyMNeutralino[0] == 300')),
    ))

    #
    eventSelection = alphatwirl.selection.build_selection(
        path_cfg=path_cfg,
        AllClass=alphatwirl.selection.modules.AllwCount,
        AnyClass=alphatwirl.selection.modules.AnywCount,
        NotClass=alphatwirl.selection.modules.NotwCount)

    eventselection_path = os.path.join(args.outdir, 'eventselection_01.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream=f)

    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow_01.txt')

    resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn(
        summaryColumnNames=('depth', 'class', 'name', 'pass', 'total'))
    deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
    collector = alphatwirl.loop.Collector(resultsCombinationMethod,
                                          deliveryMethod)

    ret = [(eventSelection, collector)]
    return ret
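
# Note: path_cfg above is a nested tree of Any/All nodes whose leaves are
# 'ev : <expression>' strings, i.e. lambda bodies with 'ev' as the argument.
# A minimal, self-contained sketch of how such a tree can be evaluated on a
# single event (an illustration only, not alphatwirl's implementation;
# build_selection compiles the same structure into counting selections):
from types import SimpleNamespace

def evaluate(cfg, ev):
    if isinstance(cfg, str):
        return eval('lambda ' + cfg)(ev)  # 'ev : <expr>' -> lambda ev : <expr>
    if 'All' in cfg:
        return all(evaluate(c, ev) for c in cfg['All'])
    if 'Any' in cfg:
        return any(evaluate(c, ev) for c in cfg['Any'])
    if 'Not' in cfg:
        return not evaluate(cfg['Not'], ev)
    raise ValueError('unknown node: {!r}'.format(cfg))

mock_event = SimpleNamespace(GenSusyMSbottom=[800], GenSusyMNeutralino=[50])
assert evaluate(dict(Any=(
    dict(All=('ev : ev.GenSusyMSbottom[0] == 1000',
              'ev : ev.GenSusyMNeutralino[0] == 300')),
    dict(All=('ev : ev.GenSusyMSbottom[0] == 800',
              'ev : ev.GenSusyMNeutralino[0] == 50')),
)), mock_event)  # the second mass point matches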
Example #2
def test_raise(mock_makedirs, mock_isdir, caplog):
    mock_isdir.return_value = False
    mock_makedirs.side_effect = OSError
    with pytest.raises(OSError):
        mkdir_p('a/b')

    assert [mock.call('a/b')] == mock_makedirs.call_args_list
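
# Note: mock_makedirs and mock_isdir are pytest fixtures not shown in this
# listing. A plausible conftest sketch (an assumption for illustration; the
# actual project conftest may differ) patching os.makedirs and os.path.isdir:
import pytest
from unittest import mock

@pytest.fixture()
def mock_makedirs(monkeypatch):
    ret = mock.Mock()
    monkeypatch.setattr('os.makedirs', ret)
    return ret

@pytest.fixture()
def mock_isdir(monkeypatch):
    ret = mock.Mock()
    monkeypatch.setattr('os.path.isdir', ret)
    return ret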
Example #3
def build_parallel_dropbox(parallel_mode,
                           quiet,
                           user_modules,
                           htcondor_job_desc_extra=[]):
    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('alphatwirl_interface')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)

    if quiet:
        progressMonitor = alphatwirl.progressbar.NullProgressMonitor()
    else:
        if sys.stdout.isatty():
            progressBar = alphatwirl.progressbar.ProgressBar()
        else:
            progressBar = alphatwirl.progressbar.ProgressPrint()
        progressMonitor = alphatwirl.progressbar.BProgressMonitor(
            presentation=progressBar)
    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()
    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir,
        python_modules=list(user_modules),
        exclusions=["*{}*".format(tmpdir)])
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)

    return Parallel(progressMonitor, communicationChannel)
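
# Hypothetical call of the helper above; the argument values are assumptions
# for illustration, not from the original source:
#
#     parallel = build_parallel_dropbox(
#         parallel_mode='htcondor',
#         quiet=True,
#         user_modules=['my_analysis'],
#         htcondor_job_desc_extra=['request_memory = 2000'],
#     )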
Example #4
    def put_package(self, package):
        """Put a package

        Parameters
        ----------
        package :
            a task package

        Returns
        -------
        int
            A package index

        """

        self.last_package_index += 1
        package_index = self.last_package_index

        package_fullpath = self.package_fullpath(package_index)
        # e.g., '{path}/tpd_20161129_122841_HnpcmF/task_00009.p.gz'

        with gzip.open(package_fullpath, 'wb') as f:
            pickle.dump(package, f, protocol=pickle.HIGHEST_PROTOCOL)

        result_fullpath = self.result_fullpath(package_index)
        # e.g., '{path}/tpd_20161129_122841_HnpcmF/results/task_00009/result.p.gz'

        result_dir = os.path.dirname(result_fullpath)
        # e.g., '{path}/tpd_20161129_122841_HnpcmF/results/task_00009'

        alphatwirl.mkdir_p(result_dir)

        return package_index
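
# Self-contained illustration of the gzip + pickle round trip that
# put_package relies on (standard library only; alphatwirl not required):
import gzip
import os
import pickle
import tempfile

payload = {'task': 'demo', 'index': 9}
path = os.path.join(tempfile.mkdtemp(), 'task_00009.p.gz')
with gzip.open(path, 'wb') as f:
    pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
with gzip.open(path, 'rb') as f:
    assert pickle.load(f) == payload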
Example #6
def configure_datasets():

    # ret = atnanoaod.query.build_datasets_from_tbl_paths(
    #     tbl_cmsdataset_paths=args.tbl_cmsdatasets,
    #     datasets=args.datasets if args.datasets else None
    #     # give None to datasets if args.datasets is an empty list
    #     # so that build_datasets() returns all datasets rather than
    #     # an empty list.
    # )

    ret = [
        atnanoaod.dataset.Dataset(
            name='ZJetsToNuNu_HT400To600',
            files=[
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/FA22B8D3-A046-E611-AEB6-00259073E4C8.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/F661992C-AE46-E611-9B2D-0CC47A1E0476.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/ZJetsToNuNu_HT-400To600_13TeV-madgraph/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/90000/CCEE61DF-FA48-E611-AEF2-44A842CF05A5.root',
            ]),
        atnanoaod.dataset.Dataset(
            name='ZJetsToNuNu_HT600To800',
            files=[
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F614500E-8A24-E611-AB01-B083FECFEF7D.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F431EB87-7D24-E611-ACD5-B083FECFF2BF.root',
                '/hdfs/dpm/phy.bris.ac.uk/home/cms/store/mc/RunIISpring16MiniAODv2/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/40000/F408470E-8D24-E611-A912-D4AE526DF2E3.root',
            ]),
    ]

    path = os.path.join(args.outdir, 'datasets.txt')
    if args.force or not os.path.exists(path):
        alphatwirl.mkdir_p(os.path.dirname(path))
        with open(path, 'w') as f:
            pprint.pprint(ret, stream=f)

    return ret
Example #7
def test_already_exist(mock_makedirs, mock_isdir, caplog):
    mock_isdir.return_value = True
    mock_makedirs.side_effect = OSError(errno.EEXIST, 'already exist')
    with caplog.at_level(logging.DEBUG - 1):
        mkdir_p('a/b')

    assert [mock.call('a/b')] == mock_makedirs.call_args_list
    assert len(caplog.records) == 1
    assert caplog.records[0].levelno == logging.DEBUG - 1
    assert 'tried' in caplog.records[0].msg
Example #9
    def _prepare_dir(self, dir):

        alphatwirl.mkdir_p(dir)

        prefix = 'tpd_{:%Y%m%d_%H%M%S}_'.format(datetime.datetime.now())
        # e.g., 'tpd_20161129_122841_'

        path = tempfile.mkdtemp(prefix=prefix, dir=dir)
        # e.g., '{path}/tpd_20161129_122841_HnpcmF'

        return path
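
# Runnable illustration of what _prepare_dir produces (standard library
# only; alphatwirl not required):
import datetime
import os
import tempfile

prefix = 'tpd_{:%Y%m%d_%H%M%S}_'.format(datetime.datetime.now())
path = tempfile.mkdtemp(prefix=prefix)  # e.g., '/tmp/tpd_20161129_122841_HnpcmF'
assert os.path.basename(path).startswith('tpd_')
os.rmdir(path)  # remove the empty demo dir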
Example #10

def configure_2nd_event_selection():

    path_cfg = dict(All=(
        'ev : ev.nElectronsVeto[0] == 0',
        'ev : ev.nMuonsVeto[0] == 0',
        'ev : ev.nIsoTracksVeto[0] <= 0',
        'ev : ev.nPhotonsVeto[0] == 0',
        'ev : ev.nJet40Fwd[0] == 0',
        'ev : ev.nJet40[0] >= 2',
        'ev : ev.jet_pt[0] > 100',
        'ev : -2.5 < ev.jet_eta[0] < 2.5',
        'ev : ev.ht40[0] > 200',
        'ev : ev.mht40_pt[0] > 130',
        'ev : ev.MhtOverMet[0] < 1.25',
        dict(Any=(dict(All=('ev : 200 <= ev.ht40[0] < 250',
                            'ev : 0.65 <= ev.alphaT[0]')),
                  dict(All=('ev : 250 <= ev.ht40[0] < 300',
                            'ev : 0.60 <= ev.alphaT[0]')),
                  dict(All=('ev : 300 <= ev.ht40[0] < 350',
                            'ev : 0.55 <= ev.alphaT[0]')),
                  dict(All=('ev : 350 <= ev.ht40[0] < 400',
                            'ev : 0.53 <= ev.alphaT[0]')),
                  dict(All=('ev : 400 <= ev.ht40[0] < 600',
                            'ev : 0.52 <= ev.alphaT[0]')),
                  dict(All=('ev : 600 <= ev.ht40[0] < 800',
                            'ev : 0.52 <= ev.alphaT[0]')),
                  dict(All=('ev : 800 <= ev.ht40[0]', )))),
        'ev : ev.biasedDPhi[0] > 0.5',
    ))

    #
    eventSelection = alphatwirl.selection.build_selection(
        path_cfg=path_cfg,
        AllClass=alphatwirl.selection.modules.AllwCount,
        AnyClass=alphatwirl.selection.modules.AnywCount,
        NotClass=alphatwirl.selection.modules.NotwCount)

    eventselection_path = os.path.join(args.outdir, 'eventselection.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream=f)

    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow_02.txt')

    resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn(
        summaryColumnNames=('depth', 'class', 'name', 'pass', 'total'))
    deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
    collector = alphatwirl.loop.Collector(resultsCombinationMethod,
                                          deliveryMethod)

    ret = [(eventSelection, collector)]
    return ret
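
# Note: the Any block above encodes an HT-dependent alphaT threshold. The
# same table as a compact lookup (an illustration added here, not part of
# the original script):
def alphat_threshold(ht40):
    # minimum alphaT required in each HT bin; None above 800 GeV, where no
    # alphaT cut is applied
    edges = [(800, None), (600, 0.52), (400, 0.52),
             (350, 0.53), (300, 0.55), (250, 0.60), (200, 0.65)]
    for lo, threshold in edges:
        if ht40 >= lo:
            return threshold
    raise ValueError('ht40 below the 200 GeV selection floor')

assert alphat_threshold(320) == 0.55
assert alphat_threshold(900) is None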
Example #11
def configure_reader_collector_pairs():

    ret = []

    ret.extend(configure_scribblers_before_event_selection())

    ret.extend(configure_tables_after_1st_event_selection())

    path = os.path.join(args.outdir, 'reader_collector_pairs.txt')
    alphatwirl.mkdir_p(os.path.dirname(path))
    with open(path, 'w') as f:
        pprint.pprint(ret, stream=f)

    return ret
Example #12
def test_collect_result(obj):

    obj.open()

    result = MockResult(name='result1')

    package_index = 9
    result_fullpath = obj.result_fullpath(package_index)
    mkdir_p(os.path.dirname(result_fullpath))
    with gzip.open(result_fullpath, 'wb') as f:
        pickle.dump(result, f)

    assert result == obj.collect_result(package_index=package_index)
Example #13
def test_collect_result_eoferror(obj):
    # the file 'result.p.gz' is empty.
    # pickle.load() raises EOFError

    obj.open()

    package_index = 9
    dirname = 'task_{:05d}'.format(package_index)
    result_dir = os.path.join(obj.path, 'results', dirname)
    mkdir_p(result_dir)

    result_path = os.path.join(result_dir, 'result.p.gz')
    with open(result_path, 'wb'):
        pass  # deliberately create an empty file

    assert obj.collect_result(package_index=package_index) is None
Example #15
def test_collect_result(obj):

    obj.open()

    result = MockResult(name='result1')

    package_index = 9
    dirname = 'task_{:05d}'.format(package_index)
    result_dir = os.path.join(obj.path, 'results', dirname)
    mkdir_p(result_dir)
    result_path = os.path.join(result_dir, 'result.p.gz')
    with gzip.open(result_path, 'wb') as f:
        pickle.dump(result, f)

    assert result == obj.collect_result(package_index=package_index)
Example #17

def configure_1st_event_selection():

    path_cfg = dict(All=(
        'ev : ev.cutflowId[0] == 1 # Signal',
        'ev : ev.nIsoTracksVeto[0] <= 0',
        'ev : ev.nJet40Fwd[0] == 0',
        'ev : ev.nJet40failedId[0] == 0',
        'ev : ev.nJet40[0] >= 2',
        'ev : -2.5 < ev.jet_eta[0] < 2.5',
        'ev : 0.1 <= ev.jet_chHEF[0] < 0.95',
        # 'ev : ev.nJet100[0] >= 1',
        'ev : ev.ht40[0] >= 400',
        'ev : ev.mht40_pt[0] >= 200',
        'ev : ev.MhtOverMet[0] < 1.25',
    ))

    #
    eventSelection = alphatwirl.selection.build_selection(
        path_cfg=path_cfg,
        AllClass=alphatwirl.selection.modules.AllwCount,
        AnyClass=alphatwirl.selection.modules.AnywCount,
        NotClass=alphatwirl.selection.modules.NotwCount)

    eventselection_path = os.path.join(args.outdir, 'eventselection.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream=f)

    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow.txt')

    resultsCombinationMethod = alphatwirl.collector.ToTupleListWithDatasetColumn(
        summaryColumnNames=('depth', 'class', 'name', 'pass', 'total'))
    deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
    collector = alphatwirl.loop.Collector(resultsCombinationMethod, deliveryMethod)

    ret = [(eventSelection, collector)]
    return ret
Example #18
def build_parallel_dropbox(parallel_mode,
                           quiet,
                           user_modules,
                           htcondor_job_desc_extra=[]):
    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('fwtwirl')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)
    progressMonitor = alphatwirl.progressbar.NullProgressMonitor()
    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()
    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir, python_modules=list(user_modules))
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)
    return Parallel(progressMonitor, communicationChannel)
Example #19

def build_parallel_dropbox(parallel_mode,
                           quiet,
                           user_modules,
                           htcondor_job_desc_extra=[],
                           **kwargs):
    tmpdir = '_ccsp_temp'
    user_modules = set(user_modules)
    user_modules.add('alphatwirl_interface')
    user_modules.add('alphatwirl')
    alphatwirl.mkdir_p(tmpdir)

    if quiet:
        progressMonitor = alphatwirl.progressbar.NullProgressMonitor()
    else:
        if sys.stdout.isatty():
            progressBar = alphatwirl.progressbar.ProgressBar()
        else:
            progressBar = alphatwirl.progressbar.ProgressPrint()
        progressMonitor = alphatwirl.progressbar.BProgressMonitor(
            presentation=progressBar)
    if parallel_mode == 'htcondor':
        dispatcher = alphatwirl.concurrently.HTCondorJobSubmitter(
            job_desc_extra=htcondor_job_desc_extra)
    elif parallel_mode == 'sge':
        q = "hep.q" if "queue" not in kwargs else kwargs["queue"]
        t = 10800 if "time" not in kwargs else kwargs["time"]
        dispatcher = alphatwirl.concurrently.SGEJobSubmitter(queue=q,
                                                             walltime=t)
    else:
        dispatcher = alphatwirl.concurrently.SubprocessRunner()
    workingArea = alphatwirl.concurrently.WorkingArea(
        dir=tmpdir, python_modules=list(user_modules))
    dropbox = alphatwirl.concurrently.TaskPackageDropbox(
        workingArea=workingArea, dispatcher=dispatcher)
    communicationChannel = alphatwirl.concurrently.CommunicationChannel(
        dropbox=dropbox)

    return Parallel(progressMonitor, communicationChannel)
Example #20
    def run_multiple(self, workingArea, package_indices):

        if not package_indices:
            return []

        cwd = os.getcwd()
        os.chdir(workingArea.path)

        package_paths = [workingArea.package_path(i) for i in package_indices]
        resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
        resultdir_basenames = [
            os.path.splitext(n)[0] for n in resultdir_basenames
        ]
        resultdirs = [os.path.join('results', n) for n in resultdir_basenames]

        for d in resultdirs:
            alphatwirl.mkdir_p(d)

        job_desc = self.job_desc_template.format(
            job_script='job_script.sh',
            njobs=len(package_paths),
            queue=self.queue,
            walltime=self.walltime,
        )

        s = "#!/bin/bash\n\nulimit -c 0\n\n"
        for idx, package_path in enumerate(package_paths):
            s += "cmd1[{index}]='cd {path}'\n".format(
                index=idx + 1,
                path=resultdirs[idx],
            )
            s += "cmd2[{index}]='python {job_script} {args}'\n".format(
                index=idx + 1,
                job_script="../../run.py",
                args=package_path,
            )
        s += "\n${{cmd1[$SGE_TASK_ID]}} > {out} 2> {err}\n".format(
            out="stdout.txt",
            err="stderr.txt",
        )
        s += "${{cmd2[$SGE_TASK_ID]}} >> {out} 2>> {err}".format(
            out="stdout.txt",
            err="stderr.txt",
        )
        with open("job_script.sh", 'w') as f:
            f.write(s)

        proc = subprocess.Popen(
            job_desc.split(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # decode stdout/stderr to str
        )
        stdout, stderr = proc.communicate()

        regex = re.compile(
            r"Your job-array (\d+)\.1-(\d+):1 \(\"job_script\.sh\"\) has been submitted"
        )
        njobs = int(regex.search(stdout).groups()[1])
        clusterid = regex.search(stdout).groups()[0]
        # e.g., '2448770'

        #change_job_priority([clusterid], 10) ## need to make configurable

        procid = ['{}'.format(i + 1) for i in range(njobs)]
        # e.g., ['1', '2', '3', '4']

        clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
        # e.g., ['2448770.1', '2448770.2', '2448770.3', '2448770.4']

        self.clusterprocids_outstanding.extend(clusterprocids)

        os.chdir(cwd)

        return clusterprocids
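
# Quick check of the submission-output parsing above; the sample line
# mirrors the qsub job-array acknowledgement that the regex expects:
import re

sample = 'Your job-array 2448770.1-4:1 ("job_script.sh") has been submitted'
regex = re.compile(
    r'Your job-array (\d+)\.1-(\d+):1 \("job_script\.sh"\) has been submitted')
m = regex.search(sample)
assert m.group(1) == '2448770'  # cluster id
assert int(m.group(2)) == 4     # number of jobs in the array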
Example #21

def configure_1st_event_selection():

    path_cfg_common = dict(All=(
        'ev : ev.cutflowId[0] == 1',
        'ev : ev.nIsoTracksVeto[0] <= 0',
        'ev : ev.nJet40[0] >= 2',
        'ev : ev.ht40[0] >= 200',
        'ev : ev.nJet100[0] >= 1',
        'ev : ev.nJet40failedId[0] == 0',
        'ev : ev.nJet40Fwd[0] == 0',
        'ev : -2.5 < ev.jet_eta[0] < 2.5',
        'ev : 0.1 <= ev.jet_chHEF[0] < 0.95',
        'ev : 130 <= ev.mht40_pt[0]',
        'ev : ev.MhtOverMet[0] < 1.25',
    ))

    path_cfg_susy_masspoints = dict(Any=(
        dict(All=(
            'ev : ev.componentName[0] == "SMS_T1tttt_madgraphMLM"',
            dict(Any=(
                dict(All=('ev : ev.smsmass1[0] == 1300',
                          'ev : ev.smsmass2[0] == 1050', path_cfg_common)),
                dict(All=('ev : ev.smsmass1[0] == 1800',
                          'ev : ev.smsmass2[0] == 500', path_cfg_common)),
            )),
        )),
        dict(All=(
            'ev : ev.componentName[0] == "SMS_T2bb_madgraphMLM"',
            dict(Any=(
                dict(All=('ev : ev.smsmass1[0] == 500',
                          'ev : ev.smsmass2[0] == 450', path_cfg_common)),
                dict(All=('ev : ev.smsmass1[0] == 1000',
                          'ev : ev.smsmass2[0] == 300', path_cfg_common)),
            )),
        )),
    ))

    path_cfg = path_cfg_common
    if args.susy_sms:
        path_cfg = path_cfg_susy_masspoints

    #
    eventselection_path = os.path.join(args.outdir, 'eventselection.txt')
    if args.force or not os.path.exists(eventselection_path):
        alphatwirl.mkdir_p(os.path.dirname(eventselection_path))
        with open(eventselection_path, 'w') as f:
            pprint.pprint(path_cfg, stream=f)

    #
    tbl_cutflow_path = os.path.join(args.outdir, 'tbl_cutflow.txt')
    if args.force or not os.path.exists(tbl_cutflow_path):
        eventSelection = alphatwirl.selection.build_selection(
            path_cfg=path_cfg,
            AllClass=alphatwirl.selection.modules.AllwCount,
            AnyClass=alphatwirl.selection.modules.AnywCount,
            NotClass=alphatwirl.selection.modules.NotwCount)
        resultsCombinationMethod = alphatwirl.collector.CombineIntoList(
            summaryColumnNames=('depth', 'class', 'name', 'pass', 'total'),
            sort=False,
            summarizer_to_tuple_list=summarizer_to_tuple_list)
        deliveryMethod = alphatwirl.collector.WriteListToFile(tbl_cutflow_path)
        collector = alphatwirl.loop.Collector(resultsCombinationMethod,
                                              deliveryMethod)
    else:
        eventSelection = alphatwirl.selection.build_selection(
            path_cfg=path_cfg)
        collector = alphatwirl.loop.NullCollector()

    #
    ret = [(eventSelection, collector)]
    return ret
Example #22
def test_emtpy(mock_makedirs):
    mkdir_p('')
    assert [] == mock_makedirs.call_args_list
Example #24
def test_success(mock_makedirs):
    mkdir_p('a/b')
    assert [mock.call('a/b')] == mock_makedirs.call_args_list
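
# Note: a minimal mkdir_p consistent with all of the tests above (an
# illustrative sketch; alphatwirl ships its own implementation): skip empty
# paths, log and ignore EEXIST when the directory already exists, and
# re-raise any other OSError.
import errno
import logging
import os

def mkdir_p(path):
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno == errno.EEXIST and os.path.isdir(path):
            logger = logging.getLogger(__name__)
            logger.log(logging.DEBUG - 1,
                       'tried to create an already existing dir: {}'.format(path))
        else:
            raise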
Example #25

    def run_multiple(self, workingArea, package_indices):

        if not package_indices:
            return []

        cwd = os.getcwd()
        os.chdir(workingArea.path)

        package_paths = [workingArea.package_path(i) for i in package_indices]
        resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
        resultdir_basenames = [os.path.splitext(n)[0] for n in resultdir_basenames]
        resultdirs = [os.path.join('results', n) for n in resultdir_basenames]

        for d in resultdirs:
            alphatwirl.mkdir_p(d)

        extra_input_files = ['python_modules.tar.gz']
        extra_input_files = [f for f in extra_input_files if os.path.exists(f)]

        job_desc = self.job_desc_template.format(
            input_files=', '.join(['$(resultdir).p.gz'] + extra_input_files),
            resultdirs=', '.join(resultdir_basenames)
        )

        procargs = ['condor_submit']

        logger = logging.getLogger(__name__)
        command_display = compose_shortened_command_for_logging(procargs)
        logger.debug('execute: {!r}'.format(command_display))

        proc = subprocess.Popen(
            procargs,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        stdout, stderr = proc.communicate(job_desc.encode())
        stdout = stdout.decode()
        stderr = stderr.decode()

        for l in stdout.rstrip().split('\n'):
            logger.debug(l)

        regex = re.compile(r"(\d+) job\(s\) submitted to cluster (\d+)", re.MULTILINE)
        njobs = int(regex.search(stdout).groups()[0])
        clusterid = regex.search(stdout).groups()[1]
        # e.g., '3158626'

        change_job_priority([clusterid], 10) ## need to make configurable

        procid = ['{}'.format(i) for i in range(njobs)]
        # e.g., ['0', '1', '2', '3']

        clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
        # e.g., ['3158626.0', '3158626.1', '3158626.2', '3158626.3']

        self.clusterprocids_outstanding.extend(clusterprocids)

        os.chdir(cwd)

        return clusterprocids
Example #26
    def run_multiple(self, workingArea, package_indices):

        if not package_indices:
            return []

        cwd = os.getcwd()
        os.chdir(workingArea.path)

        package_paths = [
            workingArea.package_relpath(i) for i in package_indices
        ]
        resultdir_basenames = [os.path.splitext(p)[0] for p in package_paths]
        resultdir_basenames = [
            os.path.splitext(n)[0] for n in resultdir_basenames
        ]
        resultdirs = [os.path.join('results', n) for n in resultdir_basenames]

        for d in resultdirs:
            alphatwirl.mkdir_p(d)

        # Get list of task names
        task_name = None
        for p in package_paths:
            with gzip.open(p, 'rb') as f:
                package = pickle.load(f)
            if hasattr(package.task, 'progressbar_label'):
                if task_name is None:
                    task_name = package.task.progressbar_label
                elif package.task.progressbar_label != task_name:
                    logger = logging.getLogger(__name__)
                    logger.warning("Task name changed somehow")
            else:
                task_name = "task"

        job_desc = self.job_desc_template.format(
            name=task_name,
            job_script='job_script.sh',
            njobs=len(package_paths),
            queue=self.queue,
            walltime=self.walltime_dict.get(task_name, self.walltime),
            vmem=self.vmem_dict.get(task_name, self.vmem),
        )

        s = "#!/bin/bash\n\nulimit -c 0\n\n"
        for idx, package_path in enumerate(package_paths):
            s += "cmd1[{index}]='cd {path}'\n".format(
                index=idx + 1,
                path=resultdirs[idx],
            )
            s += "cmd2[{index}]='python {job_script} {args}'\n".format(
                index=idx + 1,
                job_script="../../run.py",
                args=package_path,
            )
        s += "\n${{cmd1[$SGE_TASK_ID]}} > {out} 2> {err}\n".format(
            out="stdout.txt",
            err="stderr.txt",
        )
        s += "${{cmd2[$SGE_TASK_ID]}} >> {out} 2>> {err}".format(
            out="stdout.txt",
            err="stderr.txt",
        )
        with open("job_script.sh", 'w') as f:
            f.write(s)

        proc = subprocess.Popen(
            job_desc.split(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        stdout, stderr = proc.communicate()

        regex = re.compile(
            r"Your job-array (\d+)\.1-(\d+):1 \(\"{}\"\) has been submitted".format(
                task_name))
        try:
            njobs = int(regex.search(stdout).groups()[1])
            clusterid = regex.search(stdout).groups()[0]
            # e.g., '2448770'
        except Exception as e:
            logger = logging.getLogger(__name__)
            logger.error(stdout)
            logger.error(stderr)
            raise AttributeError(e)

        #change_job_priority([clusterid], 10) ## need to make configurable

        procid = ['{}'.format(i + 1) for i in range(njobs)]
        # e.g., ['1', '2', '3', '4']

        clusterprocids = ['{}.{}'.format(clusterid, i) for i in procid]
        # e.g., ['2448770.1', '2448770.2', '2448770.3', '2448770.4']

        self.clusterprocids_outstanding.extend(clusterprocids)

        os.chdir(cwd)

        return clusterprocids
Example #27
    def _configure(self, components, reader_collector_pairs, analyzerName,
                   fileName, treeName):

        dataset_readers = alphatwirl.datasetloop.DatasetReaderComposite()

        # tbl_heppyresult.txt
        tbl_heppyresult_path = os.path.join(self.outdir, 'tbl_heppyresult.txt')
        if self.force or not os.path.exists(tbl_heppyresult_path):
            # e.g., '74X/MC/20150810_MC/20150810_SingleMu'
            heppydir_rel = '/'.join(self.heppydir.rstrip('/').split('/')[-4:])
            alphatwirl.mkdir_p(os.path.dirname(tbl_heppyresult_path))
            with open(tbl_heppyresult_path, 'w') as f:
                f.write('heppyresult\n')
                f.write(heppydir_rel + '\n')

        # tbl_tree.txt
        tbl_tree_path = os.path.join(self.outdir, 'tbl_tree.txt')
        if self.force or not os.path.exists(tbl_tree_path):
            tblTree = heppyresult.TblTree(
                analyzerName=analyzerName,
                fileName=fileName,
                treeName=treeName,
                outPath=tbl_tree_path,
            )
            dataset_readers.add(tblTree)

        # tbl_branch.txt
        tbl_branch_path = os.path.join(self.outdir, 'tbl_branch.txt')
        if self.force or not os.path.exists(tbl_branch_path):
            tblBranch = heppyresult.TblBranch(
                analyzerName=analyzerName,
                fileName=fileName,
                treeName=treeName,
                outPath=tbl_branch_path,
            )
            dataset_readers.add(tblBranch)

        # tbl_branch_size.txt
        tbl_branch_size_path = os.path.join(self.outdir, 'tbl_branch_size.txt')
        if self.force or not os.path.exists(tbl_branch_size_path):
            tblBranchSize = heppyresult.TblBranch(
                analyzerName=analyzerName,
                fileName=fileName,
                treeName=treeName,
                outPath=tbl_branch_size_path,
                addType=False,
                addSize=True,
                sortBySize=True,
            )
            dataset_readers.add(tblBranchSize)

        # tbl_branch_title.txt
        tbl_branch_title_path = os.path.join(self.outdir,
                                             'tbl_branch_title.txt')
        if self.force or not os.path.exists(tbl_branch_title_path):
            tblBranchTitle = heppyresult.TblBranch(
                analyzerName=analyzerName,
                fileName=fileName,
                treeName=treeName,
                outPath=tbl_branch_title_path,
                addType=False,
                addSize=False,
                addTitle=True,
            )
            dataset_readers.add(tblBranchTitle)

        # tbl_dataset.txt
        tbl_dataset_path = os.path.join(self.outdir, 'tbl_dataset.txt')
        if self.force or not os.path.exists(tbl_dataset_path):
            tblDataset = heppyresult.TblComponentConfig(
                outPath=tbl_dataset_path,
                columnNames=('dataset', ),
                keys=('dataset', ),
            )
            dataset_readers.add(tblDataset)

        # tbl_xsec.txt for MC
        if self.datamc == 'mc' and not self.susy_sms:
            tbl_xsec_path = os.path.join(self.outdir, 'tbl_xsec.txt')
            if self.force or not os.path.exists(tbl_xsec_path):
                tblXsec = heppyresult.TblComponentConfig(
                    outPath=tbl_xsec_path,
                    columnNames=('xsec', ),
                    keys=('xSection', ),
                )
                dataset_readers.add(tblXsec)

        # tbl_nevt.txt for MC
        if self.datamc == 'mc' and not self.susy_sms:
            tbl_nevt_path = os.path.join(self.outdir, 'tbl_nevt.txt')
            if self.force or not os.path.exists(tbl_nevt_path):
                tblNevt = heppyresult.TblCounter(
                    outPath=tbl_nevt_path,
                    columnNames=('nevt', 'nevt_sumw'),
                    analyzerName='skimAnalyzerCount',
                    fileName='SkimReport.txt',
                    levels=('All Events', 'Sum Weights'))
                dataset_readers.add(tblNevt)

        # tbl_nevt_sms.txt for MC SUSY SMS
        if self.datamc == 'mc' and self.susy_sms:
            tbl_nevt_sms_path = os.path.join(self.outdir, 'tbl_nevt_sms.txt')
            if self.force or not os.path.exists(tbl_nevt_sms_path):
                tblSMSNevt = heppyresult.TblSMSNevt(
                    analyzerName='susyParameterScanAnalyzer',
                    fileName='genEvtsPerMass.root',
                    outPath=tbl_nevt_sms_path)
                dataset_readers.add(tblSMSNevt)

        reader_top = alphatwirl.loop.ReaderComposite()
        collector_top = alphatwirl.loop.CollectorComposite()
        for r, c in reader_collector_pairs:
            reader_top.add(r)
            collector_top.add(c)
        eventLoopRunner = alphatwirl.loop.MPEventLoopRunner(
            self.parallel.communicationChannel)
        eventBuilderConfigMaker = heppyresult.EventBuilderConfigMaker(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
            check_files=True,
            skip_error_files=True)
        datasetIntoEventBuildersSplitter = alphatwirl.loop.DatasetIntoEventBuildersSplitter(
            EventBuilder=alphatwirl.roottree.BuildEvents,
            eventBuilderConfigMaker=eventBuilderConfigMaker,
            maxEvents=self.max_events_per_dataset,
            maxEventsPerRun=self.max_events_per_process,
            maxFiles=self.max_files_per_dataset,
            maxFilesPerRun=self.max_files_per_process)
        eventReader = alphatwirl.loop.EventDatasetReader(
            eventLoopRunner=eventLoopRunner,
            reader=reader_top,
            collector=collector_top,
            split_into_build_events=datasetIntoEventBuildersSplitter)

        dataset_readers.add(eventReader)

        if components == ['all']:
            components = None
        heppyResult = heppyresult.HeppyResult(
            path=self.heppydir,
            componentNames=components,
            componentHasTheseFiles=[analyzerName])

        if self.parallel_mode in ('multiprocessing', ):
            loop = alphatwirl.datasetloop.DatasetLoop(
                datasets=heppyResult.components(), reader=dataset_readers)
        else:
            loop = alphatwirl.datasetloop.ResumableDatasetLoop(
                datasets=heppyResult.components(),
                reader=dataset_readers,
                workingarea=self.parallel.workingarea)

        return loop
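
# Note: the *Composite objects used above follow a plain composite pattern.
# A minimal stand-in (an illustration, not alphatwirl's classes, whose
# begin/event/end protocol this sketch only approximates):
class ReaderComposite(object):
    def __init__(self):
        self.readers = []

    def add(self, reader):
        self.readers.append(reader)

    def begin(self, event):
        for reader in self.readers:
            reader.begin(event)

    def event(self, event):
        for reader in self.readers:
            reader.event(event)

    def end(self):
        return [reader.end() for reader in self.readers]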
Example #28

    def _configure(self, components, reader_collector_pairs, analyzerName,
                   fileName, treeName):

        component_readers = alphatwirl.heppyresult.ComponentReaderComposite()

        # tbl_heppyresult.txt
        tbl_heppyresult_path = os.path.join(self.outdir, 'tbl_heppyresult.txt')
        if self.force or not os.path.exists(tbl_heppyresult_path):
            # e.g., '74X/MC/20150810_MC/20150810_SingleMu'
            heppydir_rel = '/'.join(self.heppydir.rstrip('/').split('/')[-4:])
            alphatwirl.mkdir_p(os.path.dirname(tbl_heppyresult_path))
            with open(tbl_heppyresult_path, 'w') as f:
                f.write('heppyresult\n')
                f.write(heppydir_rel + '\n')

        # tbl_dataset.txt
        tbl_dataset_path = os.path.join(self.outdir, 'tbl_dataset.txt')
        if self.force or not os.path.exists(tbl_dataset_path):
            tblDataset = alphatwirl.heppyresult.TblComponentConfig(
                outPath=tbl_dataset_path,
                columnNames=('dataset', ),
                keys=('dataset', ),
            )
            component_readers.add(tblDataset)

        # tbl_xsec.txt for MC
        if not self.isdata:
            tbl_xsec_path = os.path.join(self.outdir, 'tbl_xsec.txt')
            if self.force or not os.path.exists(tbl_xsec_path):
                tblXsec = alphatwirl.heppyresult.TblComponentConfig(
                    outPath=tbl_xsec_path,
                    columnNames=('xsec', ),
                    keys=('xSection', ),
                )
                component_readers.add(tblXsec)

        # tbl_nevt.txt for MC
        if not self.isdata:
            tbl_nevt_path = os.path.join(self.outdir, 'tbl_nevt.txt')
            if self.force or not os.path.exists(tbl_nevt_path):
                tblNevt = alphatwirl.heppyresult.TblCounter(
                    outPath=tbl_nevt_path,
                    columnNames=('nevt', 'nevt_sumw'),
                    analyzerName='skimAnalyzerCount',
                    fileName='SkimReport.txt',
                    levels=('All Events', 'Sum Weights'))
                component_readers.add(tblNevt)

        # event loop
        reader = alphatwirl.loop.ReaderComposite()
        collector = alphatwirl.loop.CollectorComposite(
            self.parallel.progressMonitor.createReporter())
        for r, c in reader_collector_pairs:
            reader.add(r)
            collector.add(c)
        eventLoopRunner = alphatwirl.loop.MPEventLoopRunner(
            self.parallel.communicationChannel)
        eventBuilderConfigMaker = alphatwirl.heppyresult.EventBuilderConfigMaker(
            analyzerName=analyzerName,
            fileName=fileName,
            treeName=treeName,
        )
        datasetIntoEventBuildersSplitter = alphatwirl.loop.DatasetIntoEventBuildersSplitter(
            EventBuilder=alphatwirl.heppyresult.EventBuilder,
            eventBuilderConfigMaker=eventBuilderConfigMaker,
            maxEvents=self.max_events_per_dataset,
            maxEventsPerRun=self.max_events_per_process)
        eventReader = alphatwirl.loop.EventsInDatasetReader(
            eventLoopRunner=eventLoopRunner,
            reader=reader,
            collector=collector,
            split_into_build_events=datasetIntoEventBuildersSplitter)
        component_readers.add(eventReader)

        if components == ['all']:
            components = None
        heppyResult = alphatwirl.heppyresult.HeppyResult(
            path=self.heppydir,
            componentNames=components,
            componentHasTheseFiles=[analyzerName])
        componentLoop = alphatwirl.heppyresult.ComponentLoop(
            heppyResult, component_readers)

        return componentLoop