Example #1
    def __create_ntuple_layer(self):
        LOG_STEM = 'ntuple_job.$(cluster).$(process)'
        jobs = []

        run_config = self.__config
        input_files = run_config['files']
        if self.__variables['test']:
            input_files = [input_files[0]]

        job_set = htc.JobSet(
            exe=self.__run_script,
            copy_exe=True,
            setup_script=self.__setup_script,
            filename=os.path.join(
                self.__job_dir, 'ntuple_production.condor'),
            out_dir=self.__job_log_dir,
            out_file=LOG_STEM + '.out',
            err_dir=self.__job_log_dir,
            err_file=LOG_STEM + '.err',
            log_dir=self.__job_log_dir,
            log_file=LOG_STEM + '.log',
            share_exe_setup=True,
            common_input_files=self.__input_files,
            transfer_hdfs_input=False,
            hdfs_store=run_config['outLFNDirBase'] + '/tmp',
            certificate=self.REQUIRE_GRID_CERT,
            cpus=1,
            memory='1500MB'
        )
        parameters = 'files={files} output_file={output_file} {params}'
        if run_config['lumiMask']:
            parameters += ' json_url={0}'.format(run_config['lumiMask'])
        n_files_per_group = SPLITTING_BY_FILE['DEFAULT']
        for name, value in SPLITTING_BY_FILE.items():
            if name in run_config['inputDataset']:
                n_files_per_group = value

        grouped_files = make_even_chunks(input_files, n_files_per_group)
        for i, f in enumerate(grouped_files):
            output_file = '{dataset}_ntuple_{job_number}.root'.format(
                dataset=run_config['outputDatasetTag'],
                job_number=i)
            args = parameters.format(
                files=','.join(f),
                output_file=output_file,
                params=run_config['pyCfgParams']
            )
            rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
            rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
            rel_out_file = os.path.join(rel_out_dir, output_file)
            rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
            job = htc.Job(
                name='ntuple_job_{0}'.format(i),
                args=args,
                output_files=[rel_out_file, rel_log_file])
            job_set.add_job(job)
            jobs.append(job)
        return jobs
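The per-dataset splitting in the example above works by substring matching: every key of SPLITTING_BY_FILE is tested against run_config['inputDataset'], and any key that occurs in the dataset name overrides the 'DEFAULT' entry (if several keys match, the result depends on dict iteration order). A minimal sketch of what such a table might look like; only the 'DEFAULT' key is confirmed by the code above, the other entries and all values are made-up illustrations:

    # Hypothetical splitting table: keys are substrings matched against the
    # dataset name, values are the number of input files per job.
    # Only 'DEFAULT' is implied by the code above; the rest is assumed.
    SPLITTING_BY_FILE = {
        'DEFAULT': 50,
        'TTJets': 10,      # assumed: heavier datasets get fewer files per job
        'SingleMuon': 20,  # assumed
    }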
Example #3
 def test_make_chunks(self):
     from ntp.utils import make_even_chunks
     l = list(range(100))
     size_of_chunk = 9
     n_groups = int(len(l) / size_of_chunk)
     chunks = list(make_even_chunks(l, size_of_chunk))
     self.assertEqual(len(chunks), n_groups)
     for c in chunks:
         self.assertGreaterEqual(len(c), size_of_chunk)
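make_even_chunks itself is not shown on this page, but the test above pins down its contract: 100 items split with size_of_chunk=9 must give int(100 / 9) = 11 chunks, each holding at least 9 items, and (per Example #8 below) a single-element list must yield exactly one chunk even when size_of_chunk exceeds the list length. A sketch that satisfies both tests; the real ntp.utils.make_even_chunks may differ:

    def make_even_chunks(sequence, size_of_chunk):
        # Number of chunks implied by the tests: at least one,
        # otherwise len(sequence) // size_of_chunk.
        n_chunks = max(1, len(sequence) // size_of_chunk)
        base, extra = divmod(len(sequence), n_chunks)
        start = 0
        for i in range(n_chunks):
            # the first `extra` chunks carry one additional element
            end = start + base + (1 if i < extra else 0)
            yield sequence[start:end]
            start = end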
Example #4
    def __create_ntuple_layer(self):
        jobs = []

        run_config = self.__config
        input_files = run_config['files']
        if self.__variables['test']:
            input_files = [input_files[0]]

        job_set = htc.JobSet(exe=self.__run_script,
                             copy_exe=True,
                             setup_script=self.__setup_script,
                             filename=os.path.join(self.__job_dir,
                                                   'ntuple_production.condor'),
                             out_dir=self.__job_log_dir,
                             out_file=LOG_STEM + '.out',
                             err_dir=self.__job_log_dir,
                             err_file=LOG_STEM + '.err',
                             log_dir=self.__job_log_dir,
                             log_file=LOG_STEM + '.log',
                             share_exe_setup=True,
                             common_input_files=self.__input_files,
                             transfer_hdfs_input=False,
                             hdfs_store=run_config['outLFNDirBase'] + '/tmp',
                             certificate=self.REQUIRE_GRID_CERT,
                             cpus=1,
                             memory='1500MB')
        parameters = 'files={files} output_file={output_file} {params}'
        if run_config['lumiMask']:
            parameters += ' json_url={0}'.format(run_config['lumiMask'])
        n_files_per_group = SPLITTING_BY_FILE['DEFAULT']
        for name, value in SPLITTING_BY_FILE.items():
            if name in run_config['inputDataset']:
                n_files_per_group = value

        grouped_files = make_even_chunks(input_files, n_files_per_group)
        for i, f in enumerate(grouped_files):
            output_file = '{dataset}_ntuple_{job_number}.root'.format(
                dataset=run_config['outputDatasetTag'], job_number=i)
            args = parameters.format(files=','.join(f),
                                     output_file=output_file,
                                     params=run_config['pyCfgParams'])
            rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
            rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
            rel_out_file = os.path.join(rel_out_dir, output_file)
            rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
            job = htc.Job(name='ntuple_job_{0}'.format(i),
                          args=args,
                          output_files=[rel_out_file, rel_log_file])
            job_set.add_job(job)
            jobs.append(job)
        return jobs
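Neither variant shows the submission step. Assuming htc is the htcondenser package (its JobSet/Job keyword arguments match the calls above), the returned jobs would typically be attached to a DAG and submitted roughly as follows; this is a hedged sketch with an assumed DAG filename, not code from this project:

    import htcondenser as htc  # assumption: `htc` is the htcondenser package

    dag = htc.DAGMan(filename='ntuple_production.dag')  # hypothetical name
    for job in jobs:         # `jobs` as returned by __create_ntuple_layer
        dag.add_job(job)     # no inter-job dependencies within this layer
    dag.submit()             # writes the DAG file and runs condor_submit_dag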
Example #5
    def create_job_layer(self, input_files, mode):
        jobs = []
        self.__root_output_files = []

        config = self.__config
        if self.__variables['test']:
            input_files = [input_files[0]]
        self.__config['input_files'] = input_files

        hdfs_store = config['outputDir']
        job_set = htc.JobSet(
            exe=self.__run_script,
            copy_exe=True,
            setup_script=self.__setup_script,
            filename=os.path.join(self.__job_dir, '{0}.condor'.format(PREFIX)),
            out_dir=self.__job_log_dir,
            out_file=OUT_FILE,
            err_dir=self.__job_log_dir,
            err_file=ERR_FILE,
            log_dir=self.__job_log_dir,
            log_file=LOG_FILE,
            share_exe_setup=True,
            common_input_files=self.__input_files,
            transfer_hdfs_input=True,
            hdfs_store=hdfs_store,
            certificate=self.REQUIRE_GRID_CERT,
            cpus=1,
            memory='900MB',
            # Site-specific blacklist: require worker nodes whose hostname
            # does not start with 'sm' or 'bs' (ClassAd string functions).
            other_args={
                'Requirements':
                    '( !stringListMember(substr(Target.Machine,0,2),"sm,bs") )'},
        )

        parameters = 'files={files} output_file_suffix={suffix} mode={mode}'
        parameters += ' dataset={dataset}'

        dataset = config['parameters']['dataset']

        n_files_per_group = N_FILES_PER_ANALYSIS_JOB
        for name, value in SPLITTING_BY_FILE.items():
            if name in dataset:
                n_files_per_group = value

        grouped_files = make_even_chunks(
            input_files, size_of_chunk=n_files_per_group)

        for i, f in enumerate(grouped_files):
            # only {job_number} appears in the template, so only it is passed
            suffix = 'atOutput_{job_number}.root'.format(job_number=i)

            args = parameters.format(
                files=','.join(f),
                suffix=suffix,
                mode=mode,
                dataset=dataset,
            )
            output_file = '_'.join([dataset, mode, suffix])
            rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
            rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
            rel_out_file = os.path.join(rel_out_dir, output_file)
            rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
            job = htc.Job(
                name='{0}_{1}_job_{2}'.format(PREFIX, mode, i),
                args=args,
                output_files=[rel_out_file, rel_log_file])
            job_set.add_job(job)
            jobs.append(job)

        return jobs
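For illustration, this is the args string the loop above builds for job 0; the file paths, mode, and dataset name are made-up values:

    parameters = ('files={files} output_file_suffix={suffix} '
                  'mode={mode} dataset={dataset}')
    args = parameters.format(
        files='/hdfs/store/a.root,/hdfs/store/b.root',  # assumed paths
        suffix='atOutput_0.root',
        mode='central',    # assumed mode string
        dataset='TTJets',  # assumed dataset name
    )
    # -> 'files=/hdfs/store/a.root,/hdfs/store/b.root '
    #    'output_file_suffix=atOutput_0.root mode=central dataset=TTJets'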
Example #7
    def create_job_layer(self, input_files, mode):
        jobs = []
        self.__root_output_files = []

        config = self.__config
        if self.__variables['test']:
            input_files = [input_files[0]]
        self.__config['input_files'] = input_files

        hdfs_store = config['outputDir']
        job_set = htc.JobSet(
            exe=self.__run_script,
            copy_exe=True,
            setup_script=self.__setup_script,
            filename=os.path.join(self.__job_dir, '{0}.condor'.format(PREFIX)),
            out_dir=self.__job_log_dir,
            out_file=OUT_FILE,
            err_dir=self.__job_log_dir,
            err_file=ERR_FILE,
            log_dir=self.__job_log_dir,
            log_file=LOG_FILE,
            share_exe_setup=True,
            common_input_files=self.__input_files,
            transfer_hdfs_input=False,
            hdfs_store=hdfs_store,
            certificate=self.REQUIRE_GRID_CERT,
            cpus=1,
            memory='1500MB',
        )

        # Override the default submit-file template with a site-specific one.
        job_set.job_template = '/storage/ec6821/NTupleProd/new/NTupleProduction/job.condor'

        parameters = 'files={files} output_file_suffix={suffix} mode={mode}'
        parameters += ' dataset={dataset}'

        n_files_per_group = N_FILES_PER_ANALYSIS_JOB
        grouped_files = make_even_chunks(
            input_files, size_of_chunk=n_files_per_group)

        dataset = config['parameters']['dataset']
        for i, f in enumerate(grouped_files):
            # only {job_number} appears in the template, so only it is passed
            suffix = 'atOutput_{job_number}.root'.format(job_number=i)

            args = parameters.format(
                files=','.join(f),
                suffix=suffix,
                mode=mode,
                dataset=dataset,
            )
            output_file = '_'.join([dataset, mode, suffix])
            rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
            rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
            rel_out_file = os.path.join(rel_out_dir, output_file)
            rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
            job = htc.Job(
                name='{0}_{1}_job_{2}'.format(PREFIX, mode, i),
                args=args,
                output_files=[rel_out_file, rel_log_file])
            job_set.add_job(job)
            jobs.append(job)

        return jobs
Example #8
 def test_make_chunks_one_element(self):
     from ntp.utils import make_even_chunks
     l = [5]
     chunks = list(make_even_chunks(l, 50))
     self.assertEqual(len(chunks), 1)