Example #1
def reset_singleQuad_xml_files(type, modules=modulelist, chips=chiplist):
    xmlobject = XMLInfo(xmlfile_blueprint)
    xmlobject.set_module_attribute_by_moduleid(0, 'Id', 1)
    xmlobject.set_txtfilepath_by_moduleid(1, txtfolder)
    for chip in chips:
        if chip == 0: continue
        xmlobject.copy_chip_by_moduleid(1, 0, chip)
    for module in modules:
        targetname = os.path.join(
            xmlfolder, 'CMSIT_singleQuad_%s_%s.xml' % (module, type))
        xmlobject.save_xml_as(targetname)
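A minimal usage sketch for the helper above, assuming the script-level names it relies on (modulelist, chiplist, xmlfolder, xmlfile_blueprint, txtfolder) are already defined; the call regenerates one blueprint-based XML file per module for the given scan type (the module name below is a placeholder):

# Hypothetical call: rebuild the single-quad XML files for an 'scurve' run
reset_singleQuad_xml_files(type='scurve', modules=['modT01'], chips=[0, 1, 2, 3])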
Example #2
File: Submitter.py Project: reimersa/LEAF
    def __init__(self, xmlfilename):
        self.xmlinfo = XMLInfo(xmlfilename)

        path_parts_local = [item for item in self.xmlinfo.xmlfilename.split('/')[:-1]]
        self.configdir = str(os.path.join('/', *path_parts_local))

        path_parts_local.append('workdir_%s' % (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))
        self.workdir_local = str(os.path.join('/', *path_parts_local))

        self.se_director = self.xmlinfo.configsettings.SEDirector
        self.use_se = False
        if self.xmlinfo.configsettings.OutputDirectory.startswith('/pnfs') and not ('t3dcachedb03.psi.ch' in self.se_director):
            self.use_se = True
        path_parts_remote = [item for item in self.xmlinfo.configsettings.OutputDirectory.split('/')]
        path_parts_remote.append('workdir_%s' % (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))

        self.workdir_remote = str(os.path.join('/', *path_parts_remote))

        self.missing_files_txt = os.path.join(self.workdir_local, 'commands_missing_files.txt')
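For illustration only (not part of the project code): with an XML file at /path/to/config/MyAnalysis.xml, the path handling above yields configdir = /path/to/config and a local workdir /path/to/config/workdir_MyAnalysis, since the slice [:-4] strips the '.xml' extension:

import os

# Hypothetical walk-through of the path construction in __init__
xmlfilename = '/path/to/config/MyAnalysis.xml'
parts = xmlfilename.split('/')[:-1]                    # ['', 'path', 'to', 'config']
configdir = os.path.join('/', *parts)                  # '/path/to/config'
parts.append('workdir_%s' % xmlfilename.split('/')[-1][:-4])
workdir_local = os.path.join('/', *parts)              # '/path/to/config/workdir_MyAnalysis'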
Example #3
File: Submitter.py Project: thaarres/LEAF
    def __init__(self, xmlfilename):
        self.xmlinfo = XMLInfo(xmlfilename)

        path_parts_local = [
            item for item in self.xmlinfo.xmlfilename.split('/')[:-1]
        ]
        self.configdir = str(os.path.join('/', *path_parts_local))

        path_parts_local.append('workdir_%s' %
                                (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))
        self.workdir_local = str(os.path.join('/', *path_parts_local))

        path_parts_remote = [
            item
            for item in self.xmlinfo.configsettings.OutputDirectory.split('/')
        ]
        path_parts_remote.append(
            'workdir_%s' % (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))
        self.workdir_remote = str(os.path.join('/', *path_parts_remote))

        self.missing_files_txt = os.path.join(self.workdir_local,
                                              'commands_missing_files.txt')
Example #4
def prepare_singleQuad_xml_files(type_name, type_setting, modules=modulelist):
    for module in modules:
        xmlfilename = os.path.join(
            xmlfolder, 'CMSIT_singleQuad_%s_%s.xml' % (module, type_name))
        xmlobject = XMLInfo(xmlfilename)
        for setting in daqsettings_per_xmltype[type_setting]:
            xmlobject.set_daq_settings(
                setting, daqsettings_per_xmltype[type_setting][setting])
        for chip in chiplist:
            xmlobject.set_chip_attribute_by_moduleid(1, chip, 'Lane', str(
                chip))  # the lane corresponds to the chip ID on the single AB
            xmlobject.set_chip_attribute_by_moduleid(
                1, chip, 'configfile',
                'CMSIT_RD53_%s_chip%i_default.txt' % (module, chip))

        chip_settings_thismodule = chip_settings[module]
        keepchips = []
        for chip in chip_settings_thismodule:
            settings = chip_settings_thismodule[chip]
            for setting in settings:
                xmlobject.set_chip_setting_by_modulename(
                    module, chip, setting, settings[setting])
        xmlobject.keep_only_chips_by_modulename(
            module, chip_settings_thismodule.keys())
        xmlobject.save_xml_as(xmlfilename)
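A minimal sketch of how the two single-quad helpers might be combined, assuming the module-level dictionaries used above (daqsettings_per_xmltype, chip_settings) and the folder paths are defined in the script; the reset step rebuilds the files from the blueprint, and the prepare step then applies the DAQ settings and per-chip overrides:

# Hypothetical sequence: rebuild, then configure, the single-quad XML files for an 'scurve' scan
reset_singleQuad_xml_files(type='scurve')
prepare_singleQuad_xml_files(type_name='scurve', type_setting='scurve')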
Example #5
def reset_and_prepare_Ring_xml_file(type_name, type_setting,
                                    ids_and_chips_per_module, ring):

    xmlobject = XMLInfo(xmlfile_blueprint)

    for setting in daqsettings_per_xmltype[type_setting]:
        xmlobject.set_daq_settings(
            setting, daqsettings_per_xmltype[type_setting][setting])

    # set up all modules
    moduleids = []
    for mod in ids_and_chips_per_module:
        moduleids.append(ids_and_chips_per_module[mod][0])
    for id in moduleids:
        if id == 0: continue
        xmlobject.copy_module_by_moduleid(0, id)
        xmlobject.set_txtfilepath_by_moduleid(id, txtfolder)
    xmlobject.keep_only_modules_by_moduleid(moduleids)

    # set up chips per module
    for module in ids_and_chips_per_module:
        id = ids_and_chips_per_module[module][0]
        chips = ids_and_chips_per_module[module][1]

        # first create all chips, then delete the unwanted ones
        for chip in [0, 1, 2, 3]:
            if not chip == 0:
                xmlobject.copy_chip_by_moduleid(id, 0, chip)
            if ring == 'R1':
                xmlobject.set_chip_attribute_by_moduleid(
                    id, chip, 'Lane', str(chip))
            elif ring == 'R3':
                xmlobject.set_chip_attribute_by_moduleid(id, chip, 'Lane', 0)
            else:
                raise AttributeError('invalid ring specified: %s' %
                                     (str(ring)))
            xmlobject.set_chip_attribute_by_moduleid(
                id, chip, 'configfile',
                'CMSIT_RD53_%s_chip%i_default.txt' % (module, chip))
            if chip in chip_settings[module]:
                settings = chip_settings[module][chip]
                for setting in settings:
                    xmlobject.set_chip_setting_by_modulename(
                        module, chip, setting, settings[setting])
        xmlobject.keep_only_chips_by_moduleid(id, chips)

    targetname = os.path.join(xmlfolder,
                              'CMSIT_disk%s_%s.xml' % (str(ring), type_name))
    xmlobject.save_xml_as(targetname)
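The indexing with [0] and [1] above implies that ids_and_chips_per_module maps each module name to a (module id, chips to keep) pair. A hypothetical call, with placeholder module names and ids, could look like this:

# Hypothetical R1 configuration: module name -> (module id, list of chips to keep)
ids_and_chips_per_module_R1 = {
    'modT01': (1, [0, 1, 2, 3]),
    'modT02': (2, [0, 2]),
}
reset_and_prepare_Ring_xml_file('ber', 'scurve', ids_and_chips_per_module_R1, 'R1')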
Example #6
def run_ber_scan(modules,
                 chips_per_module,
                 ring,
                 positions,
                 tap_settings=[],
                 mode='time',
                 value=10):

    # first, enable TAP1, TAP2

    for moduleidx, module in enumerate(modules):
        for chip in chips_per_module[module]:

            if ring == 'singleQuad':
                if not len(modules) == 1:
                    raise AttributeError(
                        'Trying to run BER in singleQuad mode with more than one module'
                    )
                module = modules[0]
                xmlfile_for_ber = os.path.join(
                    xmlfolder, 'CMSIT_%s_%s_%s.xml' % (ring, module, 'ber'))
                reset_singleQuad_xml_files(type='ber',
                                           modules=modules,
                                           chips=chips_per_module[module])
                prepare_singleQuad_xml_files(type_name='ber',
                                             type_setting='scurve',
                                             modules=modules)
            elif ring == 'R1':
                xmlfile_for_ber = os.path.join(
                    xmlfolder, 'CMSIT_disk%s_%s.xml' % (ring, 'ber'))
                reset_and_prepare_Ring_xml_file('ber', 'scurve',
                                                ids_and_chips_per_module_R1,
                                                ring)
            elif ring == 'R3':
                xmlfile_for_ber = os.path.join(
                    xmlfolder, 'CMSIT_disk%s_%s.xml' % (ring, 'ber'))
            xmlobject = XMLInfo(xmlfile_for_ber)
            print module, chip
            xmlobject.keep_only_modules_by_modulename([module])
            xmlobject.keep_only_chips_by_modulename(module, [chip])
            xmlobject.set_chip_setting_by_modulename(module, chip,
                                                     'CML_CONFIG', '127')

            # now, in a loop, set TAP values to scan through
            for tap0, tap1, tap2 in tap_settings:
                print(tap0, tap1, tap2)
                xmlobject.set_chip_setting_by_modulename(
                    module, chip, 'CML_TAP0_BIAS', str(tap0))
                xmlobject.set_chip_setting_by_modulename(
                    module, chip, 'CML_TAP1_BIAS', str(tap1))
                xmlobject.set_chip_setting_by_modulename(
                    module, chip, 'CML_TAP2_BIAS', str(tap2))
                xmlobject.save_xml_as(xmlfile_for_ber)

                # assemble the OS command
                if mode == 'time': tuningstepname = 'prbstime'
                elif mode == 'frames': tuningstepname = 'prbsframes'
                else:
                    raise AttributeError(
                        'Function \'run_ber_scan()\' received invalid argument for \'mode\': %s. Must be \'time\' or \'frames\''
                        % mode)
                command_p = 'CMSITminiDAQ -f %s p' % (xmlfile_for_ber)
                command_ber = 'CMSITminiDAQ -f %s -c %s %i BE-FE 2>&1 | tee %s' % (
                    xmlfile_for_ber, tuningstepname, value,
                    os.path.join(
                        logfolder, 'ber_%s_%s_chip%i_pos%s_%i_%i_%i.log' %
                        (ring, module, chip, str(
                            positions[moduleidx]), tap0, tap1, tap2)))

                # execute the OS command
                print(command_p)
                os.system(command_p)
                time.sleep(1)
                print(command_ber)
                os.system(command_ber)
                time.sleep(2)
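A minimal driver sketch for the scan above, with placeholder module names, positions and TAP working points; it assumes the ring-specific XML helpers from the previous examples and the CMSITminiDAQ binary are available on the test bench:

# Hypothetical BER scan over three (TAP0, TAP1, TAP2) working points, running 'prbstime' with value 10
tap_settings = [(500, 0, 0), (600, 100, 0), (700, 200, 100)]
run_ber_scan(modules=['modT01'],
             chips_per_module={'modT01': [0, 1, 2, 3]},
             ring='R1',
             positions=['pos1'],
             tap_settings=tap_settings,
             mode='time',
             value=10)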
Example #7
File: Submitter.py Project: reimersa/LEAF
class Submitter:
    def __init__(self, xmlfilename):
        self.xmlinfo = XMLInfo(xmlfilename)

        path_parts_local = [item for item in self.xmlinfo.xmlfilename.split('/')[:-1]]
        self.configdir = str(os.path.join('/', *path_parts_local))

        path_parts_local.append('workdir_%s' % (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))
        self.workdir_local = str(os.path.join('/', *path_parts_local))

        self.se_director = self.xmlinfo.configsettings.SEDirector
        self.use_se = False
        if self.xmlinfo.configsettings.OutputDirectory.startswith('/pnfs') and not ('t3dcachedb03.psi.ch' in self.se_director):
            self.use_se = True
        path_parts_remote = [item for item in self.xmlinfo.configsettings.OutputDirectory.split('/')]
        path_parts_remote.append('workdir_%s' % (self.xmlinfo.xmlfilename.split('/')[-1][:-4]))

        self.workdir_remote = str(os.path.join('/', *path_parts_remote))

        self.missing_files_txt = os.path.join(self.workdir_local, 'commands_missing_files.txt')



    def Divide(self):
        print green('--> Dividing and preparing jobs')

        # First create local and remote (potentially T3) workdir. Inside, create folders for each sample to store the small XML and root files
        self.create_local_workdir()
        self.create_remote_workdir()
        njobs_and_type_per_dataset = self.divide_xml_files()
        self.write_expected_files(njobs_and_type_per_dataset=njobs_and_type_per_dataset)
        self.Output()



    def Submit(self):
        print green('--> Submitting jobs')

        missing_files_per_dataset = self.Output()

        (hms, queue, runtime_str) = tuplize_runtime(str(self.xmlinfo.submissionsettings.Walltime))

        for datasetname in missing_files_per_dataset:
            missing_files = os.path.join(self.workdir_local, datasetname, 'commands_missing_files.txt')
            with open(missing_files, 'r') as f:
                njobs = len(f.readlines())
            if njobs < 1: continue
            environ_path = os.getenv('PATH')
            environ_ld_lib_path = os.getenv('LD_LIBRARY_PATH')
            command = 'sbatch -a 1-%i -J %s -p %s --chdir %s -t %s submit_analyzer_command.sh %s %s %s' % (njobs, datasetname, queue, os.path.join(self.workdir_local, datasetname, 'joboutput'), runtime_str, missing_files, environ_path, environ_ld_lib_path)
            jobid = int(subprocess.check_output(command.split(' ')).rstrip('\n').split(' ')[-1])
            print green('  --> Submitted array of %i jobs for dataset %s. JobID: %i' % (njobs, datasetname, jobid))



    def RunLocal(self, ncores):
        print green('--> Locally running jobs on %i cores' % (ncores))
        missing_files_per_dataset = self.Output()

        # njobs = -1
        # with open(self.missing_files_txt, 'r') as f:
        #     njobs = len(f.readlines())
        commands = []
        for datasetname in missing_files_per_dataset:
            missing_files = os.path.join(self.workdir_local, datasetname, 'commands_missing_files.txt')
            with open(missing_files, 'r') as f:
                for line in f.readlines():
                    commands.append(line)
        if len(commands) < 1: return
        print green('  --> Going to run %i jobs locally on %i cores.' % (len(commands), ncores))
        execute_commands_parallel(commands, ncores)
        print green('  --> Finished running missing jobs locally.')





    def Output(self):
        print green('--> Checking outputs and updating list of missing files')

        expected_files = self.read_expected_files()
        missing_files_per_dataset = self.find_missing_files(expected_files=expected_files)
        missing_files_all = []
        for datasetname in missing_files_per_dataset:
            # for item in missing_files_per_dataset[datasetname]:
            #     missing_files_all.append(item)
            commands = [self.get_command_for_file(filename_expected=filename_expected) for filename_expected in missing_files_per_dataset[datasetname]]
            with open(os.path.join(self.workdir_local, datasetname, 'commands_missing_files.txt'), 'w') as f:
                for command in commands:
                    f.write(command + '\n')

        # print information on missing files to shell
        print '\n\n'
        print green('  Status report of job processing')
        print green('  ===============================\n')


        table = PrettyTable(['Sample', 'Processed files'])
        table.align['Sample'] = 'l'
        table.align['Processed files'] = 'l'
        nmissing_total = 0
        nexpected_total = 0
        for datasetname in missing_files_per_dataset:
            table.add_row([datasetname, '(%i / %i)' % (len(expected_files[datasetname]) - len(missing_files_per_dataset[datasetname]), len(expected_files[datasetname]))])
            nmissing_total += len(missing_files_per_dataset[datasetname])
            nexpected_total += len(expected_files[datasetname])


        print green(table)
        total = '(%i / %i)' % (nexpected_total - nmissing_total, nexpected_total)
        tb_widths = table._widths
        print green('| Total' + ' ' * (tb_widths[0] - len('Total') ) + ' | ' + total + ' ' * (tb_widths[1] - len(total) ) + ' |')
        print green('+-' + '-' * tb_widths[0] + '-+-' + '-' * tb_widths[1] + '-+')
        print '\n\n'
        return missing_files_per_dataset



    def Clean(self):
        print green('--> Cleaning up (removing) local and remote workdir')

        shutil.rmtree(self.workdir_local, ignore_errors=True)
        if not self.use_se:
            shutil.rmtree(self.workdir_remote, ignore_errors=True)
        else:
            rmcommand = 'LD_LIBRARY_PATH=\'\' PYTHONPATH=\'\' gfal-rm -r %s' % (str(self.se_director + self.workdir_remote))
            execute_command_silent(rmcommand)
        print green('--> Cleaned up (removed) local and remote workdir')


    def Add(self, force=False, ignoretree=False, nchunks=9):
        print green('--> Adding finished samples')

        # update list of missing files
        self.Output()

        # find datasets that are already done
        datasetnames_complete = []
        expected_files = self.read_expected_files()
        missing_files = self.find_missing_files(expected_files=expected_files)
        for datasetname in missing_files:
            if len(missing_files[datasetname]) == 0:
                datasetnames_complete.append(datasetname)

        # add all completed datasets
        for datasetname in datasetnames_complete:

            print green('  --> Adding dataset \'%s\'' % (datasetname))

            # get list of expected files
            if len(expected_files[datasetname]) < 1: continue

            outpath_parts = self.workdir_remote.split('/')[:-1]
            if self.use_se:
                outpath = self.se_director + str(os.path.join('/', *outpath_parts))
            else:
                outpath = str(os.path.join('/', *outpath_parts))

            # get outfilename
            outfilename_parts = expected_files[datasetname][0].split('/')[-1].split('_')[:-1]
            outfilename = '_'.join(outfilename_parts) + '.root'
            outfilepath_and_name = os.path.join(outpath, outfilename)


            chunklength = len(expected_files[datasetname]) / nchunks
            if chunklength < 1:
                # just execute single command
                expected_files_ordered = order_haddlist(haddlist=clean_haddlist(haddlist=expected_files[datasetname], use_se=self.use_se))
                expected_files_ordered_str = ' '.join(expected_files_ordered)
                hadd_large(outfilepath_and_name, expected_files_ordered, force, ignoretree)

            else:
                chunkoutnames = []
                commands = []
                argumentlist = []
                for idx, chunk in enumerate(chunks(expected_files[datasetname], chunklength)):
                    chunkoutname = outfilepath_and_name.replace('.root', '_tmp%i.root' % (idx))
                    chunkoutnames.append(chunkoutname)

                    # order expected files such that the first has an AnalysisTree (if any of the files has one).
                    expected_files_ordered = order_haddlist(haddlist=clean_haddlist(haddlist=chunk, use_se=self.use_se))
                    expected_files_ordered_str = ' '.join(expected_files_ordered)

                    if os.path.isfile(outfilepath_and_name) and not force:
                        continue

                    command = 'hadd'
                    if force: command += ' -f'
                    if ignoretree: command += ' -T'
                    command += ' %s' % (chunkoutname)
                    command += ' ' + expected_files_ordered_str
                    commands.append(command)

                    # print force, ignoretree
                    singleargument = '%s---%s---%s---%s' % (chunkoutname, expected_files_ordered_str, str(force), str(ignoretree))
                    # print singleargument
                    argumentlist.append(singleargument)


                # execute_commands_parallel(commands)
                execute_function_parallel(func=hadd_large_singlearg, argumentlist=argumentlist)
                chunkoutnames_ordered = order_haddlist(haddlist=clean_haddlist(haddlist=chunkoutnames, use_se=self.use_se))
                chunkoutnames_str = ' '.join(order_haddlist(haddlist=clean_haddlist(haddlist=chunkoutnames, use_se=self.use_se)))
                hadd_large(outfilepath_and_name, chunkoutnames_ordered, force, ignoretree)

                for chunkoutname in chunkoutnames_ordered:
                    command = 'rm -rf %s' % (chunkoutname)
                    os.system(command)


        if force:
            print green('--> Added all completed files')
        else:
            print green('--> Added newly completed files, if any')



    def Hadd(self):
        print green('--> Hadding samples into groups. Assuming all samples have been processed entirely.')

        # update list of missing files
        # self.Output()

        datasets_per_group = OrderedDict()
        for ds in self.xmlinfo.datasets:
            datasetname = str(ds.settings.Name)
            group = str(ds.settings.Group)
            datasettype = str(ds.settings.Type)
            if group in datasets_per_group:
                datasets_per_group[group][1].append(datasetname)
            else:
                datasets_per_group[group] = (datasettype, [datasetname])

        commands = []
        argumentlist = []
        for group in datasets_per_group:
            if group == 'None': continue # don't hadd if files are not part of a group
            grouptype = datasets_per_group[group][0]
            outfilename = '%s__%s.root' % (grouptype, group)
            outpath_parts = self.workdir_remote.split('/')[:-1]
            if self.use_se:
                outpath = self.se_director + str(os.path.join('/', *outpath_parts))
            else:
                outpath = str(os.path.join('/', *outpath_parts))
            outfilepath_and_name = os.path.join(outpath, outfilename)
            files_to_add = []
            for filename in datasets_per_group[group][1]:
                files_to_add.append(os.path.join(outpath,  '%s__%s.root' % (grouptype, filename)))
            files_to_add_ordered = order_haddlist(haddlist=clean_haddlist(haddlist=files_to_add, use_se=self.use_se))
            sourcestring = ' '.join( files_to_add_ordered )
            # sourcestring = ' '.join( [os.path.join(outpath,  '%s__%s.root' % (grouptype, filename)) for filename in datasets_per_group[group][1]] )

            # command = 'hadd -f -T %s %s' % (outfilepath_and_name, sourcestring)
            # commands.append(command)

            singleargument = '%s---%s---%s---%s' % (outfilepath_and_name, sourcestring, 'True', 'True')
            # print singleargument
            argumentlist.append(singleargument)

        # execute_commands_parallel(commands)
        execute_function_parallel(func=hadd_large_singlearg, argumentlist=argumentlist)
        print green('--> Added datasets into groups.')















    # smaller functions to keep main part clean


    def create_local_workdir(self):
        """ Create workdir + 1 subdir per sample for Submitter in the local config/ directory on the same level as the XML file """

        ensureDirectory(self.workdir_local)
        for dataset in self.xmlinfo.datasets:
            samplename = dataset.settings.Name
            ensureDirectory(os.path.join(self.workdir_local, samplename))

            # copy .dtd file
            copy(os.path.join(self.configdir, 'Configuration.dtd'), os.path.join(self.workdir_local, samplename))
        print green('  --> Created local workdir \'%s\'' % (self.workdir_local))


    def create_remote_workdir(self):
        """ Create workdir + 1 subdir per sample for Submitter in the remote target directory """

        workdirname = self.workdir_remote
        if self.use_se:
            workdirname = self.se_director + workdirname
        ensureDirectory(workdirname, use_se=self.use_se)
        for dataset in self.xmlinfo.datasets:
            samplename = dataset.settings.Name
            ensureDirectory(os.path.join(workdirname, samplename), use_se=self.use_se)
        print green('  --> Created remote workdir \'%s\'' % (self.workdir_remote))

    def divide_xml_files(self):
        """ From the main XML file, create a number of smaller ones, one per job to be submitted and divided by sample. """

        # check how to split jobs
        nfiles_per_job  = int(self.xmlinfo.submissionsettings.FilesPerJob)
        nevents_per_job = int(self.xmlinfo.submissionsettings.EventsPerJob)

        is_filesplit  = False
        is_eventsplit = False
        if nfiles_per_job > 0: is_filesplit = True
        if nevents_per_job > 0: is_eventsplit = True

        if (not is_filesplit and not is_eventsplit) or (is_filesplit and is_eventsplit):
            raise ValueError(red('In the XML file, either both or neither of "EventsPerJob" and "FilesPerJob" is >0. This is not supported, please choose one of the two options to split jobs.'))

        # all_datasets = deepcopy(self.xmlinfo.datasets)

        # handle filesplit
        njobs_and_type_per_dataset = OrderedDict()
        if is_filesplit:
            for dataset in self.xmlinfo.datasets:
                datasetname = str(dataset.settings.Name)
                print green('    --> Dividing sample %s' % (datasetname))
                njobs_this_dataset = 0
                nfiles = len(dataset.infiles)
                njobs = int(math.ceil(nfiles/float(nfiles_per_job)))
                for i in range(njobs):
                    self.write_single_xml(datasetname=datasetname, index=i+1, nfiles_per_job=nfiles_per_job)
                    # self.xmlinfo.datasets = all_datasets
                    njobs_this_dataset += 1
                njobs_and_type_per_dataset[datasetname] = (njobs_this_dataset, str(dataset.settings.Type))

        # handle eventsplit
        else:
            for dataset in self.xmlinfo.datasets:
                datasetname = str(dataset.settings.Name)
                print green('    --> Dividing sample %s' % (datasetname))
                njobs_this_dataset = 0
                nevents = get_number_events_in_dataset(dataset=dataset)
                njobs = int(math.ceil(nevents/float(nevents_per_job)))
                for i in range(njobs):
                    self.write_single_xml(datasetname=datasetname, index=i+1, nevents_per_job=nevents_per_job)
                    # self.xmlinfo.datasets = all_datasets
                    njobs_this_dataset += 1
                njobs_and_type_per_dataset[datasetname] = (njobs_this_dataset, str(dataset.settings.Type))

        print green('  --> Divided XML files')
        return njobs_and_type_per_dataset


    def write_expected_files(self, njobs_and_type_per_dataset):
        """ Create a txt file, containing a list of samples to be expected according to the given split settings. Uses return-value of divide_xml_files()."""

        for datasetname in njobs_and_type_per_dataset:
            njobs = njobs_and_type_per_dataset[datasetname][0]
            datasettype = njobs_and_type_per_dataset[datasetname][1]

            if self.use_se:
                outpath = self.se_director + os.path.join(self.workdir_remote, datasetname)
            else:
                outpath = os.path.join(self.workdir_remote, datasetname)
            expected_files = []
            for i in range(njobs):
                filename = '%s__%s_%i.root\n' % (datasettype, datasetname, i+1)
                expected_files.append(os.path.join(outpath, filename))

            outfilename = os.path.join(self.workdir_local, datasetname, 'expected_files.txt')
            with open(outfilename, 'w') as f:
                for file in expected_files:
                    f.write(file)

            sampleworkdir = os.path.join(self.workdir_local, datasetname, 'joboutput')
            ensureDirectory(sampleworkdir)

        print green('  --> Wrote list of expected files')











    def write_single_xml(self, datasetname, index, nfiles_per_job=-1, nevents_per_job=-1):

        if nfiles_per_job <= 0 and nevents_per_job <= 0:
            raise ValueError(red('It seems like the new XML files should be written neither in filesplit nor eventsplit mode. Exit.'))
        if nfiles_per_job > 0 and nevents_per_job > 0:
            raise ValueError(red('It seems like the new XML files should be written both in filesplit and eventsplit mode. Exit.'))


        outfilename = os.path.join(self.workdir_local, datasetname, '%s_%i.xml' % (datasetname, index))

        # throw away all other datasets but the one we specified as a parameter here
        self.xmlinfo.datasets_to_write = [deepcopy(ds) for ds in self.xmlinfo.datasets if ds.settings.Name == datasetname]
        if len(self.xmlinfo.datasets_to_write) != 1:
            raise ValueError(red('Found != 1 datasets with name \'%s\'' % (datasetname)))


        # filesplit mode
        if nfiles_per_job > 0:

            # remove most infiles from the one dataset selected above: keep only those we want in this small xml file
            fileindex_start = (index-1) * nfiles_per_job
            fileindex_stop  = fileindex_start + nfiles_per_job
            self.xmlinfo.datasets_to_write[0].infiles = self.xmlinfo.datasets_to_write[0].infiles[fileindex_start:fileindex_stop]

        # eventsplit mode
        else:

            # only need to specify nevents_max and _skip in the xml file
            self.xmlinfo.configsettings.NEventsSkip = str((index-1) * nevents_per_job)
            self.xmlinfo.configsettings.NEventsMax  = str(nevents_per_job)

        # adjust general settings: output path, postfix
        # this does not need extra care if using the T2 or T3 SE, the "Analyzer" will do this
        self.xmlinfo.configsettings.OutputDirectory = os.path.join(self.workdir_remote, datasetname)
        self.xmlinfo.configsettings.PostFix         = '_%i' % (index)

        # write new xml file
        with open(outfilename, 'w') as f:

            # header
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write('<!DOCTYPE Configuration PUBLIC "" "Configuration.dtd"[]>\n')

            # main body (everything except for header)
            f.write(self.xmlinfo.get_XML_document().toprettyxml())


    def read_expected_files(self):

        files_per_dataset = OrderedDict()
        for dataset in self.xmlinfo.datasets:
            datasetname = str(dataset.settings.Name)
            infilename = os.path.join(self.workdir_local, datasetname, 'expected_files.txt')

            with open(infilename, 'r') as f:
                lines = f.readlines()
            filenames_expected = []
            for filename in lines:
                if filename.endswith('\n'):
                    filenames_expected.append(filename[:-len('\n')])
            files_per_dataset[datasetname] = filenames_expected
        return files_per_dataset


    def find_missing_files(self, expected_files):

        DEVNULL = open(os.devnull, 'wb')
        missing_files_per_dataset = OrderedDict()
        # nmissing_per_dataset = OrderedDict()
        for datasetname in expected_files:
            nmissing = 0
            missing = []
            for file in expected_files[datasetname]:
                # print file
                lscommand = 'ls ' if not self.use_se else 'LD_LIBRARY_PATH=\'\' PYTHONPATH=\'\' gfal-ls '
                lscommand += file
                # result = subprocess.Popen([lscommand, file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                result = subprocess.Popen(lscommand, stdout=DEVNULL, stderr=DEVNULL, shell=True)
                output = result.communicate()[0]
                returncode = result.returncode
                if returncode > 0: # opening failed
                    missing.append(file)
                    nmissing += 1
            missing_files_per_dataset[datasetname] = missing
            # nmissing_per_dataset[datasetname] = nmissing
        DEVNULL.close()
        return missing_files_per_dataset



    def get_command_for_file(self, filename_expected):
        datasetname = filename_expected.split('/')[-2]
        xmlfilename = filename_expected.split('/')[-1].split('__')[1].replace('.root', '.xml')
        fullxmlfilename = os.path.join(self.workdir_local, datasetname, xmlfilename)
        command = 'Analyzer %s' % (fullxmlfilename)
        return command
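A minimal driver sketch for the Submitter class, assuming it is run as a Python 2 script (matching the print statements above) with the project's helpers (green, ensureDirectory, hadd_large, ...) importable; the typical cycle is Divide, then Submit and Output until all expected files exist, then Add and Hadd. The XML path below is a placeholder:

# Hypothetical workflow driving the class above
submitter = Submitter('/path/to/config/MyAnalysis.xml')
submitter.Divide()   # create local/remote workdirs and one small XML file per job
submitter.Submit()   # submit one SLURM array per dataset for the still-missing outputs
submitter.Output()   # report how many expected output files exist per dataset
submitter.Add()      # hadd the per-job outputs of every completed dataset
submitter.Hadd()     # merge the per-dataset files into their groups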