Example #1
    def process(self):
        print '---------------------------------'
        print '           Nano Merger           '
        print '---------------------------------'

        user = os.environ["USER"]

        if self.mcprivate:
            locationSE = '/pnfs/psi.ch/cms/trivcat/store/user/{}/BHNLsGen/{}/'.format(
                user, self.prodlabel)

            pointdirs = NanoTools.getPointDirs(self, locationSE)

            for pointdir in pointdirs:
                if 'merged' in pointdir: continue

                point = pointdir[pointdir.rfind('/') + 1:len(pointdir)]
                print '\n --- Mass point: {} --- '.format(point)

                self.runMergingModule(pointdir + '/nanoFiles/')

        elif self.data or self.mccentral:
            dataset_label = NanoTools.getDataLabel(
                self, self.dataset) if self.data else NanoTools.getMCLabel(
                    self, self.dataset)
            locationSE = '/pnfs/psi.ch/cms/trivcat/store/user/{}/BHNLsGen/{}/{}/{}'.format(
                user, 'data' if self.data else 'mc_central', self.prodlabel,
                dataset_label)

            self.runMergingModule(locationSE)
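
NanoTools.getPointDirs is used here but not shown in these examples; it presumably lists the mass-point subdirectories under the production location on the storage element. A minimal sketch of such a helper, under that assumption (hypothetical name and behavior):

import glob
import os

def get_point_dirs(location_se):
    # list the mass-point subdirectories of the production location
    # (hypothetical stand-in for NanoTools.getPointDirs)
    return [d for d in glob.glob(location_se + '/*') if os.path.isdir(d)]

The actual helper may filter further; only its call sites are visible in this listing.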
    def writeFileList(self, failed_files):

        logdir = NanoTools.getLogDir(self, failed_files[0], self.prodlabel,
                                     self.data)
        label = logdir[logdir.find('logs') + 5:].replace('/', '_')

        if self.data:
            filename = './files/resubmit_data_{}'.format(label)
        elif self.mcprivate:
            filename = './files/resubmit_mcprivate_{}'.format(label)
        elif self.mccentral:
            filename = './files/resubmit_mccentral_{}'.format(label)

        for file_ in failed_files:
            # open file list
            filelist = open(
                filename + '_nj{}.txt'.format(NanoTools.getStep(self, file_)),
                'w+')

            # get the file to reprocess
            logfile = NanoTools.getLogFile(self, logdir, file_)
            command = 'grep "going to run nano step on" {}'.format(logfile)
            output = subprocess.check_output(command, shell=True)
            file_toresubmit = output[output.find('step on') + 8:].strip()

            # write to file list
            filelist.write(file_toresubmit + '\n')

            # close file list
            filelist.close()

            #print 'created {}_nj{}.txt'.format(filename, NanoTools.getStep(self, file_))

        return filename
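
The grep above assumes each log contains a line of the form 'going to run nano step on <file>'. For reference, a pure-Python equivalent that avoids the shell round-trip could look like this (a sketch, not the tool's actual code):

def find_file_to_resubmit(logfile):
    # scan the log for the marker line written by the nano step and
    # return the input file it names (assumes at most one such line)
    with open(logfile) as log:
        for line in log:
            if 'going to run nano step on' in line:
                return line.split('going to run nano step on')[-1].strip()
    return None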
Example #3
    def launchMerger(self, logdir, label, jobIds, filetype):
        self.writeMergerSubmitter(label, filetype)

        if not self.doquick:
            slurm_options = '-p wn --account=t3 -o {ld}/merger{ft}step.log -e {ld}/merger{ft}step.log --job-name=mergerstep_{pl} --time=02:00:00 --dependency=afterany:{jobid}'.format(
                ld=logdir,
                ft=filetype,
                pl=label,
                jobid=NanoTools.getJobIdsList(self, jobIds),
            )
        else:
            slurm_options = '-p quick --account=t3 -o {ld}/merger{ft}step.log -e {ld}/merger{ft}step.log --job-name=mergerstep_{pl} --dependency=afterany:{jobid}'.format(
                ld=logdir,
                ft=filetype,
                pl=label,
                jobid=NanoTools.getJobIdsList(self, jobIds),
            )

        command_merge = 'sbatch {slurm_opt} submitter_merger.sh'.format(
            slurm_opt=slurm_options)

        n_job_merge = subprocess.check_output(command_merge, shell=True)
        print '       ---> (dependency)'
        print '            {}'.format(n_job_merge)

        os.system('rm submitter_merger.sh')

    def resubmit(self, failed_files):
        # strategy: per chunk resubmission
        #           submit job arrays with indices corresponding to the stepId of the failed jobs

        logdir = NanoTools.getLogDir(self, failed_files[0], self.prodlabel,
                                     self.data)
        label = logdir[logdir.find('logs') + 5:].replace('/', '_')
        array = self.getArray(failed_files)
        outputdir = failed_files[0][0:failed_files[0].find('bparknano')]
        filelist = self.writeFileList(failed_files)

        command = 'sbatch -p wn --account=t3 -o {ld}/nanostep_nj%a.log -e {ld}/nanostep_nj%a.log --job-name=nanostep_nj%a_{pl} --array {ar} --time=03:00:00 submitter.sh {outdir} {usr} {pl} {tag} {isMC} {rmt} {lst} 1'.format(
            ld=logdir,
            pl=label,
            ar=array,
            outdir=outputdir,
            usr=os.environ["USER"],
            tag=0,  #if self.tag == None else self.tag, # adapt
            isMC=1 if self.mcprivate or self.mccentral else 0,
            rmt=0 if self.mcprivate else 1,
            lst=filelist,
        )

        #print command
        os.system(command)
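
launchMerger above passes the collected job ids through NanoTools.getJobIdsList, which is not shown; slurm expects multiple ids in --dependency=afterany to be separated by colons. A plausible sketch of that helper (hypothetical implementation):

def get_job_ids_list(job_ids):
    # slurm's --dependency=afterany:<id>:<id>... wants the ids joined
    # with colons (hypothetical stand-in for NanoTools.getJobIdsList)
    return ':'.join(str(job_id) for job_id in job_ids)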
Example #5
    def launchDumper(self, nfiles, outputdir, logdir, filelist, label, jobId):
        self.writeDumperStarter(nfiles, outputdir, filelist, label)

        if not self.doquick:
            slurm_options = '-p wn --account=t3 -o {ld}/dumperstep.log -e {ld}/dumperstep.log --job-name=dumperstep_{pl} --time=03:00:00 {dp}'.format(
                ld=logdir,
                pl=label,
                dp='--dependency=afterany:{}'.format(jobId)
                if jobId != -99 else '',
            )
        else:
            slurm_options = '-p quick --account=t3 -o {ld}/dumperstep.log -e {ld}/dumperstep.log --job-name=dumperstep_{pl} {dp}'.format(
                ld=logdir,
                pl=label,
                dp='--dependency=afterany:{}'.format(jobId)
                if jobId != -99 else '',
            )

        command = 'sbatch {slurm_opt} submitter_dumper.sh {outdir} {usr} {pl} {tag} {isMC}'.format(
            slurm_opt=slurm_options,
            pl=label,
            outdir=outputdir,
            usr=os.environ["USER"],
            tag=0 if self.tag == None else self.tag,
            isMC=1 if self.mcprivate or self.mccentral else 0,
        )

        job_dump = subprocess.check_output(command, shell=True)
        if jobId == -99:
            print '\n       ---> {}'.format(job_dump)
        else:
            print '       ---> (dependency)'
            print '            {}'.format(job_dump)

        return NanoTools.getJobId(self, job_dump)
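
launchDumper returns NanoTools.getJobId(self, job_dump), another helper not shown here (launchNano below uses it too). sbatch normally replies with 'Submitted batch job <id>', so the helper presumably extracts that trailing integer. A minimal sketch under that assumption:

import re

def get_job_id(sbatch_output):
    # parse sbatch's 'Submitted batch job <id>' reply
    # (hypothetical stand-in for NanoTools.getJobId)
    match = re.search(r'Submitted batch job (\d+)', sbatch_output)
    if match is None:
        raise RuntimeError('unexpected sbatch output: {}'.format(sbatch_output))
    return match.group(1)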
Example #6
    def launchNano(self, nfiles, outputdir, logdir, filelist, label):
        if not self.doquick:
            slurm_options = '-p wn --account=t3 -o {ld}/nanostep_nj%a.log -e {ld}/nanostep_nj%a.log --job-name=nanostep_nj%a_{pl} --array {ar} --time=03:00:00'.format(
                ld=logdir,
                pl=label,
                ar='1-{}'.format(nfiles),
            )
        else:
            slurm_options = '-p quick --account=t3 -o {ld}/nanostep_nj%a.log -e {ld}/nanostep_nj%a.log --job-name=nanostep_nj%a_{pl} --array {ar}'.format(
                ld=logdir,
                pl=label,
                ar='1-{}'.format(nfiles),
            )

        command = 'sbatch {slurm_opt} submitter.sh {outdir} {usr} {pl} {tag} {isMC} {rmt} {lst} 0'.format(
            slurm_opt=slurm_options,
            pl=label,
            outdir=outputdir,
            usr=os.environ["USER"],
            tag=0 if self.tag == None else self.tag,
            isMC=1 if self.mcprivate or self.mccentral else 0,
            rmt=0 if self.mcprivate else 1,
            lst=filelist,
        )

        job = subprocess.check_output(command, shell=True)
        print '\n       ---> {}'.format(job)

        return NanoTools.getJobId(self, job)
Example #7
    def writeDumperStarter(self, nfiles, outputdir, filelist, label):
        nanoname = 'bparknano' if self.tag == None else 'bparknano_{}'.format(
            self.tag)

        f = open(filelist)
        lines = f.readlines()

        event_chain = []
        event_chain.append('TChain* c = new TChain("Events");')
        for iFile in range(1, nfiles + 1):
            file_step = NanoTools.getStep(
                self, lines[iFile - 1]) if self.mcprivate else iFile
            event_chain.append('  c->Add("{}/{}_nj{}.root");'.format(
                outputdir, nanoname, file_step))
        event_chain.append('  c->Process("NanoDumper.C+", outFileName);')
        event_chain = '\n'.join(event_chain)

        run_chain = []
        run_chain.append('TChain* c_run = new TChain("Runs");')
        for iFile in range(1, nfiles + 1):
            file_step = NanoTools.getStep(
                self, lines[iFile - 1]) if self.mcprivate else iFile
            # fill the Runs chain (c_run), not the Events chain built above
            run_chain.append('  c_run->Add("{}/{}_nj{}.root");'.format(
                outputdir, nanoname, file_step))
        run_chain.append('  c_run->Process("NanoRunDumper.C+", outFileName);')
        run_chain = '\n'.join(run_chain)

        content = [
            '#include "TChain.h"',
            '#include <iostream>',
            '#include "TProof.h"\n',
            'void starter(){',
            '  TString outFileName = "flat_bparknano.root";',
            '  {addMC}'.format(
                addMC='' if self.data else 'outFileName += "_isMC";'),
            '  {addevt}'.format(addevt=event_chain),
            '  {addrun}'.format(addrun='' if self.data else run_chain),
            '}',
        ]
        content = '\n'.join(content)

        starter_name = './files/starter_{}.C'.format(label)
        dumper_starter = open(starter_name, 'w+')
        dumper_starter.write(content)
        dumper_starter.close()
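
As a concrete illustration of what the generated starter contains, running just the event-chain assembly above for two files (non-private MC, so file_step simply counts up) prints the macro lines below. This toy reproduces only the logic shown, with hypothetical values:

outputdir = '/some/outputdir'  # hypothetical
nanoname = 'bparknano'
nfiles = 2

event_chain = ['TChain* c = new TChain("Events");']
for iFile in range(1, nfiles + 1):
    event_chain.append('  c->Add("{}/{}_nj{}.root");'.format(
        outputdir, nanoname, iFile))
event_chain.append('  c->Process("NanoDumper.C+", outFileName);')
print('\n'.join(event_chain))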
Example #8
    def writeFileList(self, nfiles_perchunk, point=None):
        if not path.exists('./files'):
            os.system('mkdir ./files')

        if self.mcprivate:
            filename = './files/filelist_{}_{}'.format(self.prodlabel, point)
        else:
            ds_label = NanoTools.getDataLabel(
                self, self.dataset) if self.data else NanoTools.getMCLabel(
                    self, self.dataset)
            filename = './files/filelist_{dsl}_{pl}'.format(dsl=ds_label,
                                                            pl=self.prodlabel)

        # do not create the file list if it already exists
        if len(glob.glob('{}*.txt'.format(filename))) == 0:
            if self.mcprivate:  # fetch miniAOD files
                myfile = open(filename + '.txt', "w+")
                nanofiles = NanoTools.getLocalMiniAODFiles(
                    self, self.user, self.prodlabel, point)

                for nanofile in nanofiles:
                    if NanoTools.checkLocalFile(self, nanofile):
                        myfile.write(nanofile + '\n')
                    else:
                        print '    could not open {} --> skipping'.format(
                            nanofile)

                myfile.close()
            else:  # fetch files on DAS
                command = 'dasgoclient --query="file dataset={ds} | grep file.name" > {fn}.txt'.format(
                    ds=self.dataset, fn=filename)
                os.system(command)

            # slurm cannot deal with overly large job arrays
            # -> split the list into chunks of at most nfiles_perchunk files
            if NanoTools.getNFiles(self, filename + '.txt') > nfiles_perchunk:
                command_split = 'split -l {nfiles} {fn}.txt {fn}_ --additional-suffix=.txt'.format(
                    nfiles=nfiles_perchunk, fn=filename)
                os.system(command_split)
                os.system('rm {fn}.txt'.format(fn=filename))

        print '    ---> {}*.txt created'.format(filename)

        return filename
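
The split command above relies on GNU split's --additional-suffix option. A pure-Python chunking sketch with the same intent (hypothetical numeric suffixes; the real code keeps split's alphabetic ones):

def split_filelist(filename, nfiles_perchunk):
    # break a flat file list into chunks small enough for slurm job
    # arrays, mirroring `split -l nfiles_perchunk`
    with open(filename + '.txt') as f:
        lines = f.readlines()
    for ichunk in range(0, len(lines), nfiles_perchunk):
        chunkname = '{}_{:03d}.txt'.format(filename, ichunk // nfiles_perchunk)
        with open(chunkname, 'w') as out:
            out.writelines(lines[ichunk:ichunk + nfiles_perchunk])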
Example #9
    def doMerging(self, nanoName, mergedName, locationSE, outputdir, cond):
        print '\n-> Getting the different subdirectories (chunk/signal points)'
        subdirs = [f for f in glob.glob(locationSE + '/*')]

        for subdir in subdirs:
            if 'merged' in subdir: continue
            if '.root' in subdir: continue

            print '\n-> Processing: {}'.format(subdir[subdir.rfind('/') +
                                                      1:len(subdir)])

            # get files
            nanoFilesPlain = [f for f in glob.glob(subdir + nanoName)]

            nanoFiles = map(lambda x: 'root://t3dcachedb.psi.ch:1094/' + x,
                            nanoFilesPlain)

            # create the outputdir that will contain the merged file
            if not path.exists(subdir + outputdir):
                os.system('mkdir {}'.format(subdir + outputdir))

            outputname = '{}/{}'.format(subdir + outputdir, mergedName) \
                if not self.dobatch else 'merge.root'
            command = 'python haddnano.py {}'.format(outputname)

            if len(nanoFiles) == 0:
                print 'no files of interest in this chunk'

            else:
                print "\n-> Checking the files"
                for iFile, fileName in enumerate(nanoFiles):
                    if iFile % 100 == 0:
                        print '     --> checked {}% of the files'.format(
                            round(float(iFile) / len(nanoFiles) * 100, 1))
                    elif iFile == len(nanoFiles) - 1:
                        print '     --> checked 100% of the files'

                    if not NanoTools.checkLocalFile(self, fileName, cond):
                        continue

                    command = command + ' {}'.format(fileName)

                print '\n-> Start of the merge'
                os.system(command)

                if self.dobatch:
                    command_xrdcp = 'xrdcp -f merge.root root://t3dcachedb.psi.ch:1094/{}/{}'.format(
                        subdir + outputdir, mergedName)
                    os.system(command_xrdcp)

                print '{}/{} created \n'.format(subdir + outputdir, mergedName)
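
The merge skips any file failing NanoTools.checkLocalFile, which is not shown; it presumably opens each candidate and rejects unreadable or empty ones. A plausible PyROOT sketch (hypothetical; the meaning of the cond argument is not visible in these examples):

import ROOT

def check_local_file(filename, cond=True):
    # open the (possibly xrootd-prefixed) file and reject files that
    # are missing, zombies, or empty
    # (hypothetical stand-in for NanoTools.checkLocalFile)
    f = ROOT.TFile.Open(filename)
    if not f or f.IsZombie():
        return False
    ok = f.GetListOfKeys().GetSize() > 0
    f.Close()
    return ok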
Example #10
    def process(self):
        if self.docompile:
            print '-> Compiling'
            self.compile()

        print '\n------------'
        print ' Processing NanoLauncher on production {} '.format(
            self.prodlabel if self.mcprivate else self.dataset)
        print ' --> on the batch'
        print '------------'

        if self.mcprivate:
            locationSE = '/pnfs/psi.ch/cms/trivcat/store/user/{}/BHNLsGen/{}/'.format(
                self.user, self.prodlabel)

            print '\n-> Getting the different mass points'
            pointsdir = NanoTools.getPointDirs(self, locationSE)
            points = [
                point[point.rfind('/') + 1:len(point)] for point in pointsdir
            ]

            # looping over the signal points
            for point in points:
                print '\n-> Processing mass/ctau point: {}'.format(point)

                #if point != 'mass3.0_ctau184.256851021': continue

                self.launchingModule(point=point)

        elif self.data or self.mccentral:
            dataset_label = NanoTools.getDataLabel(
                self, self.dataset) if self.data else NanoTools.getMCLabel(
                    self, self.dataset)

            self.launchingModule(ds_label=dataset_label)

        print '\n-> Submission completed'
Example #11
    def doChunkMerging(self, nanoName, mergedName, locationSE, cond=True):
        print '\n---> Merging the different chunks'

        nanoFilesPlain = [
            f for f in glob.glob(locationSE + '/Chunk*/' + nanoName)
        ]
        nanoFiles = map(lambda x: 'root://t3dcachedb.psi.ch:1094/' + x,
                        nanoFilesPlain)

        # create the outputdir that will contain the merged file
        if not path.exists('{}/merged'.format(locationSE)):
            os.system('mkdir {}/merged'.format(locationSE))

        filesValid = []
        print "\n-> Checking the files"
        for fileName in nanoFiles:
            if not NanoTools.checkLocalFile(self, fileName, cond): continue

            filesValid.append(fileName)

        print '\n-> Start of the merge'
        outputname = '{}/merged/{}'.format(
            locationSE, mergedName) if not self.dobatch else 'fullmerge.root'
        command = 'python haddnano.py {}'.format(outputname)
        for iFile, fileName in enumerate(filesValid):
            command = command + ' {}'.format(fileName)

        os.system(command)

        if self.dobatch:
            command_xrdcp = 'xrdcp -f fullmerge.root root://t3dcachedb.psi.ch:1094/{}/merged/{}'.format(
                locationSE, mergedName)
            os.system(command_xrdcp)

        print '{}/merged/{} created \n'.format(locationSE, mergedName)

        # clean the per-chunk merged files; use the plain /pnfs path,
        # as for the mkdir calls above (rm would not understand an
        # xrootd URL)
        print 'cleaning'
        for f in glob.glob(locationSE + '/Chunk*/merged/'):
            command_clean_file = 'rm -rf {}/{}'.format(f, mergedName)
            os.system(command_clean_file)

    def process(self):
        print '---------------------------------'
        print '         Nano Job Manager        '
        print '---------------------------------'

        if self.data and self.dataset == None:
            print '\nINFO: the JobManager Tool is going to be run on all datasets having "{}" as production label\n'.format(
                opt.pl)

        # pnfs directory where nano samples are located
        location = NanoTools.getFilesLocation(self, self.data)

        # get the directories associated to the production label
        dirs = NanoTools.getNanoDirectories(self, location, self.prodlabel,
                                            self.dataset)
        #dirs = [f for f in glob.glob('{loc}/ParkingBPH1_Run2018B*{pl}'.format(loc=location, pl=self.prodlabel))]
        if len(dirs) == 0:
            raise RuntimeError(
                'No samples with the production label "{pl}" were found in {loc}'
                .format(pl=self.prodlabel, loc=location))

        n_good = 0
        n_failed = 0
        n_unfinished = 0
        n_unprocessed = 0

        n_failure_xrootd = 0
        n_failure_readerr = 0
        n_failure_timeout = 0
        n_failure_memout = 0
        n_failure_node = 0
        n_failure_other = 0

        time_wallclock = 0
        time_cpu = 0

        for dir_ in dirs:
            print '\n\n {}'.format(dir_)

            # define counters
            n_good_perdir = 0
            n_failed_perdir = 0
            n_unfinished_perdir = 0
            n_unprocessed_perdir = 0

            n_failure_xrootd_perdir = 0
            n_failure_readerr_perdir = 0
            n_failure_timeout_perdir = 0
            n_failure_memout_perdir = 0
            n_failure_node_perdir = 0
            n_failure_other_perdir = 0

            time_wallclock_perdir = 0
            time_cpu_perdir = 0

            # get different chunks
            chunks = [f for f in glob.glob('{}/Chunk*'.format(dir_))]

            for chunk_ in chunks:
                if self.dofullreport:
                    print '\n -- {} --'.format(chunk_[chunk_.rfind('/') +
                                                      1:len(chunk_)])

                failed_files = []

                n_good_perchunk = 0
                n_failed_perchunk = 0
                n_unfinished_perchunk = 0
                n_unprocessed_perchunk = 0

                time_wallclock_perchunk = 0
                time_cpu_perchunk = 0

                n_exp = self.getNExpectedNanoFiles(chunk_)

                files = [
                    chunk_ + '/bparknano_nj' + str(nj) + '.root'
                    for nj in range(1, n_exp + 1)
                ]

                for file_ in files:
                    # get the log file
                    logdir = NanoTools.getLogDir(self, file_, self.prodlabel,
                                                 self.data)
                    logfile = NanoTools.getLogFile(self, logdir, file_)

                    # idle jobs
                    if not NanoTools.checkFileExists(self, logfile):
                        n_unprocessed_perchunk += 1
                        continue

                    if NanoTools.checkFileExists(self, file_):  # successful job
                        n_good_perchunk += 1

                        if self.dofetchtime:
                            time_wallclock_perchunk += self.fetchTime(
                                logfile, 'wallclock')
                            time_cpu_perchunk += self.fetchTime(logfile, 'cpu')

                    else:  # failed or still running job

                        if not self.isJobFinished(logfile):
                            n_unfinished_perchunk += 1
                        else:
                            n_failed_perchunk += 1

                            # collect failed files for possible resubmission
                            # (kept outside the dofullreport block so that
                            # resubmission also works without the full report)
                            failed_files.append(file_)

                            if self.dofullreport:
                                try:
                                    failure_reason = self.checkFailureReason(
                                        logfile)
                                except:
                                    # fall back if the log cannot be parsed;
                                    # otherwise failure_reason could be unbound
                                    # or left over from a previous file
                                    failure_reason = 'other'
                                if failure_reason == 'xrootd':
                                    n_failure_xrootd_perdir += 1
                                elif failure_reason == 'readerror':
                                    n_failure_readerr_perdir += 1
                                elif failure_reason == 'slurm_timeout':
                                    n_failure_timeout_perdir += 1
                                elif failure_reason == 'slurm_memout':
                                    n_failure_memout_perdir += 1
                                elif failure_reason == 'slurm_nodefailure':
                                    n_failure_node_perdir += 1
                                elif failure_reason == 'other':
                                    n_failure_other_perdir += 1

                if self.dofullreport:
                    print 'number of successful jobs in this chunk : {}'.format(
                        n_good_perchunk)
                    print 'number of failed jobs in this chunk     : {}'.format(
                        n_failed_perchunk)
                    if n_unfinished_perchunk != 0:
                        print 'number of running jobs in this chunk    : {}'.format(
                            n_unfinished_perchunk)
                    if n_unprocessed_perchunk != 0:
                        print 'number of idle jobs in this chunk       : {}'.format(
                            n_unprocessed_perchunk)
                    if self.dofetchtime:
                        print 'average wallclock time in this chunk    : {}min '.format(
                            round(
                                time_wallclock_perchunk /
                                (n_good_perchunk *
                                 60), 1) if n_good_perchunk != 0 else 0)
                        print 'average CPU time/event in this chunk    : {}s/event '.format(
                            round(time_cpu_perchunk / n_good_perchunk, 3
                                  ) if n_good_perchunk != 0 else 0)

                # resubmission
                if self.doresubmit and len(failed_files) != 0:
                    print ' --> resubmission of failed files ({})'.format(
                        self.getArray(failed_files))
                    self.resubmit(failed_files)

                n_good_perdir += n_good_perchunk
                n_failed_perdir += n_failed_perchunk
                n_unfinished_perdir += n_unfinished_perchunk
                n_unprocessed_perdir += n_unprocessed_perchunk

                time_wallclock_perdir += time_wallclock_perchunk
                time_cpu_perdir += time_cpu_perchunk

            n_tot_perdir = n_good_perdir + n_failed_perdir + n_unfinished_perdir + n_unprocessed_perdir

            print '\n'
            print ' --> number of successful jobs       : {}    {}%'.format(
                n_good_perdir,
                round(n_good_perdir / float(n_tot_perdir) * 100, 2))
            print ' --> number of failed jobs           : {}    {}%'.format(
                n_failed_perdir,
                round(n_failed_perdir / float(n_tot_perdir) * 100, 2))
            if self.dofullreport:
                print '      - xrootd error:        {}'.format(
                    n_failure_xrootd_perdir)
                print '      - file read error:     {}'.format(
                    n_failure_readerr_perdir)
                print '      - slurm timeout error: {}'.format(
                    n_failure_timeout_perdir)
                print '      - slurm memout error:  {}'.format(
                    n_failure_memout_perdir)
                print '      - slurm node failure:  {}'.format(
                    n_failure_node_perdir)
                print '      - other:               {}'.format(
                    n_failure_other_perdir)
            print ' --> number of running jobs          : {}    {}%'.format(
                n_unfinished_perdir,
                round(n_unfinished_perdir / float(n_tot_perdir) * 100, 2))
            print ' --> number of idle jobs             : {}    {}%'.format(
                n_unprocessed_perdir,
                round(n_unprocessed_perdir / float(n_tot_perdir) * 100, 2))
            if self.dofetchtime:
                print ' --> average wallclock time          : {}min '.format(
                    round(time_wallclock_perdir /
                          (n_good_perdir *
                           60), 1) if n_good_perdir != 0 else 0)
                print ' --> average CPU time/event          : {}s/event '.format(
                    round(time_cpu_perdir /
                          (n_good_perdir), 3) if n_good_perdir != 0 else 0)
            #if n_unstarted_chunk != 0:
            #  print ' --> number of yet unprocessed chunks: {}'.format(n_unstarted_chunk)

            n_good += n_good_perdir
            n_failed += n_failed_perdir
            n_unfinished += n_unfinished_perdir
            n_unprocessed += n_unprocessed_perdir

            n_failure_xrootd += n_failure_xrootd_perdir
            n_failure_readerr += n_failure_readerr_perdir
            n_failure_timeout += n_failure_timeout_perdir
            n_failure_memout += n_failure_memout_perdir
            n_failure_node += n_failure_node_perdir
            n_failure_other += n_failure_other_perdir

            time_wallclock += time_wallclock_perdir
            time_cpu += time_cpu_perdir

        n_tot = n_good + n_failed + n_unfinished + n_unprocessed

        print '\n'
        print '----------------------------------------------------------'
        print '                      Status Report                       '

        print '\n'
        print ' ---> number of successful jobs            : {}    {}%'.format(
            n_good, round(n_good / float(n_tot) * 100, 2))
        print ' ---> number of failed jobs                : {}    {}%'.format(
            n_failed, round(n_failed / float(n_tot) * 100, 2))
        print ' ---> number of running jobs               : {}    {}%'.format(
            n_unfinished, round(n_unfinished / float(n_tot) * 100, 2))
        print ' ---> number of idle jobs                  : {}    {}%'.format(
            n_unprocessed, round(n_unprocessed / float(n_tot) * 100, 2))

        if self.dofullreport:
            print '\n'
            print ' Error summary: '
            print '     - xrootd error:        {} ({}%)'.format(
                n_failure_xrootd,
                round(n_failure_xrootd / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)
            print '     - file read error:     {} ({}%)'.format(
                n_failure_readerr,
                round(n_failure_readerr / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)
            print '     - slurm timeout error: {} ({}%)'.format(
                n_failure_timeout,
                round(n_failure_timeout / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)
            print '     - slurm memout error:  {} ({}%)'.format(
                n_failure_memout,
                round(n_failure_memout / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)
            print '     - slurm node failure:  {} ({}%)'.format(
                n_failure_node,
                round(n_failure_node / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)
            print '     - other:               {} ({}%)'.format(
                n_failure_other,
                round(n_failure_other / float(n_failed) *
                      100, 1) if float(n_failed) != 0 else 0)

        if self.dofetchtime:
            print '\n'
            print ' Time summary: '
            print '     - average wallclock time: {}min'.format(
                round(time_wallclock / (n_good * 60), 1) if n_good != 0 else 0)
            print '     - average CPU time/event: {}s/event'.format(
                round(time_cpu / n_good, 3) if n_good != 0 else 0)

        print '\n'
        print '----------------------------------------------------------'
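
The per-failure classification above calls self.checkFailureReason(logfile), which is not part of this listing. It presumably scans the log for known error signatures and maps them to the categories counted above; the patterns below are placeholders, not the tool's actual strings:

def check_failure_reason(logfile):
    # map known log signatures to a failure category
    # (sketch; the real patterns are not shown in these examples)
    with open(logfile) as log:
        content = log.read()
    if 'DUE TO TIME LIMIT' in content:
        return 'slurm_timeout'
    if 'oom-kill' in content:
        return 'slurm_memout'
    if 'NODE FAILURE' in content:
        return 'slurm_nodefailure'
    if 'XRootD' in content:
        return 'xrootd'
    if 'read error' in content:
        return 'readerror'
    return 'other'

The getArray helper that follows builds the --array specification used for resubmission.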
    def getArray(self, failed_files):
        idx = []
        for file_ in failed_files:
            idx.append(str(NanoTools.getStep(self, file_)))
        return ','.join(idx)
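
getArray delegates to NanoTools.getStep to recover the job-array index from a file name; the outputs are named like bparknano_njN.root, so the helper presumably parses the njN part. A minimal sketch under that assumption:

import re

def get_step(filename):
    # extract the array index N from a name like ..._njN.root
    # (hypothetical stand-in for NanoTools.getStep)
    match = re.search(r'_nj(\d+)\.root', filename)
    return match.group(1) if match else None

For instance, failed files bparknano_nj3.root and bparknano_nj7.root would yield the array specification '3,7', so only those step ids are resubmitted.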
Example #14
    def launchingModule(self, point=None, ds_label=None):
        # declaring some useful quantities
        # ids of the launched jobs, needed for dependency of the merger
        nano_jobIds = []
        flat_jobIds = []

        # counter of processed files
        nfiles_tot = 0

        # slurm cannot deal with too-large arrays, and neither can haddnano (keep this hardcoded)
        maxfiles_perchunk = 750

        print '\n  --> Fetching the files'
        filelistname = self.writeFileList(maxfiles_perchunk, point)

        # loop over the file lists (each containing at most 750 files)
        for iFile, filelist in enumerate(
                glob.glob('{}*.txt'.format(filelistname))):
            if NanoTools.getNFiles(self, filelist) == 0:
                print '        WARNING: no files were found with the corresponding production label'
                print '                 Did you set the correct username using --user <username>?'

            # enforcing max files limit
            if self.maxfiles != None and nfiles_tot >= int(self.maxfiles):
                continue

            # number of files to process in this chunk
            if self.maxfiles == None or int(
                    self.maxfiles) - nfiles_tot > maxfiles_perchunk:
                nfiles = NanoTools.getNFiles(self, filelist)
            else:
                nfiles = int(self.maxfiles) - nfiles_tot
            nfiles_tot += nfiles

            # merging step (if any) must happen after nano or flat steps are completed
            if self.maxfiles == None:
                merge_cond = (
                    iFile == len(glob.glob('{}*.txt'.format(filelistname))) -
                    1)
            else:
                merge_cond = (nfiles_tot == int(self.maxfiles))

            print '\n  --> Creating output directory'
            outputdir = '/pnfs/psi.ch/cms/trivcat/store/user/{}/BHNLsGen/'.format(
                os.environ["USER"])
            if self.mcprivate:
                outputdir += '{}/{}/nanoFiles/Chunk{}_n{}'.format(
                    self.prodlabel, point, iFile, nfiles)
            else:
                dirname = 'data' if self.data else 'mc_central'
                outputdir += '{}/{}/{}/Chunk{}_n{}/'.format(
                    dirname, self.prodlabel, ds_label, iFile, nfiles)
            if not path.exists(outputdir):
                os.system('mkdir -p {}'.format(outputdir))

            print '\n  --> Creating log directory'
            label1 = self.prodlabel if self.mcprivate else ds_label
            label2 = point if self.mcprivate else self.prodlabel
            logdir = './logs/{}/{}/Chunk{}_n{}'.format(label1, label2, iFile, nfiles) if self.tag == None \
                     else './logs/{}/{}_{}/Chunk{}_n{}'.format(label1, label2, self.tag, iFile, nfiles)
            if not path.exists(logdir):
                os.system('mkdir -p {}'.format(logdir))

            label = '{}_{}_Chunk{}_n{}'.format(
                label1,
                label2 if self.tag == None else label2 + '_' + self.tag, iFile,
                NanoTools.getNFiles(self, filelist))

            nano_jobId = -99

            if self.donano:
                nano_jobId = self.launchNano(nfiles, outputdir, logdir,
                                             filelist, label)

                if self.domergenano:
                    nano_jobIds.append(nano_jobId)

                    if merge_cond:
                        self.launchMerger(logdir, label, nano_jobIds, 'nano')

            if self.doflat:
                flat_outputdir = outputdir + '/flat'
                if not path.exists(flat_outputdir):
                    os.system('mkdir -p {}'.format(flat_outputdir))

                flat_jobId = self.launchDumper(nfiles, outputdir, logdir,
                                               filelist, label, nano_jobId)

                # merging of flat files happens automatically
                flat_jobIds.append(flat_jobId)

                if merge_cond:
                    self.launchMerger(logdir, label, flat_jobIds, 'flat')
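
Putting example #14 together: per chunk, the nano job array is submitted first, the dumper is chained behind it with afterany, and one merger per file type is launched behind the accumulated ids once merge_cond fires on the last chunk. A toy sketch of that bookkeeping, with the sbatch calls stubbed out (hypothetical, for illustration only):

def submit(depends_on=None):
    # stub standing in for an sbatch submission; the dependency list
    # is ignored here and a fake job id is returned
    submit.counter = getattr(submit, 'counter', 0) + 1
    return submit.counter

nano_job_ids, flat_job_ids = [], []
chunks = ['Chunk0_n750', 'Chunk1_n750']  # hypothetical chunk labels

for i, chunk in enumerate(chunks):
    nano_id = submit()                      # nano step job array
    nano_job_ids.append(nano_id)
    flat_id = submit(depends_on=[nano_id])  # dumper waits on its nano array
    flat_job_ids.append(flat_id)
    if i == len(chunks) - 1:                # merge_cond: last chunk launched
        submit(depends_on=nano_job_ids)     # nano merger ('nano' filetype)
        submit(depends_on=flat_job_ids)     # flat merger ('flat' filetype)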