def testModule(era):
    """Load the samples of the given era without any selection.

    Every selection keyword (channel, data type, filter, veto) is passed
    to ``getsamples`` as ``None``. The returned samples are only bound to
    a local name; nothing is returned.
    """
    selection = dict(channel=None, dtype=None, filter=None, veto=None)
    samples = getsamples(era, **selection)
def main_get(args):
    """Print information on a configuration variable or on samples.

    What is printed depends on ``args.variable``:

    * ``'samples'``: name and paths of every sample returned by
      ``getsamples`` for each requested era and channel;
    * ``'files'``, ``'nevents'`` or ``'nevts'``: per sample, the number of
      events and/or the input file list, optionally written to a text
      file when ``args.write`` is set;
    * anything else: the value stored under that key in ``CONFIG``.

    ``print`` is always called with one parenthesised argument, which is
    valid both as a Python 2 print statement and as a Python 3 call.
    """
    if args.verbosity >= 1:
        print("%s %s" % (">>> main_get", args))

    # Unpack the command line options once.
    variable = args.variable
    eras = args.eras  # eras to loop over
    channels = args.channels or [""]  # channels to loop over
    dtypes = args.dtypes  # only include these sample types ('data','mc','embed')
    filters = args.samples  # only include these samples (glob patterns)
    dasfilters = args.dasfilters  # only include these DAS paths (glob patterns)
    vetoes = args.vetoes  # exclude these samples (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    inclurl = args.inclurl  # include URL in file list
    checkdas = args.checkdas or args.dasfiles  # check file list in DAS
    checklocal = args.checklocal  # check nevents in local files
    split = args.split  # split samples with multiple DAS dataset paths
    limit = args.limit
    writedir = args.write  # write sample file list to text file
    tag = args.tag
    ncores = args.ncores  # number of cores to get nevents in parallel
    verbosity = args.verbosity
    getnevts = variable in ['nevents', 'nevts']
    cfgname = CONFIG._path

    if verbosity >= 1:
        rule = '-' * 80
        settings = [
            ('variable', variable),
            ('eras', eras),
            ('channels', channels),
            ('cfgname', cfgname),
            ('config', CONFIG),
            ('checkdas', checkdas),
            ('checklocal', checklocal),
            ('split', split),
            ('limit', limit),
            ('writedir', writedir),
            ('ncores', ncores),
        ]
        print(rule)
        for label, value in settings:
            print(">>> %-14s = %s" % (label, value))
        print(rule)

    # LIST SAMPLES
    if variable == 'samples':
        if not eras:
            LOG.warning("Please specify an era to get a sample for.")
        for era in eras:
            for channel in channels:
                if channel:
                    print(">>> Getting file list for era %r, channel %r" % (
                        era, channel))
                else:
                    print(">>> Getting file list for era %r" % (era))
                found = getsamples(era, channel=channel, dtype=dtypes,
                                   filter=filters, veto=vetoes,
                                   dasfilter=dasfilters, dasveto=dasvetoes,
                                   verb=verbosity)
                if not found:
                    LOG.warning("No samples found for era %r." % (era))
                for sample in found:
                    print(">>> %s" % (bold(sample.name)))
                    for daspath in sample.paths:
                        print(">>> %s" % (daspath))
        return

    # LIST SAMPLE FILES and/or NUMBER OF EVENTS
    if variable in ['files', 'nevents', 'nevts']:
        if not eras:
            LOG.warning("Please specify an era to get a sample for.")
        target = "file list" if variable == 'files' else "nevents"
        for era in eras:
            for channel in channels:
                if channel:
                    print(">>> Getting %s for era %r, channel %r" % (
                        target, era, channel))
                else:
                    print(">>> Getting %s for era %r" % (target, era))
                print(">>> ")

                # GET SAMPLES
                LOG.insist(
                    era in CONFIG.eras,
                    "Era '%s' not found in the configuration file. Available: %s"
                    % (era, CONFIG.eras))
                found = getsamples(era, channel=channel, dtype=dtypes,
                                   filter=filters, veto=vetoes,
                                   dasfilter=dasfilters, dasveto=dasvetoes,
                                   split=split, verb=verbosity)

                # LOOP over SAMPLES
                for sample in found:
                    print(">>> %s" % (bold(sample.name)))
                    for daspath in sample.paths:
                        print(">>> %s" % (bold(daspath)))

                    if getnevts or checkdas or checklocal:
                        # checklocal overrides checkdas; a DAS lookup also
                        # forces a refresh of the event count
                        das = checkdas and not checklocal
                        nevents = sample.getnevents(das=das, refresh=das,
                                                    verb=verbosity + 1)
                        if checklocal:
                            storage = "(%s)" % sample.storage.__class__.__name__
                        elif checkdas:
                            storage = "(DAS)"
                        else:
                            storage = ""
                        print(">>> %-7s = %s %s" % ('nevents', nevents, storage))

                    if variable == 'files':
                        infiles = sample.getfiles(das=checkdas, url=inclurl,
                                                  limit=limit,
                                                  verb=verbosity + 1)
                        print(">>> %-7s = %r" % ('channel', channel))
                        print(">>> %-7s = %r" % ('url', sample.url))
                        print(">>> %-7s = %r" % ('postfix', sample.postfix))
                        print(">>> %-7s = %s" % ('nfiles', len(infiles)))
                        print(">>> %-7s = [ " % ('infiles'))
                        for fname in infiles:
                            print(">>> %r" % fname)
                        print(">>> ]")
                    print(">>> ")

                    if writedir:  # write file list to text file
                        flistname = repkey(writedir, ERA=era,
                                           GROUP=sample.group,
                                           SAMPLE=sample.name, TAG=tag)
                        print(">>> Write list to %r..." % (flistname))
                        sample.writefiles(flistname, nevts=getnevts,
                                          ncores=ncores)
        return

    # CONFIGURATION
    if variable in CONFIG:
        print(">>> Configuration of %r: %s" % (variable,
                                               color(CONFIG[variable])))
    else:
        print(">>> Did not find %r in the configuration" % (variable))
def main_run(args):
    """Run given module locally.

    For every requested era and channel, the processor command returned
    by ``getmodule`` is assembled and, unless ``args.dryrun`` is set,
    executed with ``os.system``. Input files come from ``args.infiles``
    if given; otherwise they are taken from the samples selected with
    ``getsamples``, which is only called when at least one of the sample
    filters, vetoes or data types is specified. When no sample is
    selected, the command is run once without sample-specific options.

    ``print`` is always called with one parenthesised argument, which is
    valid both as a Python 2 print statement and as a Python 3 call.
    """
    if args.verbosity >= 1:
        print("%s %s" % (">>> main_run", args))
    eras = args.eras  # eras to loop over and run
    channels = args.channels  # channels to loop over and run
    tag = args.tag  # extra tag for output file
    outdir = args.outdir  # output directory
    dtypes = args.dtypes  # only include these sample types ('data','mc','embed')
    filters = args.samples  # only include these samples (glob patterns)
    dasfilters = args.dasfilters  # only include these DAS paths (glob patterns)
    vetoes = args.vetoes  # exclude these samples (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    extraopts = args.extraopts  # extra options for module (for all runs)
    prefetch = args.prefetch  # copy input file first to local output directory
    maxevts = args.maxevts  # maximum number of events (passed on with -m)
    dasfiles = args.dasfiles  # explicitly process nanoAOD files stored on DAS
    userfiles = args.infiles  # use these input files
    nfiles = args.nfiles  # maximum number of files (per sample, era, channel)
    nsamples = args.nsamples  # maximum number of samples (per era, channel)
    dryrun = args.dryrun  # prepare and print command, without executing
    verbosity = args.verbosity  # verbosity level of the printout
    preselect = args.preselect  # extra selection string
    if not filters:
        filters = [None]  # one pass through the filter loop, without filter

    # Nothing can be run without at least one era and one channel.
    if not eras:
        print(">>> Please specify a valid era (-y).")
    if not channels:
        print(">>> Please specify a valid channel (-c).")
    if not eras or not channels:
        return

    # LOOP over ERAS
    for era in eras:
        moddict = {}  # save time by loading samples and their files only once

        # LOOP over CHANNELS
        for channel in channels:
            LOG.header("%s, %s" % (era, channel))

            # MODULE & PROCESSOR
            skim = 'skim' in channel.lower()
            module, processor, procopts, extrachopts = getmodule(
                channel, extraopts)

            # VERBOSE
            if verbosity >= 1:
                print('-' * 80)
                print(">>> Running %r" % (channel))
                print(">>> %-12s = %r" % ('channel', channel))
                print(">>> %-12s = %r" % ('module', module))
                print(">>> %-12s = %r" % ('processor', processor))
                print(">>> %-12s = %r" % ('procopts', procopts))
                print(">>> %-12s = %r" % ('extrachopts', extrachopts))
                print(">>> %-12s = %r" % ('prefetch', prefetch))
                print(">>> %-12s = %r" % ('preselect', preselect))
                print(">>> %-12s = %s" % ('filters', filters))
                print(">>> %-12s = %s" % ('vetoes', vetoes))
                print(">>> %-12s = %r" % ('dtypes', dtypes))
                print(">>> %-12s = %r" % ('userfiles', userfiles))
                print(">>> %-12s = %r" % ('outdir', outdir))

            # LOOP over FILTERS
            samples = []
            for filter in filters:
                filters_ = [filter] if filter else []
                if verbosity >= 2:
                    print(">>> Checking filters=%s, vetoes=%s, dtypes=%s..." % (
                        filters_, vetoes, dtypes))

                # GET SAMPLES
                if not userfiles and (filters_ or vetoes or dtypes):
                    LOG.insist(
                        era in CONFIG.eras,
                        "Era '%s' not found in the configuration file. Available: %s"
                        % (era, CONFIG.eras))
                    samples_ = getsamples(era, channel=channel, tag=tag,
                                          dtype=dtypes, filter=filters_,
                                          veto=vetoes, dasfilter=dasfilters,
                                          dasveto=dasvetoes, moddict=moddict,
                                          verb=verbosity)
                    # avoid duplicates, without modifying the returned list
                    samples_ = [s for s in samples_ if s not in samples]
                    if nsamples > 0:  # limit number of samples to nsamples
                        samples_ = samples_[:nsamples]
                    samples.extend(samples_)

            # Test the collected list, not the loop variable `sample`, which
            # is undefined or stale when getsamples was not called.
            if not samples:
                samples = [None]  # run once without a sample
                if not userfiles and (filters_ or vetoes or dtypes):
                    print_no_samples(dtypes, filters_, vetoes)
            if verbosity >= 1:
                print(">>> %-12s = %r" % ('samples', samples))
                print('-' * 80)

            # LOOP over SAMPLES
            for sample in samples:
                if sample:
                    print(">>> %s" % (bold(sample.name)))
                    if verbosity >= 1:
                        for path in sample.paths:
                            print(">>> %s" % (bold(path)))

                # SETTINGS
                dtype = None
                extraopts_ = extrachopts[:]  # options for this channel & sample
                if sample:
                    filetag = "_%s_%s_%s%s" % (channel, era, sample.name, tag)
                    if sample.extraopts:
                        extraopts_.extend(sample.extraopts)
                else:
                    filetag = "_%s_%s%s" % (channel, era, tag)
                if verbosity >= 1:
                    print(">>> %-12s = %s" % ('sample', sample))
                    print(">>> %-12s = %r" % ('filetag', filetag))  # postfix
                    print(">>> %-12s = %s" % ('extraopts', extraopts_))

                # GET FILES
                infiles = []
                if userfiles:
                    infiles = userfiles[:]
                elif sample:
                    infiles = sample.getfiles(das=dasfiles, verb=verbosity)
                    dtype = sample.dtype
                    if nfiles > 0:
                        infiles = infiles[:nfiles]
                    if verbosity == 1:
                        print(">>> %-12s = %r" % ('dtype', dtype))
                        print(">>> %-12s = %s" % ('nfiles', len(infiles)))
                        print(">>> %-12s = [ " % ('infiles'))
                        for file in infiles:
                            print(">>> %r" % file)
                        print(">>> ]")

                # RUN
                runcmd = processor
                if procopts:
                    runcmd += " %s" % (procopts)
                if skim:
                    runcmd += " -y %s -o %s" % (era, outdir)
                    if preselect:
                        runcmd += " --preselect '%s'" % (preselect)
                else:  # analysis
                    runcmd += " -y %s -c %s -M %s -o %s" % (era, channel,
                                                            module, outdir)
                if dtype:
                    runcmd += " -d %r" % (dtype)
                if filetag:
                    runcmd += " -t %r" % (filetag)  # postfix
                if maxevts:
                    runcmd += " -m %s" % (maxevts)
                if infiles:
                    runcmd += " -i %s" % (' '.join(infiles))
                if prefetch:
                    runcmd += " -p"
                if extraopts_:
                    runcmd += " --opt '%s'" % ("' '".join(extraopts_))
                print(">>> Executing: " + bold(runcmd))
                if not dryrun:
                    # os.system rather than execute(): real-time print out
                    # does not work well with python script
                    os.system(runcmd)
                print("")
def main_write(args):
    """Write the file list of the selected samples to text files.

    For every requested era and channel the samples are retrieved with
    ``getsamples`` and, for each sample, ``sample.writefiles`` is called
    with a file name built from ``args.listname`` via ``repkey``. A sample
    whose ``writefiles`` call raises ``IOError`` is queued and tried again
    after the other samples, at most ``args.retries`` times.

    ``print`` is always called with one parenthesised argument, which is
    valid both as a Python 2 print statement and as a Python 3 call.
    """
    if args.verbosity >= 1:
        print("%s %s" % (">>> main_write", args))
    listname = args.listname  # write sample file list to text file
    eras = args.eras  # eras to loop over
    channels = args.channels or [""]  # channels to loop over
    dtypes = args.dtypes  # only include these sample types ('data','mc','embed')
    filters = args.samples  # only include these samples (glob patterns)
    dasfilters = args.dasfilters  # only include these DAS paths (glob patterns)
    vetoes = args.vetoes  # exclude these samples (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    checkdas = args.checkdas or args.dasfiles  # check file list in DAS
    split = args.split  # split samples with multiple DAS dataset paths
    retries = args.retries  # retry if error is thrown
    getnevts = args.getnevts  # check nevents in local files
    ncores = args.ncores  # number of cores to get nevents in parallel
    verbosity = args.verbosity
    cfgname = CONFIG._path
    if verbosity >= 1:
        print('-' * 80)
        print(">>> %-14s = %s" % ('listname', listname))
        print(">>> %-14s = %s" % ('getnevts', getnevts))
        print(">>> %-14s = %s" % ('eras', eras))
        print(">>> %-14s = %s" % ('channels', channels))
        print(">>> %-14s = %s" % ('cfgname', cfgname))
        print(">>> %-14s = %s" % ('config', CONFIG))
        print('-' * 80)

    # LOOP over ERAS & CHANNELS
    if not eras:
        LOG.warning("Please specify an era to get a sample for.")
    for era in eras:
        for channel in channels:
            info = ">>> Getting file list for era %r" % (era)
            if channel:
                info += ", channel %r" % (channel)
            print(info)
            print(">>> ")

            LOG.insist(
                era in CONFIG.eras,
                "Era '%s' not found in the configuration file. Available: %s"
                % (era, CONFIG.eras))
            samples0 = getsamples(era, channel=channel, dtype=dtypes,
                                  filter=filters, veto=vetoes,
                                  dasfilter=dasfilters, dasveto=dasvetoes,
                                  split=split, verb=verbosity)
            # First entry: all samples; then one (initially empty) list per
            # retry round, filled with the samples that failed the round before.
            sampleset = [samples0] + [[] for _ in range(retries)]

            # LOOP over SAMPLES
            for retry, samples in enumerate(sampleset):
                if not samples:
                    break  # nothing (left) to do
                if retry > 0 and len(samples0) > 1:
                    if retries >= 2:
                        print(">>> Retry %d/%d: %d/%d samples...\n>>>" % (
                            retry, retries, len(samples), len(samples0)))
                    else:
                        print(">>> Trying again %d/%d samples...\n>>>" % (
                            len(samples), len(samples0)))
                for sample in samples:
                    print(">>> %s" % (bold(sample.name)))
                    # do not load from existing text file; overwrite existing ones
                    sample.filelist = None
                    for path in sample.paths:
                        print(">>> %s" % (bold(path)))
                    flistname = repkey(listname, ERA=era, GROUP=sample.group,
                                       SAMPLE=sample.name)
                    try:
                        sample.writefiles(flistname, nevts=getnevts,
                                          das=checkdas, refresh=checkdas,
                                          ncores=ncores, verb=verbosity)
                    except IOError as err:  # e.g. a ROOT file could not be opened
                        # str(err) instead of the deprecated err.message, which
                        # is empty for IOError(errno, strerror)
                        print("IOError: " + str(err))
                        if retry < retries and sample not in sampleset[retry + 1]:
                            # try again after the others
                            print(">>> Will try again...")
                            sampleset[retry + 1].append(sample)
                    print(">>> ")