def getsamples(era,channel="",tag="",dtype=[],filter=[],veto=[],moddict={},verb=0):
  """Help function to get samples from a sample list and filter if needed."""
  import TauFW.PicoProducer.tools.config as GLOB
  CONFIG   = GLOB.getconfig(verb=verb)
  filters  = filter if not filter or isinstance(filter,list) else [filter]
  vetoes   = veto   if not veto   or isinstance(veto,list)   else [veto]
  dtypes   = dtype  if not dtype  or isinstance(dtype,list)  else [dtype]
  sampfile = ensurefile("samples",repkey(CONFIG.eras[era],ERA=era,CHANNEL=channel,TAG=tag))
  samppath = sampfile.replace('.py','').replace('/','.')
  if samppath not in moddict:
    moddict[samppath] = importlib.import_module(samppath) # save time by loading only once
  if not hasattr(moddict[samppath],'samples'):
    LOG.throw(IOError,"Module '%s' must have a list of Sample objects called 'samples'!"%(samppath))
  samplelist = moddict[samppath].samples
  samples    = [ ]
  sampledict = { } # ensure unique sample names
  LOG.verb("getsamples: samplelist=%r"%(samplelist),verb,3)
  for sample in samplelist:
    if filters and not sample.match(filters,verb): continue
    if vetoes and sample.match(vetoes,verb): continue
    if dtypes and sample.dtype not in dtypes: continue
    if channel and sample.channels and not any(fnmatch(channel,c) for c in sample.channels): continue
    if sample.name in sampledict:
      LOG.throw(IOError,"Sample short names should be unique. Found two samples '%s'!\n\t%s\n\t%s"%(
        sample.name,','.join(sampledict[sample.name].paths),','.join(sample.paths)))
    if 'skim' in channel and sample.dosplit: # split samples with multiple DAS dataset paths, and submit as separate jobs
      for subsample in sample.split():
        samples.append(subsample) # keep one-to-one correspondence between subsample and DAS dataset
    else:
      samples.append(sample)
    sampledict[sample.name] = sample
  return samples
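# Usage sketch (illustrative, not part of the original module): how getsamples might be called to
# retrieve a filtered list of Sample objects. The era, channel and filter values below are
# hypothetical and assume a sample list is configured for that era (CONFIG.eras['UL2018']).
def _example_getsamples(verb=0):
  """Minimal sketch: fetch Drell-Yan MC samples for the mutau channel in an assumed UL2018 era."""
  samples = getsamples('UL2018',channel='mutau',dtype='mc',filter='DY*',verb=verb)
  for sample in samples:
    print ">>> %s: %s"%(sample.name,', '.join(sample.paths))
  return samples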
def getjson(era,dtype='data'):
  """Get JSON file of data."""
  # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TWikiLUM
  # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2016Analysis
  json = None
  year = getyear(era)
  if dtype=='data':
    if 'UL' in era:
      if year==2016:
        json = 'Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt'
      elif year==2017:
        json = 'Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt'
      elif year==2018:
        json = 'Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
    else:
      if year==2016:
        json = 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
      elif year==2017:
        json = 'Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt'
      elif year==2018:
        json = 'Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt'
      print "Warning! Using outdated certified run JSON file %s for era %s... Please move to UltraLegacy (UL)!"%(json,era)
    assert json!=None, "getjson: Did not find certified run JSON for era %r, year %r"%(era,year)
    json = ensurefile(datadir,'json',str(year),json)
  return json
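# Usage sketch (illustrative, not part of the original module): how getjson might be used to look up
# the certified-run JSON for a data era. The era string 'UL2018' is a hypothetical example, and the
# returned path assumes the usual <datadir>/json/<year>/ layout checked by ensurefile above.
def _example_getjson():
  """Minimal sketch: look up the certified-run JSON for an assumed UltraLegacy 2018 data era."""
  json = getjson('UL2018',dtype='data') # resolves to the Legacy2018 Collisions18 JSON under datadir
  print ">>> Using certified-run JSON %s"%(json)
  return json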
def getjson(era,dtype='data'):
  """Get JSON file of data."""
  # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TWikiLUM
  # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2016Analysis
  json = None
  if dtype=='data':
    if era==2016:
      json = 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
    elif era==2017:
      json = 'Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt'
    else:
      json = 'Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt'
    json = ensurefile(datadir,'json',json)
  return json
def loadfiles(self,listname,**kwargs):
  """Load filenames from text file for fast look up in future."""
  listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  filenevts = self.filenevts
  nevents   = 0
  if self.verbosity>=1:
    print ">>> Loading sample files from %r"%(listname)
  ensurefile(listname,fatal=True)
  filelist = [ ]
  with open(listname,'r') as file:
    for line in file:
      line = line.strip().split()
      if not line: continue
      line = line[0].strip() # remove spaces, one filename per line
      if line[0]=='#': continue # skip out-commented lines
      #if v.endswith('.root'):
      match = fevtsexp.match(line) # match $FILENAME(:NEVTS)
      if not match: continue
      infile = match.group(1)
      if match.group(2): # found nevents in filename
        nevts = int(match.group(2))
        filenevts[infile] = nevts # store/cache in dictionary
        nevents += nevts
      filelist.append(infile)
  if self.nevents<=0:
    self.nevents = nevents
  elif self.nevents!=nevents:
    LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
    self.nevents = nevents # overwrite stored value with the freshly summed total
  self.files = filelist
  self.files.sort()
  return self.files
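# Usage sketch (illustrative, not part of the original module): loadfiles reads a plain text file
# with one ROOT file per line, optionally suffixed with ':NEVTS' (the per-file event count cached in
# filenevts); blank and out-commented lines are skipped. The list name below is hypothetical and
# assumes repkey substitutes the $ERA/$SAMPLE placeholders from the sample's attributes.
def _example_loadfiles(sample):
  """Minimal sketch: load a sample's cached file list and report the summed event count."""
  files = sample.loadfiles("samples/files/$ERA/$SAMPLE.txt")
  print ">>> Loaded %d files, %d events in total"%(len(files),sample.nevents)
  return files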
def main_link(args):
  """Link channels or eras in the config file."""
  if args.verbosity>=1:
    print ">>> main_link", args
  variable  = args.subcommand
  varkey    = variable+'s'
  key       = args.key
  value     = args.value
  verbosity = args.verbosity
  cfgname   = CONFIG._path
  if verbosity>=1:
    print '-'*80
  print ">>> Linking %s '%s' to '%s' in the configuration..."%(variable,color(key),value)
  if verbosity>=1:
    print ">>> %-14s = %s"%('cfgname',cfgname)
    print ">>> %-14s = %s"%('key',key)
    print ">>> %-14s = %s"%('value',value)
    print ">>> %-14s = %s"%('config',CONFIG)
    print '-'*80
  
  # SANITY CHECKS
  if varkey not in CONFIG:
    CONFIG[varkey] = { }
  LOG.insist(isinstance(CONFIG[varkey],dict),"%s in %s has to be a dictionary"%(varkey,cfgname))
  oldval = value
  for char in '/\,:;!?\'" ':
    if char in key:
      LOG.throw(IOError,"Given key '%s', but keys cannot contain any of these characters: %s"%(key,char))
  if varkey=='channels':
    if 'skim' in key.lower(): #or 'test' in key:
      parts  = value.split(' ') # "PROCESSOR [--FLAG[=VALUE] ...]"
      script = os.path.basename(parts[0]) # separate script from options
      ensurefile("python/processors",script)
      value  = ' '.join([script]+parts[1:])
    else:
      parts  = value.split(' ') # "MODULE [KEY=VALUE ...]"
      module = parts[0]
      LOG.insist(all('=' in o for o in parts[1:]),"All extra module options should be of format KEY=VALUE!")
      if 'python/analysis/' in module: # useful for tab completion
        module = module.split('python/analysis/')[-1].replace('/','.')
      module = rreplace(module,'.py')
      path   = os.path.join('python/analysis/','/'.join(module.split('.')[:-1]))
      ensureinit(path,by="pico.py")
      ensuremodule(module)
      value  = ' '.join([module]+parts[1:])
  elif varkey=='eras':
    if 'samples/' in value: # useful for tab completion
      value = ''.join(value.split('samples/')[1:])
    path = os.path.join("samples",repkey(value,ERA='*',CHANNEL='*',TAG='*'))
    LOG.insist(glob.glob(path),"Did not find any sample lists '%s'"%(path))
    ensureinit(os.path.dirname(path),by="pico.py")
  if value!=oldval:
    print ">>> Converted '%s' to '%s'"%(oldval,value)
  CONFIG[varkey][key] = value
  CONFIG.write()
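# Usage sketch (illustrative, not part of the original module): main_link is normally reached via the
# pico.py command line (e.g. "pico.py channel mutau ModuleMuTau"), but it only needs an object carrying
# the parsed arguments it reads above. The channel and module names below are hypothetical.
def _example_main_link():
  """Minimal sketch: link the 'mutau' channel to an analysis module in the user configuration."""
  from argparse import Namespace
  args = Namespace(subcommand='channel',key='mutau',value='ModuleMuTau',verbosity=1)
  main_link(args) # writes CONFIG['channels']['mutau'] = 'ModuleMuTau' to the config file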