Beispiel #1
0
def getsamples(era,channel="",tag="",dtype=[],filter=[],veto=[],moddict={},verb=0):
  """Load the sample list configured for an era and return the selected samples.

  The sample list module is located through the 'eras' entry of the
  PicoProducer configuration and must define a list called 'samples'.

  Arguments:
    era:     key into CONFIG.eras; also substituted for ERA in the file name
    channel: substituted for CHANNEL in the file name; if set, samples that
             declare channels are only kept if one of their glob patterns
             matches it
    tag:     substituted for TAG in the file name
    dtype:   data type(s) to keep; a single value or a list
    filter:  pattern(s) passed to sample.match; non-matching samples are dropped
    veto:    pattern(s) passed to sample.match; matching samples are dropped
    moddict: cache of imported sample list modules; the default dictionary is
             shared between calls, so each module is imported only once
    verb:    verbosity level
  Returns the list of selected samples. For channels containing 'skim',
  samples with 'dosplit' set are replaced by the result of sample.split().
  """
  import TauFW.PicoProducer.tools.config as GLOB
  def aslist(arg):
    # empty values and lists pass through untouched, anything else gets wrapped
    if not arg or isinstance(arg,list):
      return arg
    return [arg]
  config       = GLOB.getconfig(verb=verb)
  patterns     = aslist(filter)
  antipatterns = aslist(veto)
  datatypes    = aslist(dtype)
  listfile     = ensurefile("samples",repkey(config.eras[era],ERA=era,CHANNEL=channel,TAG=tag))
  modname      = listfile.replace('.py','').replace('/','.')
  if modname not in moddict: # import only once, reuse on later calls
    moddict[modname] = importlib.import_module(modname)
  module = moddict[modname]
  if not hasattr(module,'samples'):
    LOG.throw(IOError,"Module '%s' must have a list of Sample objects called 'samples'!"%(modname))
  candidates = module.samples
  LOG.verb("getsamples: samplelist=%r"%(candidates),verb,3)
  selected = [ ]
  seen     = { } # short name -> sample, to enforce unique names
  splitting = 'skim' in channel
  for sample in candidates:
    rejected = (
      (patterns and not sample.match(patterns,verb)) or
      (antipatterns and sample.match(antipatterns,verb)) or
      (datatypes and sample.dtype not in datatypes) or
      (channel and sample.channels and not any(fnmatch(channel,c) for c in sample.channels))
    )
    if rejected:
      continue
    name = sample.name
    if name in seen:
      LOG.throw(IOError,"Sample short names should be unique. Found two samples '%s'!\n\t%s\n\t%s"%(
                        name,','.join(seen[name].paths),','.join(sample.paths)))
    if splitting and sample.dosplit:
      # one subsample per DAS dataset path, so each is submitted as separate jobs
      selected.extend(sample.split())
    else:
      selected.append(sample)
    seen[name] = sample
  return selected
Beispiel #2
0
def getjson(era, dtype='data'):
    """Get the certified-run JSON file for a data-taking era.

    Arguments:
      era:   era string; eras containing 'UL' select the UltraLegacy files
      dtype: data type; a file name is only looked up for 'data'
    Returns whatever ensurefile returns for the path built from datadir,
    'json', the year and the selected file name.
    Raises AssertionError if dtype is 'data' and no file is known for the
    year of the era.
    """
    # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TWikiLUM
    # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2016Analysis
    ulfiles = {
        2016: 'Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt',
        2017: 'Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt',
        2018: 'Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt',
    }
    oldfiles = {
        2016: 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
        2017: 'Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt',
        2018: 'Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt',
    }
    jsonfile = None
    year = getyear(era)
    if dtype == 'data':
        ultralegacy = 'UL' in era
        jsonfile = (ulfiles if ultralegacy else oldfiles).get(year)
        # Explicit raise instead of an assert statement: same exception type
        # for callers, but not stripped when running with -O.
        if jsonfile is None:
            raise AssertionError(
                "getjson: Did not find certified run JSON for era %r, year %r" % (
                    era, year))
        # Warn only once a file was actually found, so the message never
        # reports a file called 'None'.
        if not ultralegacy:
            print("Warning! Using outdated certified run JSON file %s for era %s... Please move to UltraLegacy (UL)!" % (
                jsonfile, era))
    # NOTE(review): for dtype != 'data' the file name is still None here and is
    # handed to ensurefile unchanged, as before -- confirm how ensurefile
    # treats a None path component.
    jsonfile = ensurefile(datadir, 'json', str(year), jsonfile)
    return jsonfile
Beispiel #3
0
def getjson(era, dtype='data'):
    """Return the certified-run JSON file for the given era.

    For dtype 'data' the file name is chosen by comparing era to 2016 and
    2017; every other era gets the 2018 file. For any other dtype no file
    name is chosen and None is passed on. The result is whatever ensurefile
    returns for datadir, 'json' and the file name.
    """
    # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TWikiLUM
    # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2016Analysis
    dated = (
        (2016, 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'),
        (2017, 'Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt'),
    )
    fallback = 'Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt'
    certfile = None
    if dtype == 'data':
        # first entry whose year equals era, otherwise the 2018 file
        certfile = next((name for year, name in dated if era == year), fallback)
    return ensurefile(datadir, 'json', certfile)
Beispiel #4
0
 def loadfiles(self, listname, **kwargs):
     """Load file names from a text file for fast look-up in future.

     The ERA, GROUP and SAMPLE keys in listname are filled from this sample.
     Each non-empty line that does not start with '#' is expected to hold
     one entry of the form $FILENAME(:NEVTS); only the first
     whitespace-separated token of a line is used, and lines that do not
     match are skipped. Event counts found in the list are cached in
     self.filenevts and summed.

     Arguments:
       listname: path of the text file, may contain keys to be replaced
       **kwargs: accepted for interface compatibility, currently unused
     Returns the sorted list of file names, which is also stored in
     self.files. self.nevents is set to the summed event count if it was
     unset (<= 0) or disagrees with that sum.
     """
     listname = repkey(listname,
                       ERA=self.era,
                       GROUP=self.group,
                       SAMPLE=self.name)
     filenevts = self.filenevts
     nevents = 0
     if self.verbosity + 2 >= 1:
         print(">>> Loading sample files from '%r'" % (listname))
     ensurefile(listname, fatal=True)
     filelist = []
     with open(listname, 'r') as listfile:
         for line in listfile:
             columns = line.split()  # also strips surrounding whitespace
             if not columns: continue  # blank line
             entry = columns[0]  # one file per line
             if entry.startswith('#'): continue  # do not consider out-commented
             match = fevtsexp.match(entry)  # match $FILENAME(:NEVTS)
             if not match: continue
             infile = match.group(1)
             if match.group(2):  # found nevents in filename
                 nevts = int(match.group(2))
                 filenevts[infile] = nevts  # store/cache in dictionary
                 nevents += nevts
             filelist.append(infile)
     if self.nevents <= 0:
         self.nevents = nevents
     elif self.nevents != nevents:
         LOG.warning(
             "loadfiles: stored nevents=%d does not match the sum total of file events, %d!"
             % (self.nevents, nevents))
         # This used to be the no-op comparison 'self.nevents == nevents';
         # adopt the sum from the file list as the warning implies.
         self.nevents = nevents
     self.files = filelist
     self.files.sort()
     return self.files
Beispiel #5
0
def main_link(args):
    """Link channels or eras in the config file.

    Reads from args:
      subcommand: name of the variable to link; with an 's' appended it is
                  the key of a dictionary in CONFIG ('channels' and 'eras'
                  get extra validation)
      key:        short name to link; must not contain any of the characters
                  / \\ , : ; ! ? ' " or a space
      value:      what the key is linked to
      verbosity:  verbosity level
    For 'channels', keys containing 'skim' link to a processor script in
    python/processors (plus optional flags); all other keys link to an
    analysis module (plus optional KEY=VALUE options). For 'eras', the value
    is a sample list pattern below samples/. The possibly normalized value is
    stored in CONFIG and the configuration is written out.
    """
    verbosity = args.verbosity
    if verbosity >= 1:
        print(">>> main_link %s" % (args,))
    variable = args.subcommand
    varkey = variable + 's'
    key = args.key
    value = args.value
    cfgname = CONFIG._path
    if verbosity >= 1:
        print('-' * 80)
    print(">>> Linking %s '%s' to '%s' in the configuration..." % (
        variable, color(key), value))
    if verbosity >= 1:
        print(">>> %-14s = %s" % ('cfgname', cfgname))
        print(">>> %-14s = %s" % ('key', key))
        print(">>> %-14s = %s" % ('value', value))
        print(">>> %-14s = %s" % ('config', CONFIG))
        print('-' * 80)

    # SANITY CHECKS
    if varkey not in CONFIG:
        CONFIG[varkey] = {}
    LOG.insist(isinstance(CONFIG[varkey], dict),
               "%s in %s has to be a dictionary" % (varkey, cfgname))
    oldval = value  # remember the user's input to report a conversion below
    # Same character set as before; the backslash is now escaped explicitly
    # instead of relying on the invalid escape sequence '\,'.
    for char in '/\\,:;!?\'" ':
        if char in key:
            LOG.throw(
                IOError,
                "Given key '%s', but keys cannot contain any of these characters: %s"
                % (key, char))
    if varkey == 'channels':
        if 'skim' in key.lower():  #or 'test' in key:
            parts = value.split(' ')  # "PROCESSOR [--FLAG[=VALUE] ...]"
            script = os.path.basename(parts[0])  # separate script from options
            ensurefile("python/processors", script)
            value = ' '.join([script] + parts[1:])
        else:
            parts = value.split(' ')  # "MODULE [KEY=VALUE ...]"
            module = parts[0]
            LOG.insist(
                all('=' in o for o in parts[1:]),
                "All extra module options should be of format KEY=VALUE!")
            if 'python/analysis/' in module:  # useful for tab completion
                module = module.split('python/analysis/')[-1].replace('/', '.')
            module = rreplace(module, '.py')
            # directory of the module's parent package below python/analysis/
            path = os.path.join('python/analysis/',
                                '/'.join(module.split('.')[:-1]))
            ensureinit(path, by="pico.py")
            ensuremodule(module)
            value = ' '.join([module] + parts[1:])
    elif varkey == 'eras':
        if 'samples/' in value:  # useful for tab completion
            value = ''.join(value.split('samples/')[1:])
        # replace the keys by wildcards to check that at least one list exists
        path = os.path.join("samples",
                            repkey(value, ERA='*', CHANNEL='*', TAG='*'))
        LOG.insist(glob.glob(path),
                   "Did not find any sample lists '%s'" % (path))
        ensureinit(os.path.dirname(path), by="pico.py")
    if value != oldval:
        print(">>> Converted '%s' to '%s'" % (oldval, value))
    CONFIG[varkey][key] = value
    CONFIG.write()