Example 1
    def getfiles(self, refresh=False, url=True, verb=0):
        """Get list of files from DAS."""
        files = self.files
        if self.refreshable and (not files or refresh):
            files = []
            for path in self.paths:
                if self.storage:  # get files from storage system
                    sepath = repkey(self.storage, PATH=path).replace('//', '/')
                    storage = getstorage(sepath, verb=verb - 1)
                    outlist = storage.getfiles(url=url, verb=verb - 1)
                else:  # get files from DAS
                    dascmd = 'dasgoclient --query="file dataset=%s instance=%s"' % (
                        path, self.instance)  # --limit=0
                    LOG.verb(repr(dascmd), verb)
                    cmdout = execute(dascmd, verb=verb - 1)
                    outlist = cmdout.split(os.linesep)
                for line in outlist:  # filter ROOT files
                    line = line.strip()
                    if line.endswith('.root') and not any(
                            f.endswith(line) for f in self.blacklist):
                        if url and self.url not in line and 'root://' not in line:
                            line = self.url + line
                        files.append(line)
            files.sort()  # for consistent list order
            self.files = files
        return files
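A minimal usage sketch (hedged: the import path follows the TauFW.PicoProducer.storage layout seen in Example 3, the DAS path is the one from its docstring, and dasgoclient must be available in the shell):

# Usage sketch, not part of the original source.
from TauFW.PicoProducer.storage.Sample import Sample
sample = Sample('DY', 'DYJetsToLL_M-50',
                '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM',
                dtype='mc')
files = sample.getfiles(url=True, verb=1)  # first call queries DAS and caches the list
files = sample.getfiles()                  # later calls return the cached list
files = sample.getfiles(refresh=True)      # force a fresh DAS query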
Example 2
def getsamples(era,
               channel="",
               tag="",
               dtype=[],
               filter=[],
               veto=[],
               moddict={},
               verb=0):
    """Help function to get samples from a sample list and filter if needed."""
    CONFIG = GLOB.getconfig(verb=verb)
    filters = filter if isinstance(filter, list) else [filter]
    vetoes = veto if isinstance(veto, list) else [veto]
    dtypes = dtype if isinstance(dtype, list) else [dtype]
    sampfile = ensurefile(
        "samples", repkey(CONFIG.eras[era], ERA=era, CHANNEL=channel, TAG=tag))
    samppath = sampfile.replace('.py', '').replace('/', '.')
    if samppath not in moddict:
        moddict[samppath] = importlib.import_module(
            samppath)  # save time by loading once
    if not hasattr(moddict[samppath], 'samples'):
        LOG.throw(
            IOError,
            "Module '%s' must have a list of Sample objects called 'samples'!"
            % (samppath))
    samplelist = moddict[samppath].samples
    samples = []
    sampledict = {}  # ensure unique sample names
    for sample in samplelist:
        if filters and not sample.match(filters, verb): continue
        if vetoes and sample.match(vetoes, verb): continue
        if dtypes and sample.dtype not in dtypes: continue
        if sample.name in sampledict:
            LOG.throw(
                IOError,
                "Sample short names should be unique. Found two samples '%s'!\n\t%s\n\t%s"
                % (sample.name, ','.join(
                    sampledict[sample.name].paths), ','.join(sample.paths)))
        if 'skim' in channel and len(sample.paths) >= 2:
            for subsample in sample.split():
                samples.append(
                    subsample
                )  # keep correspondence sample to one sample in DAS
        else:
            samples.append(sample)
        sampledict[sample.name] = sample
    return samples
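A usage sketch (hedged: the era, channel, and glob-style filter/veto patterns are illustrative and must match entries in the user's sample configuration):

# Usage sketch, not part of the original source.
samples = getsamples('2018', channel='mutau', dtype='mc',
                     filter=['DY*'], veto=['WJets*'], verb=1)
for sample in samples:
    print ">>> %s: %s" % (sample.name, ', '.join(sample.paths))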
Example 3
    def __init__(self, group, name, *paths, **kwargs):
        """Container class for CMSSW samples, e.g.:
        - group: DY (used to group similar samples in final output)
        - name:  DYJetsToLL_M-50 (used as shorthand and jobname)
        - path:  /DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM
        - dtype: 'mc', 'data', 'embed'
        """

        # PATH
        assert len(paths) >= 1, "Need at least one path to create a sample..."
        if len(paths) == 1 and isinstance(paths[0], list):
            paths = paths[0]
        for path in paths:
            assert path.count('/') >= 3 and path.startswith(
                '/'), "Path %s has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT." % path
            #sample = '/'.join(line.split('/')[-3:])

        # DATA TYPE
        dtype = kwargs.get('dtype', None)
        dtypes = ['mc', 'data', 'embed']
        if dtype is None:  # automatic recognition
            path = paths[0]
            if 'Embed' in path:
                dtype = 'embed'
            elif path.endswith('SIM') or any(g in path
                                             for g in ['pythia', 'madgraph']):
                dtype = 'mc'
            elif re.search(r"/Run20\d\d", path):
                dtype = 'data'
        assert dtype in dtypes, "Given data type '%s' is not recognized! Please choose from %s..." % (
            dtype, ', '.join(dtypes))

        # ATTRIBUTES
        self.group = group
        self.name = name
        self.paths = paths  # DAS path
        self.dtype = dtype
        self.channels = kwargs.get('channels', None)
        self.storage = kwargs.get('store',
                                  None)  # if stored elsewhere than DAS
        self.url = kwargs.get('url', None)
        self.blacklist = kwargs.get('blacklist', [])  # list of blacklisted files
        self.instance = kwargs.get(
            'instance',
            'prod/phys03' if paths[0].endswith('USER') else 'prod/global')
        self.nfilesperjob = kwargs.get('nfilesperjob', -1)
        self.subtry = kwargs.get('subtry',
                                 0)  # to help keep track of resubmission
        self.jobcfg = kwargs.get('jobcfg',
                                 {})  # to help keep track of resubmission
        self.nevents = kwargs.get('nevents', 0)
        self.files = kwargs.get(
            'files', [])  # list of ROOT files, OR text file with list of files
        self.era = kwargs.get('era', "")  # for expansion of $ERA variable
        self.verbosity = kwargs.get('verbosity',
                                    0)  # verbosity level for debugging
        self.refreshable = not self.files  # allow refresh on file list in getfiles()

        # STORAGE & URL DEFAULTS
        if self.storage:
            self.storage = repkey(self.storage,
                                  ERA=self.era,
                                  GROUP=self.group,
                                  SAMPLE=self.name)
        if not self.url:
            if self.storage:
                from TauFW.PicoProducer.storage.StorageSystem import Local
                storage = getstorage(repkey(self.storage, PATH=self.paths[0]))
                if isinstance(storage, Local):
                    self.url = "root://cms-xrd-global.cern.ch/"
                else:
                    self.url = storage.fileurl
            else:
                self.url = "root://cms-xrd-global.cern.ch/"

        # GET FILE LIST FROM TEXT FILE
        if isinstance(self.files, str):
            filename = repkey(self.files,
                              ERA=self.era,
                              GROUP=self.group,
                              SAMPLE=self.name)
            if self.verbosity >= 1:
                print ">>> Loading sample files from '%r'" % (filename)
            if self.verbosity >= 2:
                print ">>> %-14s = %s" % ('filelist', self.files)
                print ">>> %-14s = %s" % ('filename', filename)
            filelist = []
            with open(filename, 'r') as file:
                for line in file:
                    line = line.strip().split()
                    if not line: continue
                    infile = line[0].strip()
                    if infile[0] == '#': continue
                    if infile.endswith('.root'):
                        filelist.append(infile)
            self.files = filelist
            self.files.sort()
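A construction sketch using the docstring values above (hedged: channels and era are placeholders; dtype is omitted so the automatic recognition kicks in, classifying this path as 'mc' via its 'pythia' substring and 'SIM' suffix):

# Usage sketch, not part of the original source.
sample = Sample('DY', 'DYJetsToLL_M-50',
                '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM',
                channels=['mutau', 'etau'], era='2018')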
Example 4
def main_status(args):
  """Check status of jobs (succesful/pending/failed/missing), or hadd job output."""
  if args.verbosity>=1:
    print ">>> main_status", args
  
  # SETTING
  eras           = args.eras
  channels       = args.channels
  tag            = args.tag
  checkdas       = args.checkdas
  checkqueue     = args.checkqueue
  dtypes         = args.dtypes
  filters        = args.samples
  vetoes         = args.vetoes
  force          = args.force
  hadd           = args.subcommand=='hadd'
  cleanup        = args.cleanup if hadd else False
  dryrun         = args.dryrun
  verbosity      = args.verbosity
  cmdverb        = max(1,verbosity)
  outdirformat   = CONFIG.outdir
  jobdirformat   = CONFIG.jobdir
  storedirformat = CONFIG.picodir
  jobs           = [ ]
  
  # LOOP over ERAS
  for era in eras:
    
    # LOOP over CHANNELS
    for channel in channels:
      print header("%s, %s"%(era,channel))
      
      # GET SAMPLES
      jobcfgs = repkey(os.path.join(jobdirformat,"config/jobconfig_$CHANNEL$TAG_try[0-9]*.json"),
                       ERA=era,SAMPLE='*',GROUP='*',CHANNEL=channel,TAG=tag)
      if verbosity>=1:
        print ">>> %-12s = %s"%('cwd',os.getcwd())
        print ">>> %-12s = %s"%('jobcfgs',jobcfgs)
        print ">>> %-12s = %s"%('filters',filters)
        print ">>> %-12s = %s"%('vetoes',vetoes)
        print ">>> %-12s = %s"%('dtypes',dtypes)
      samples = getcfgsamples(jobcfgs,filter=filters,veto=vetoes,dtype=dtypes,verb=verbosity)
      if verbosity>=2:
        print ">>> Found samples: "+", ".join(repr(s.name) for s in samples)
      if hadd and 'skim' in channel:
        LOG.warning("Hadding into one file not available for skimming...")
        print
        continue
      
      # LOOP over SAMPLES
      found = False
      for sample in samples:
        if sample.channels and channel not in sample.channels: continue
        found = True
        print ">>> %s"%(bold(sample.name))
        for path in sample.paths:
          print ">>> %s"%(bold(path))
        
        # CHECK JOBS ONLY ONCE
        if checkqueue==1 and not jobs:
          batch = getbatch(CONFIG,verb=verbosity)
          jobs  = batch.jobs(verb=verbosity-1)
        
        # HADD
        if hadd:
          jobdir   = sample.jobcfg['jobdir']
          outdir   = sample.jobcfg['outdir']
          storedir = repkey(storedirformat,ERA=era,CHANNEL=channel,TAG=tag,SAMPLE=sample.name,
                                           DAS=sample.paths[0].strip('/'),GROUP=sample.group)
          storage  = getstorage(storedir,ensure=True,verb=verbosity)
          outfile  = '%s_%s%s.root'%(sample.name,channel,tag)
          infiles  = os.path.join(outdir,'*_%s%s_[0-9]*.root'%(channel,tag))
          cfgfiles = os.path.join(sample.jobcfg['cfgdir'],'job*_%s%s_try[0-9]*.*'%(channel,tag))
          logfiles = os.path.join(sample.jobcfg['logdir'],'*_%s%s_try[0-9]*.*.*.log'%(channel,tag))
          if verbosity>=1:
            print ">>> Hadd'ing job output for '%s'"%(sample.name)
            print ">>> %-12s = %r"%('jobdir',jobdir)
            print ">>> %-12s = %r"%('outdir',outdir)
            print ">>> %-12s = %r"%('storedir',storedir)
            print ">>> %-12s = %s"%('infiles',infiles)
            print ">>> %-12s = %r"%('outfile',outfile)
          resubfiles, chunkdict = checkchuncks(sample,channel=channel,tag=tag,jobs=jobs,
                                               checkqueue=checkqueue,das=checkdas,verb=verbosity)
          if len(resubfiles)>0 and not force:
            LOG.warning("Cannot hadd job output because %d chunks need to be resubmitted..."%(len(resubfiles))+
                        "Please use -f or --force to hadd anyway.")
            continue
          #haddcmd = 'hadd -f %s %s'%(outfile,infiles)
          #haddout = execute(haddcmd,dry=dryrun,verb=max(1,verbosity))
          haddout = storage.hadd(infiles,outfile,dry=dryrun,verb=cmdverb)
          #os.system(haddcmd)
          
          # CLEAN UP
          # TODO: check if hadd was successful with isvalid
          if cleanup:
            rmfiles   = ""
            rmfileset = [infiles,cfgfiles,logfiles]
            for files in rmfileset:
              if len(glob.glob(files))>0:
                rmfiles += ' '+files
            if verbosity>=2:
              print ">>> %-12s = %s"%('rmfileset',rmfileset)
              print ">>> %-12s = %s"%('rmfiles',rmfiles)
            if rmfiles:
              rmcmd = "rm %s"%(rmfiles)
              rmout = execute(rmcmd,dry=dryrun,verb=cmdverb)
        
        # ONLY CHECK STATUS
        else:
          outdir   = sample.jobcfg['outdir']
          if verbosity>=1:
            print ">>> Checking job status for '%s'"%(sample.name) 
            print ">>> %-12s = %r"%('outdir',outdir)
          checkchuncks(sample,channel=channel,tag=tag,jobs=jobs,
                       checkqueue=checkqueue,das=checkdas,verb=verbosity)
        
        print
      
      if not found:
        print ">>> Did not find any samples."
        print
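main_status() consumes the parsed command-line namespace of the pico tool; a hedged sketch of a direct call (the Namespace fields mirror the attributes read in the SETTING block, and all values are examples):

# Usage sketch, not part of the original source.
from argparse import Namespace
args = Namespace(subcommand='status', eras=['2018'], channels=['mutau'],
                 tag="", checkdas=True, checkqueue=1, dtypes=['mc'],
                 samples=['DY*'], vetoes=[], force=False, cleanup=False,
                 dryrun=False, verbosity=1)
main_status(args)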
Example 5
def main_get(args):
  """Get information of given variable."""
  if args.verbosity>=1:
    print ">>> main_get", args
  variable  = args.variable
  eras      = args.eras
  dtypes    = args.dtypes
  filters   = args.samples
  vetoes    = args.vetoes
  channels  = args.channels or [""]
  checkdas  = args.checkdas
  writedir  = args.write
  tag       = args.tag
  verbosity = args.verbosity
  cfgname   = CONFIG._path
  if verbosity>=1:
    print '-'*80
    print ">>> %-14s = %s"%('variable',variable)
    print ">>> %-14s = %s"%('cfgname',cfgname)
    print ">>> %-14s = %s"%('config',CONFIG)
    print '-'*80
    
  # SAMPLES
  if variable=='files':
    
    # LOOP over ERAS & CHANNELS
    if not eras:
      LOG.warning("Please specify an era to get a sample for.")
    for era in eras:
      for channel in channels:
        
        # VERBOSE
        if verbosity>=1:
          print ">>> %-12s = %r"%('channel',channel)
        
        # GET SAMPLES
        assert era in CONFIG.eras, "Era '%s' not found in the configuration file. Available: %s"%(era,CONFIG.eras)
        samples = getsamples(era,channel=channel,dtype=dtypes,filter=filters,veto=vetoes,verb=verbosity)
        
        # LOOP over SAMPLES
        for sample in samples:
          print ">>> %s"%(bold(sample.name))
          for path in sample.paths:
            print ">>> %s"%(bold(path))
            infiles = sample.getfiles(url=False,verb=verbosity+1)
            if checkdas:
              ndasevents = sample.getnevents(verb=verbosity+1)
              print ">>> %-12s = %s"%('ndasevents',ndasevents)
            print ">>> %-12s = %r"%('url',sample.url)
            print ">>> %-12s = %s"%('nfiles',len(infiles))
            print ">>> %-12s = [ "%('infiles')
            for file in infiles:
              print ">>>   %r"%file
            print ">>> ]"
            if writedir:
              flistname = repkey(writedir,ERA=era,GROUP=sample.group,SAMPLE=sample.name,TAG=tag)
              print ">>> Write list to %r..."%(flistname)
              ensuredir(os.path.dirname(flistname))
              with open(flistname,'w+') as flist:
                for infile in infiles:
                  flist.write(infile+'\n')
  
  # CONFIGURATION
  else:
    if variable in CONFIG:
      print ">>> Configuration of %r: %s"%(variable,color(CONFIG[variable]))
    else:
      print ">>> Did not find %r in the configuration"%(variable)
Example 6
def preparejobs(args):
  """Help function to iterate over samples per given channel and era and prepare job config and list."""
  if args.verbosity>=1:
    print ">>> preparejobs", args
  
  resubmit     = args.subcommand=='resubmit'
  eras         = args.eras
  channels     = args.channels
  tag          = args.tag
  dtypes       = args.dtypes
  filters      = args.samples
  vetoes       = args.vetoes
  checkdas     = args.checkdas
  checkqueue   = args.checkqueue
  prefetch     = args.prefetch
  nfilesperjob = args.nfilesperjob
  split_nfpj   = args.split_nfpj
  verbosity    = args.verbosity
  jobs         = [ ]
  
  # LOOP over ERAS
  for era in eras:
    moddict = { } # save time by loading samples and get their file list only once
    
    # LOOP over CHANNELS
    for channel in channels:
      print header("%s, %s"%(era,channel))
      
      # CHANNEL -> MODULE
      assert channel in CONFIG.channels, "Channel '%s' not found in the configuration file. Available: %s"%(channel,CONFIG.channels)
      module = CONFIG.channels[channel]
      if channel!='test' and 'skim' not in channel:
        ensuremodule(module)
      if verbosity>=1:
        print '-'*80
        print ">>> %-12s = %r"%('channel',channel)
        print ">>> %-12s = %r"%('module',module)
        print ">>> %-12s = %s"%('filters',filters)
        print ">>> %-12s = %s"%('vetoes',vetoes)
        print ">>> %-12s = %r"%('dtypes',dtypes)
      
      # PROCESSOR
      if 'skim' in channel:
        processor = module
      elif channel=='test':
        processor = module
      else:
        processor = "picojob.py"
      procpath  = os.path.join("python/processors",processor)
      if not os.path.isfile(procpath):
        LOG.throw(IOError,"Processor '%s' does not exist in '%s'..."%(processor,procpath))
      processor = os.path.abspath(procpath)
      if verbosity>=1:
        print ">>> %-12s = %r"%('processor',processor)
        print '-'*80
      
      # GET SAMPLES
      jobdirformat = CONFIG.jobdir # for job config & log files
      outdirformat = CONFIG.nanodir if 'skim' in channel else CONFIG.outdir # for job output
      if resubmit:
        # TODO: allow user to resubmit given config file
        jobcfgs  = repkey(os.path.join(jobdirformat,"config/jobconfig_$SAMPLE$TAG_try[0-9]*.json"),
                          ERA=era,SAMPLE='*',CHANNEL=channel,TAG=tag)
        if verbosity>=2:
          print ">>> %-12s = %s"%('cwd',os.getcwd())
          print ">>> %-12s = %s"%('jobcfgs',jobcfgs)
        samples = getcfgsamples(jobcfgs,filter=filters,veto=vetoes,dtype=dtypes,verb=verbosity)
      else:
        assert era in CONFIG.eras, "Era '%s' not found in the configuration file. Available: %s"%(era,CONFIG.eras)
        samples = getsamples(era,channel=channel,tag=tag,dtype=dtypes,filter=filters,veto=vetoes,moddict=moddict,verb=verbosity)
      if verbosity>=2:
        print ">>> Found samples: "+", ".join(repr(s.name) for s in samples)
      
      # LOOP over SAMPLES
      found = False
      for sample in samples:
        if sample.channels and channel not in sample.channels: continue
        found = True
        print ">>> %s"%(bold(sample.name))
        for path in sample.paths:
          print ">>> %s"%(bold(path))
        
        # DIRECTORIES
        subtry        = sample.subtry+1 if resubmit else 1
        jobids        = sample.jobcfg.get('jobids',[ ])
        postfix       = "_%s%s"%(channel,tag)
        jobtag        = '_%s%s_try%d'%(channel,tag,subtry)
        jobname       = sample.name+re.sub(r"_try1$","",jobtag) # drop '_try1' suffix on first try; rstrip would strip characters, not the suffix
        nfilesperjob_ = sample.nfilesperjob if sample.nfilesperjob>0 else nfilesperjob
        if split_nfpj>1:
          nfilesperjob_ = max(1,nfilesperjob_/split_nfpj)
        outdir        = repkey(outdirformat,ERA=era,CHANNEL=channel,TAG=tag,SAMPLE=sample.name,
                                            DAS=sample.paths[0].strip('/'),GROUP=sample.group)
        jobdir        = ensuredir(repkey(jobdirformat,ERA=era,CHANNEL=channel,TAG=tag,SAMPLE=sample.name,
                                                      DAS=sample.paths[0].strip('/'),GROUP=sample.group))
        cfgdir        = ensuredir(jobdir,"config")
        logdir        = ensuredir(jobdir,"log")
        cfgname       = "%s/jobconfig%s.json"%(cfgdir,jobtag)
        joblist       = '%s/jobarglist%s.txt'%(cfgdir,jobtag)
        if verbosity==1:
          print ">>> %-12s = %s"%('cfgname',cfgname)
          print ">>> %-12s = %s"%('joblist',joblist)
        elif verbosity>=2:
          print '-'*80
          print ">>> Preparing job %ssubmission for '%s'"%("re" if resubmit else "",sample.name)
          print ">>> %-12s = %r"%('processor',processor)
          print ">>> %-12s = %r"%('jobname',jobname)
          print ">>> %-12s = %r"%('jobtag',jobtag)
          print ">>> %-12s = %r"%('postfix',postfix)
          print ">>> %-12s = %r"%('outdir',outdir)
          print ">>> %-12s = %r"%('cfgdir',cfgdir)
          print ">>> %-12s = %r"%('logdir',logdir)
          print ">>> %-12s = %r"%('cfgname',cfgname)
          print ">>> %-12s = %r"%('joblist',joblist)
          print ">>> %-12s = %s"%('try',subtry)
          print ">>> %-12s = %r"%('jobids',jobids)
        
        # CHECKS
        if os.path.isfile(cfgname):
          # TODO: check for running jobs
          LOG.warning("Job configuration '%s' already exists and will be overwritten! "+
                      "Beware of conflicting job output!"%(cfgname))
        if not resubmit:
          cfgpattern = re.sub(r"(?<=try)\d+(?=\.json$)",r"*",cfgname)
          cfgnames   = [f for f in glob.glob(cfgpattern) if not f.endswith("_try1.json")]
          if cfgnames:
            LOG.warning("Job configurations for resubmission already exists! This can cause conflicting job output!"+
              "If you are sure you want to submit from scratch, please remove these files:\n>>>   "+"\n>>>   ".join(cfgnames))
        storage = getstorage(outdir,verb=verbosity,ensure=True)
        
        # GET FILES
        nevents = 0
        if resubmit: # resubmission
          if checkqueue==0 and not jobs: # check jobs only once
            batch = getbatch(CONFIG,verb=verbosity)
            jobs  = batch.jobs(verb=verbosity-1)
          infiles, chunkdict = checkchuncks(sample,outdir=outdir,channel=channel,tag=tag,jobs=jobs,
                                         checkqueue=checkqueue,das=checkdas,verb=verbosity)
          nevents = sample.jobcfg['nevents'] # updated in checkchuncks
        else: # first-time submission
          infiles   = sample.getfiles(verb=verbosity-1)
          if checkdas:
            nevents = sample.getnevents()
          chunkdict = { }
        if args.testrun:
          infiles = infiles[:2]
        if verbosity==1:
          print ">>> %-12s = %s"%('nfilesperjob',nfilesperjob_)
          print ">>> %-12s = %s"%('nfiles',len(infiles))
        elif verbosity>=2:
          print ">>> %-12s = %s"%('nfilesperjob',nfilesperjob_)
          print ">>> %-12s = %s"%('nfiles',len(infiles))
          print ">>> %-12s = [ "%('infiles')
          for file in infiles:
            print ">>>   %r"%file
          print ">>> ]"
          print ">>> %-12s = %s"%('nevents',nevents)
        
        # CHUNKS
        infiles.sort() # to have consistent order with resubmission
        chunks    = [ ] # chunk indices
        fchunks   = chunkify(infiles,nfilesperjob_) # file chunks
        nfiles    = len(infiles)
        nchunks   = len(fchunks)
        if verbosity>=1:
          print ">>> %-12s = %s"%('nchunks',nchunks)
        if verbosity>=2:
          print '-'*80
        
        # WRITE JOB LIST with arguments per job
        if args.verbosity>=1:
          print ">>> Creating job list %s..."%(joblist)
        with open(joblist,'w') as listfile:
          ichunk = 0
          for fchunk in fchunks:
            while ichunk in chunkdict:
              ichunk   += 1 # allows for different nfilesperjob on resubmission
            jobfiles    = ' '.join(fchunk) # list of input files
            filetag     = postfix
            if 'skim' not in channel:
              filetag  += "_%d"%(ichunk)
            jobcmd      = processor
            if 'skim' in channel:
              jobcmd += " -y %s --copydir %s -t %s --jec-sys"%(era,outdir,filetag)
            elif 'test' in channel:
              jobcmd += " -o %s -t %s"%(outdir,filetag) # input files are appended below
            else:
              jobcmd += " -y %s -c %s -M %s --copydir %s -t %s"%(era,channel,module,outdir,filetag)
            if prefetch:
              jobcmd += " -p"
            jobcmd += " -i %s"%(jobfiles) # add last
            if args.verbosity>=1:
              print jobcmd
            listfile.write(jobcmd+'\n')
            chunkdict[ichunk] = fchunk
            chunks.append(ichunk)
        
        # JSON CONFIG
        jobcfg = OrderedDict([
          ('time',str(datetime.now())),
          ('group',sample.group), ('paths',sample.paths), ('name',sample.name), ('nevents',nevents),
          ('channel',channel),    ('module',module),
          ('jobname',jobname),    ('jobtag',jobtag),      ('tag',tag),          ('postfix',postfix),
          ('try',subtry),         ('jobids',jobids),
          ('outdir',outdir),      ('jobdir',jobdir),      ('cfgdir',cfgdir),    ('logdir',logdir),
          ('cfgname',cfgname),    ('joblist',joblist),
          ('nfiles',nfiles),      ('files',infiles),      ('nfilesperjob',nfilesperjob_), #('nchunks',nchunks),
          ('nchunks',nchunks),    ('chunks',chunks),      ('chunkdict',chunkdict),
        ])
        
        # YIELD
        yield jobcfg
        print
        #if args.testrun:
        #  break # only run one sample
      
      if not found:
        print ">>> Did not find any samples."
        if verbosity>=1:
          print ">>> %-8s = %s"%('filters',filters)
          print ">>> %-8s = %s"%('vetoes',vetoes)