Beispiel #1
0
def testModule(era):
    """Request the samples of the given era without any selection criteria.

    Every selection keyword (channel, dtype, filter, veto) is passed to
    `getsamples` as None. The result is only bound to a local name; nothing
    is returned.
    """
    selection = dict(channel=None, dtype=None, filter=None, veto=None)
    samples = getsamples(era, **selection)
Beispiel #2
0
def main_get(args):
    """Get information of given variable in configuration or samples."""
    if args.verbosity >= 1:
        print ">>> main_get", args
    variable = args.variable
    eras = args.eras  # eras to loop over and run
    channels = args.channels or [""]  # channels to loop over and run
    dtypes = args.dtypes  # filter (only include) these sample types ('data','mc','embed')
    filters = args.samples  # filter (only include) these samples (glob patterns)
    dasfilters = args.dasfilters  # filter (only include) these das paths (glob patterns)
    vetoes = args.vetoes  # exclude these sample (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    inclurl = args.inclurl  # include URL in filelist
    checkdas = args.checkdas or args.dasfiles  # check file list in DAS
    checklocal = args.checklocal  # check nevents in local files
    split = args.split  # split samples with multiple DAS dataset paths
    limit = args.limit
    writedir = args.write  # write sample file list to text file
    tag = args.tag
    ncores = args.ncores  # number of cores to get nevents in parallel
    verbosity = args.verbosity
    getnevts = variable in ['nevents', 'nevts']
    cfgname = CONFIG._path
    if verbosity >= 1:
        print '-' * 80
        print ">>> %-14s = %s" % ('variable', variable)
        print ">>> %-14s = %s" % ('eras', eras)
        print ">>> %-14s = %s" % ('channels', channels)
        print ">>> %-14s = %s" % ('cfgname', cfgname)
        print ">>> %-14s = %s" % ('config', CONFIG)
        print ">>> %-14s = %s" % ('checkdas', checkdas)
        print ">>> %-14s = %s" % ('checklocal', checklocal)
        print ">>> %-14s = %s" % ('split', split)
        print ">>> %-14s = %s" % ('limit', limit)
        print ">>> %-14s = %s" % ('writedir', writedir)
        print ">>> %-14s = %s" % ('ncores', ncores)
        print '-' * 80

    # LIST SAMPLES
    if variable == 'samples':
        if not eras:
            LOG.warning("Please specify an era to get a sample for.")
        for era in eras:
            for channel in channels:
                if channel:
                    print ">>> Getting file list for era %r, channel %r" % (
                        era, channel)
                else:
                    print ">>> Getting file list for era %r" % (era)
                samples = getsamples(era,
                                     channel=channel,
                                     dtype=dtypes,
                                     filter=filters,
                                     veto=vetoes,
                                     dasfilter=dasfilters,
                                     dasveto=dasvetoes,
                                     verb=verbosity)
                if not samples:
                    LOG.warning("No samples found for era %r." % (era))
                for sample in samples:
                    print ">>> %s" % (bold(sample.name))
                    for path in sample.paths:
                        print ">>>   %s" % (path)

    # LIST SAMPLE FILES
    elif variable in ['files', 'nevents', 'nevts']:

        # LOOP over ERAS & CHANNELS
        if not eras:
            LOG.warning("Please specify an era to get a sample for.")
        for era in eras:
            for channel in channels:
                target = "file list" if variable == 'files' else "nevents"
                if channel:
                    print ">>> Getting %s for era %r, channel %r" % (
                        target, era, channel)
                else:
                    print ">>> Getting %s for era %r" % (target, era)
                print ">>> "

                # GET SAMPLES
                LOG.insist(
                    era in CONFIG.eras,
                    "Era '%s' not found in the configuration file. Available: %s"
                    % (era, CONFIG.eras))
                samples = getsamples(era,
                                     channel=channel,
                                     dtype=dtypes,
                                     filter=filters,
                                     veto=vetoes,
                                     dasfilter=dasfilters,
                                     dasveto=dasvetoes,
                                     split=split,
                                     verb=verbosity)

                # LOOP over SAMPLES
                for sample in samples:
                    print ">>> %s" % (bold(sample.name))
                    for path in sample.paths:
                        print ">>> %s" % (bold(path))
                    if getnevts or checkdas or checklocal:
                        das = checkdas and not checklocal  # checklocal overrides checkdas
                        refresh = das  # (not sample.storage and all('/store' in f for f in sample.files)
                        nevents = sample.getnevents(das=das,
                                                    refresh=refresh,
                                                    verb=verbosity + 1)
                        storage = "(%s)" % sample.storage.__class__.__name__ if checklocal else "(DAS)" if checkdas else ""
                        print ">>>   %-7s = %s %s" % ('nevents', nevents,
                                                      storage)
                    if variable == 'files':
                        infiles = sample.getfiles(das=checkdas,
                                                  url=inclurl,
                                                  limit=limit,
                                                  verb=verbosity + 1)
                        print ">>>   %-7s = %r" % ('channel', channel)
                        print ">>>   %-7s = %r" % ('url', sample.url)
                        print ">>>   %-7s = %r" % ('postfix', sample.postfix)
                        print ">>>   %-7s = %s" % ('nfiles', len(infiles))
                        print ">>>   %-7s = [ " % ('infiles')
                        for file in infiles:
                            print ">>>     %r" % file
                        print ">>>   ]"
                    print ">>> "
                    if writedir:  # write files to text files
                        flistname = repkey(writedir,
                                           ERA=era,
                                           GROUP=sample.group,
                                           SAMPLE=sample.name,
                                           TAG=tag)
                        print ">>> Write list to %r..." % (flistname)
                        sample.writefiles(flistname,
                                          nevts=getnevts,
                                          ncores=ncores)

    # CONFIGURATION
    else:
        if variable in CONFIG:
            print ">>> Configuration of %r: %s" % (variable,
                                                   color(CONFIG[variable]))
        else:
            print ">>> Did not find %r in the configuration" % (variable)
Beispiel #3
0
def main_run(args):
    """Run given module locally."""
    if args.verbosity >= 1:
        print ">>> main_run", args
    eras = args.eras  # eras to loop over and run
    channels = args.channels  # channels to loop over and run
    tag = args.tag  # extra tag for output file
    outdir = args.outdir  # output directory
    dtypes = args.dtypes  # filter (only include) these sample types ('data','mc','embed')
    filters = args.samples  # filter (only include) these samples (glob patterns)
    dasfilters = args.dasfilters  # filter (only include) these das paths (glob patterns)
    vetoes = args.vetoes  # exclude these sample (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    extraopts = args.extraopts  # extra options for module (for all runs)
    prefetch = args.prefetch  # copy input file first to local output directory
    maxevts = args.maxevts  # maximum number of files (per sample, era, channel)
    dasfiles = args.dasfiles  # explicitly process nanoAOD files stored on DAS (as opposed to local storage)
    userfiles = args.infiles  # use these input files
    nfiles = args.nfiles  # maximum number of files (per sample, era, channel)
    nsamples = args.nsamples  # maximum number of samples (per era, channel)
    dryrun = args.dryrun  # prepare and print command, without executing
    verbosity = args.verbosity  # verbosity to print out what's going on under the hood
    preselect = args.preselect  # extra selection string
    if len(filters) == 0:
        filters = [None]

    # LOOP over ERAS
    if not eras:
        print ">>> Please specify a valid era (-y)."
    if not channels:
        print ">>> Please specify a valid channel (-c)."
    for era in eras:
        moddict = {
        }  # save time by loading samples and get their files only once

        # LOOP over CHANNELS
        for channel in channels:
            LOG.header("%s, %s" % (era, channel))

            # MODULE & PROCESSOR
            skim = 'skim' in channel.lower()
            module, processor, procopts, extrachopts = getmodule(
                channel, extraopts)

            # VERBOSE
            if verbosity >= 1:
                print '-' * 80
                print ">>> Running %r" % (channel)
                print ">>> %-12s = %r" % ('channel', channel)
                print ">>> %-12s = %r" % ('module', module)
                print ">>> %-12s = %r" % ('processor', processor)
                print ">>> %-12s = %r" % ('procopts', procopts)
                print ">>> %-12s = %r" % ('extrachopts', extrachopts)
                print ">>> %-12s = %r" % ('prefetch', prefetch)
                print ">>> %-12s = %r" % ('preselect', preselect)
                print ">>> %-12s = %s" % ('filters', filters)
                print ">>> %-12s = %s" % ('vetoes', vetoes)
                print ">>> %-12s = %r" % ('dtypes', dtypes)
                print ">>> %-12s = %r" % ('userfiles', userfiles)
                print ">>> %-12s = %r" % ('outdir', outdir)

            # LOOP over FILTERS
            samples = []
            for filter in filters:
                filters_ = [filter] if filter else []
                if verbosity >= 2:
                    print ">>> Checking filters=%s, vetoes=%s, dtypes=%s..." % (
                        filters_, vetoes, dtypes)

                # GET SAMPLES
                if not userfiles and (filters_ or vetoes or dtypes):
                    LOG.insist(
                        era in CONFIG.eras,
                        "Era '%s' not found in the configuration file. Available: %s"
                        % (era, CONFIG.eras))
                    samples_ = getsamples(era,
                                          channel=channel,
                                          tag=tag,
                                          dtype=dtypes,
                                          filter=filters_,
                                          veto=vetoes,
                                          dasfilter=dasfilters,
                                          dasveto=dasvetoes,
                                          moddict=moddict,
                                          verb=verbosity)
                    for sample in samples_[:]:
                        if sample in samples:  # avoid duplicates
                            samples_.remove(sample)
                    if nsamples > 0:  # limit number of samples to maximum nsamples
                        samples_ = samples_[:nsamples]
                    samples.extend(samples_)
            if not sample:
                samples = [None]
                if not userfiles and (filters_ or vetoes or dtypes):
                    print_no_samples(dtypes, filters_, vetoes)
            if verbosity >= 1:
                print ">>> %-12s = %r" % ('samples', samples)
                print '-' * 80

            # LOOP over SAMPLES
            for sample in samples:
                if sample:
                    print ">>> %s" % (bold(sample.name))
                    if verbosity >= 1:
                        for path in sample.paths:
                            print ">>> %s" % (bold(path))

                # SETTINGS
                dtype = None
                extraopts_ = extrachopts[:]  # extra options for module (for this channel & sample)
                if sample:
                    filetag = "_%s_%s_%s%s" % (channel, era, sample.name, tag)
                    if sample.extraopts:
                        extraopts_.extend(sample.extraopts)
                else:
                    filetag = "_%s_%s%s" % (channel, era, tag)
                if verbosity >= 1:
                    print ">>> %-12s = %s" % ('sample', sample)
                    print ">>> %-12s = %r" % ('filetag', filetag)  # postfix
                    print ">>> %-12s = %s" % ('extraopts', extraopts_)

                # GET FILES
                infiles = []
                if userfiles:
                    infiles = userfiles[:]
                elif sample:
                    nevents = 0
                    infiles = sample.getfiles(das=dasfiles, verb=verbosity)
                    dtype = sample.dtype
                    if nfiles > 0:
                        infiles = infiles[:nfiles]
                    if verbosity == 1:
                        print ">>> %-12s = %r" % ('dtype', dtype)
                        print ">>> %-12s = %s" % ('nfiles', len(infiles))
                        print ">>> %-12s = [ " % ('infiles')
                        for file in infiles:
                            print ">>>   %r" % file
                        print ">>> ]"

                # RUN
                runcmd = processor
                if procopts:
                    runcmd += " %s" % (procopts)
                if skim:
                    runcmd += " -y %s -o %s" % (era, outdir)
                    if preselect:
                        runcmd += " --preselect '%s'" % (preselect)
                ###elif 'test' in channel:
                ###  runcmd += " -o %s"%(outdir)
                else:  # analysis
                    runcmd += " -y %s -c %s -M %s -o %s" % (era, channel,
                                                            module, outdir)
                if dtype:
                    runcmd += " -d %r" % (dtype)
                if filetag:
                    runcmd += " -t %r" % (filetag)  # postfix
                if maxevts:
                    runcmd += " -m %s" % (maxevts)
                if infiles:
                    runcmd += " -i %s" % (' '.join(infiles))
                if prefetch:
                    runcmd += " -p"
                if extraopts_:
                    runcmd += " --opt '%s'" % ("' '".join(extraopts_))
                #elif nfiles:
                #  runcmd += " --nfiles %s"%(nfiles)
                print ">>> Executing: " + bold(runcmd)
                if not dryrun:
                    #execute(runcmd,dry=dryrun,verb=verbosity+1) # real-time print out does not work well with python script
                    os.system(runcmd)
                print
Beispiel #4
0
def main_write(args):
    """Get information of given variable in configuration or samples."""
    if args.verbosity >= 1:
        print ">>> main_write", args
    listname = args.listname  # write sample file list to text file
    eras = args.eras  # eras to loop over and run
    channels = args.channels or [""]  # channels to loop over and run
    dtypes = args.dtypes  # filter (only include) these sample types ('data','mc','embed')
    filters = args.samples  # filter (only include) these samples (glob patterns)
    dasfilters = args.dasfilters  # filter (only include) these das paths (glob patterns)
    vetoes = args.vetoes  # exclude these sample (glob patterns)
    dasvetoes = args.dasvetoes  # exclude these DAS paths (glob patterns)
    checkdas = args.checkdas or args.dasfiles  # check file list in DAS
    split = args.split  # split samples with multiple DAS dataset paths
    retries = args.retries  # retry if error is thrown
    getnevts = args.getnevts  # check nevents in local files
    ncores = args.ncores  # number of cores to get nevents in parallel
    verbosity = args.verbosity
    cfgname = CONFIG._path
    if verbosity >= 1:
        print '-' * 80
        print ">>> %-14s = %s" % ('listname', listname)
        print ">>> %-14s = %s" % ('getnevts', getnevts)
        print ">>> %-14s = %s" % ('eras', eras)
        print ">>> %-14s = %s" % ('channels', channels)
        print ">>> %-14s = %s" % ('cfgname', cfgname)
        print ">>> %-14s = %s" % ('config', CONFIG)
        print '-' * 80

    # LOOP over ERAS & CHANNELS
    if not eras:
        LOG.warning("Please specify an era to get a sample for.")
    for era in eras:
        for channel in channels:
            info = ">>> Getting file list for era %r" % (era)
            if channel:
                info += ", channel %r" % (channel)
            print info
            print ">>> "

            LOG.insist(
                era in CONFIG.eras,
                "Era '%s' not found in the configuration file. Available: %s" %
                (era, CONFIG.eras))
            samples0 = getsamples(era,
                                  channel=channel,
                                  dtype=dtypes,
                                  filter=filters,
                                  veto=vetoes,
                                  dasfilter=dasfilters,
                                  dasveto=dasvetoes,
                                  split=split,
                                  verb=verbosity)
            sampleset = [samples0]
            for retry in range(retries):
                sampleset.append([])  # list for retries

            # LOOP over SAMPLES
            for retry, samples in enumerate(sampleset):
                if not samples:
                    break
                if retry > 0 and len(samples0) > 1:
                    if retries >= 2:
                        print ">>> Retry %d/%d: %d/%d samples...\n>>>" % (
                            retry, retries, len(samples), len(samples0))
                    else:
                        print ">>> Trying again %d/%d samples...\n>>>" % (
                            len(samples), len(samples0))
                for sample in samples:
                    print ">>> %s" % (bold(sample.name))
                    sample.filelist = None  # do not load from existing text file; overwrite existing ones
                    for path in sample.paths:
                        print ">>> %s" % (bold(path))
                    #infiles = sample.getfiles(das=checkdas,url=inclurl,limit=limit,verb=verbosity+1)
                    flistname = repkey(listname,
                                       ERA=era,
                                       GROUP=sample.group,
                                       SAMPLE=sample.name)  #,TAG=tag
                    try:
                        sample.writefiles(flistname,
                                          nevts=getnevts,
                                          das=checkdas,
                                          refresh=checkdas,
                                          ncores=ncores,
                                          verb=verbosity)
                    except IOError as err:  # one of the ROOT file could not be opened
                        print "IOError: " + err.message
                        if retry < retries and sample not in sampleset[
                                retry + 1]:  # try again after the others
                            print ">>> Will try again..."
                            sampleset[retry + 1].append(sample)
                    print ">>> "