def get_sample_info(sample):
    global samplePattern
    weight = {'num events': 0.0,
              'errors': [],
              'cross section': -1.0,
              'filter efficiency': -1.0,
              'k-factor': -1.0,
              'rel uncert': -1.0}
    getWeights_logger.info("Processing: {0:s}".format(sample.name()))
    did = get_did(sample.name())
    gen_tag = get_generator_tag(sample.name())
    getWeights_logger.info("\tDID: {0:s}\n\tGen: {1:s}".format(did, gen_tag))
    # find the corresponding EVNT sample name
    res = api.list_datasets(client, patterns='{0:s}.{1:s}.%.evgen.EVNT.{2:s}'.format('mc15_13TeV', did, gen_tag))
    if len(res) != 1:
        return (did, weight)
    evnt_file_name = res[0]['ldn']
    getWeights_logger.info("\tEVNT file: {0:s}".format(evnt_file_name))
    try:
        for fname in sample.makeFileList():
            count = get_cutflow(fname)
            # get_cutflow() returns the filename if it can't open the file for
            # reading, so a failed float() conversion flags that file as an error
            try:
                weight['num events'] += float(count)
            except (ValueError, TypeError):
                weight['errors'].append(count)
        weight.update(get_info(evnt_file_name))
        return (did, weight)
    except Exception as e:
        # we crashed somewhere unexpected
        getWeights_logger.exception("{0}\nAn exception was caught!".format("-" * 20))
        return (did, weight)
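# The helpers get_did() and get_generator_tag() are not shown in this snippet.
# Below is a hypothetical sketch of what they are assumed to do, based on the
# standard "mc15_13TeV.<DID>.<short name>.<step>.<format>.<tags>" naming
# convention; the regex and field positions are illustrative assumptions, not
# the original implementation.
import re

def get_did(sample_name):
    # assumption: the dataset ID (DID) is the second dot-separated field
    return sample_name.split('.')[1]

def get_generator_tag(sample_name):
    # assumption: the evgen tag is the first e-tag in the final tag field
    match = re.search(r'\.(e\d+)_', sample_name)
    return match.group(1) if match else ''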
def getLdn(s, dsid, daodType, ptag, rtag=None):
    pattern = ""
    if not rtag:
        pattern = "mc15_13TeV." + dsid + "%.merge.DAOD_" + daodType + "%" + ptag + "%"
    else:
        pattern = "mc15_13TeV." + dsid + "%.merge.DAOD_" + daodType + "%" + rtag + "%" + ptag + "%"
    samples = AtlasAPI.list_datasets(s.client, patterns=[pattern])
    if len(samples) == 0:
        print "NO DAOD_%s DS found for %s" % (daodType, dsid)
        return None
    elif len(samples) != 1:
        print "More than one DAOD_%s DS found for %s" % (daodType, dsid)
        for sample in samples:
            print sample
        print "please set rtag to select the proper one"
        return None
    return samples[0]["ldn"]
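# A minimal usage sketch for getLdn(). It assumes only what the signature
# above implies: `s` is some object carrying an initialized pyAMI client as
# `s.client`. The Session class, DSID and tags here are illustrative.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

class Session(object):
    def __init__(self):
        self.client = pyAMI.client.Client('atlas')

AtlasAPI.init()
s = Session()

# resolve a unique DAOD_TOPQ1 dataset name for DSID 410000 at p-tag p2669;
# pass rtag explicitly if more than one reconstruction tag matches
ldn = getLdn(s, "410000", "TOPQ1", "p2669", rtag="r7725")
print ldn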
def main(fileList, samples):
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    dirDict = getDirNames(samples)
    listInput = open(fileList)
    inputDatasets = listInput.readlines()
    listToCheck = []
    for ds in inputDatasets:
        if "#" in ds:
            continue
        dsid = ds.split(".")[1]
        ds = ds.strip("\n")
        tmp = AtlasAPI.list_datasets(client, patterns=[ds], fields=['events'])
        try:
            inputEvents = int(tmp[0]['events'])
        except:
            print "WARNING: skipping", ds
            continue
        try:
            processedEvents = getNumberEvents(dirDict[dsid])
        except KeyError:
            print "WARNING: this sample may be missing:", dsid, ", not found in input samples", samples
            processedEvents = 0
        if inputEvents == 0:
            print "inputEvents==0 for", ds
            continue
        frac = float(processedEvents) / float(inputEvents)
        diff = inputEvents - processedEvents
        if diff > 0:
            listToCheck += [dsid]
            missing_sample_line = ' ----> Event numbers do NOT match (%s/%s). Please check your download for %s. Fraction of events downloaded %0.4f. Difference in events: %s' % (processedEvents, inputEvents, dsid, frac, diff)
            print missing_sample_line
    print "\nThe following list of samples is incomplete/missing:", ' '.join(map(str, listToCheck))
    print "============================================================================================================"
    for sp in listToCheck:
        for line in inputDatasets:
            if re.search(sp, line):
                print line,
    print "============================================================================================================"
def loadRuns(self, Y, derivations=[], project="13TeV"):
    ### import AMI
    getAmiClient()
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    periods = GetPeriodRunConverter().GetSubPeriods(Y, project=project)
    ### This pattern line is fragile: if the project changes to cosmics ("cos")
    ### or heavy ions ("hi"), the pattern may differ from what AMI expects
    Pattern = "data%i_%s.%%physics_Main.%%" % (Y, project)
    DSIDS = AtlasAPI.list_datasets(getAmiClient(),
                                   patterns=[Pattern],
                                   fields=['run_number', "period", 'type', 'events', 'ami_status'],
                                   period=",".join(periods),
                                   type=ClearFromDuplicates(["AOD"] + derivations))
    ### Read out the AMI query
    for entry in DSIDS:
        R = int(entry["run_number"])
        if not self.getRunElement(R):
            self.__runs += [AMIdataEntry(R)]
        runElement = self.getRunElement(R)
        flavour = entry["type"]
        tag = self.__getDSTag(entry['ldn'], flavour)
        nevents = int(entry['events'])
        runElement.addDataset(data_type=flavour, tag=tag, events=nevents, status="")
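# For reference, a stripped-down version of the same query outside the class:
# a minimal sketch assuming a valid grid proxy. The `period` keyword mirrors
# the usage in loadRuns() above; note that elsewhere in this collection the
# equivalent keyword appears as `data_period`, so check your pyAMI version.
# The period "D" and year 2015 are illustrative.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

# list all 2015 physics_Main AODs in data period D, with run numbers and event counts
datasets = AtlasAPI.list_datasets(client,
                                  patterns=["data15_13TeV.%physics_Main.%"],
                                  fields=['run_number', 'type', 'events'],
                                  period="D",
                                  type=["AOD"])
for entry in datasets:
    print entry['run_number'], entry['ldn'], entry['events']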
def main():
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDsTxt', action="store",
                        help="text file containing datasets to make PRW for (one per line) [REQUIRED]",
                        required=True)
    parser.add_argument('--outDS', action="store", default="",
                        help="Name of the output dataset", required=False)
    parser.add_argument('--forceStaged', action="store_true",
                        help="If set, grid jobs will be submitted with the forceStaged option")
    parser.add_argument('--skipNTUP_PILEUP', action="store_true",
                        help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles', nargs="*",
                        help="Existing PRW config files to check")
    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    # read datasets into list
    with open(args.inDsTxt) as f:
        datasets = f.read().splitlines()

    import ROOT
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  # trick the tool into going into counting mode

    # list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    print "Determining provenances of %d datasets ..." % len(datasets)

    aodDatasets = []
    ntupDatasets = []
    for dataset in datasets:
        dataset = dataset.strip("/")
        if dataset.startswith("#"):
            continue
        # strip the scope if it's there
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset) == 0:
            continue
        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset)
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        theParent = ""
        for ds in prov['node']:
            if ds[u'dataType'] != u'AOD':
                continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])
            break
        if theParent == "":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        # check input prw files, if any were specified
        isIncomplete = False
        if len(args.prwFiles):
            total = 0
            dsid = theParent.split(".")[1]
            for p in periodNumbers:
                if p == -1:
                    continue
                hist = out.GetInputHistogram(int(dsid), p)
                if hist:
                    total += hist.GetEntries()
            if total == theParentSize:
                print "INFO: %s is complete in your existing PRW files. Good!" % dataset
                continue
            if total > theParentSize:
                print "WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset, total, theParentSize)
                continue
            else:
                if total != 0:
                    print "WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset, total, theParentSize)
                    isIncomplete = True
        # before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            ntupDatasetName = theParent.replace("AOD", "NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("aod", "%")
            ntupDatasetName = ntupDatasetName.replace("merge", "%")
            # remove everything after the first rtag of the ami tag .. replace with wildcard
            first_rtag_pos = ntupDatasetName.index("_r", ntupDatasetName.index("NTUP_PILEUP"))
            try:
                next_underscore_pos = ntupDatasetName.index("_", first_rtag_pos + 1)
            except ValueError:
                next_underscore_pos = len(ntupDatasetName)
            ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%"
            res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status')
            foundNTUP = False
            for r in res:
                if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                    continue
                print "Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"]
                ntupDatasets += [r[u'ldn']]
                foundNTUP = True
                break
            if foundNTUP == True:
                continue
        aodDatasets += [theParent]

    if len(aodDatasets) > 0:
        if args.outDS == "":
            print "NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset"
            print ",".join(aodDatasets)
            return 1
        print "...submitting job to grid..."
        extraOpts = ""
        if args.forceStaged:
            extraOpts += "--forceStaged "
        mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets), args.outDS, extraOpts)
        print "Command: %s" % mycommand
        from subprocess import call
        if call(mycommand, shell=True) != 0:
            print "Problem executing command. Did you remember to do: lsetup panda"
            return 1
        print "... finished. Please monitor your job on the grid, and when it is finished, download the files!"

    if len(ntupDatasets):
        frucio_fn = 'rucio_downloads_%s.sh' % args.inDsTxt
        print "Please download existing config files from these datasets (see also output file %s):" % frucio_fn
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = "rucio download %s" % ds
                print command
                frucio.write(command + '\n')
        print ""

    if len(ntupDatasets) or len(aodDatasets):
        print "After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt
        print "Thank you for generating config files, you get a gold star!"
    else:
        print "Looks like you are ready for pileup reweighting!"

    return 0
# Get total number of events
n_aod_events = 0
for f in os.listdir(mc_dir + d):
    tf = ROOT.TFile(mc_dir + d + '/' + f, 'READ')
    n_aod_events += tf.Get('EventCountHist').GetBinContent(1)

# Compare to AMI value
did = d.split(".")[2]
tags = d.split(".")[3].split("_p")[0]

# Don't check data
if did.startswith("00"):
    continue

# Get all possible AODs for the output ntuple
base_files = AtlasAPI.list_datasets(client,
                                    patterns=['mc15_13TeV.' + did + '%merge.AOD%' + tags + '%'],
                                    type='AOD')
print "-------------------------------------------------------------------------"
print "Ntuple name:", d
print base_files[0]['ldn']
if len(base_files) > 1:
    print "Found more than one matching AOD."
    print "For file", f
    print "using", base_files[0]['ldn']
info = AtlasAPI.get_dataset_info(client, base_files[0]['ldn'])[0]
n_real_events = info.get('totalEvents')
if float(n_real_events) < float(n_aod_events):
    print "\033[91m Error in dsid", did
    bad_dids += [did]
def getMCDataSets(self, channels=[], campaign="mc16_13TeV", derivations=[]):
    getAmiClient()
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    data_type = ClearFromDuplicates(["AOD"] + derivations)
    channels_to_use = []
    # Check only the dsids which are non-existent or incomplete
    for mc in channels:
        ami_channel = self.getMCchannel(dsid=mc, campaign=campaign)
        if not ami_channel:
            channels_to_use.append(mc)
            continue
        # Check if the dsid is already complete w.r.t. all data formats
        to_append = False
        for data in data_type:
            if not ami_channel.hasDataType(data):
                to_append = True
            if to_append:
                break
        if to_append:
            channels_to_use.append(mc)
    Blocks = []
    # Try to group the queries into DSID blocks of thousands
    for mc in channels_to_use:
        FirstDigits = int(str(mc)[0:3])
        if FirstDigits not in Blocks:
            Blocks.append(FirstDigits)
    # Summarizing into blocks leads to a huge reduction of queries
    if len(Blocks) < len(channels_to_use):
        channels_to_use = Blocks
    print "<AMIDataBase> INFO: going to ask AMI about %d different things" % (len(channels_to_use))
    prompt = max(int(len(channels_to_use) / 10), 2)
    for i, mc in enumerate(channels_to_use):
        if i % prompt == 0:
            print "<AMIDataBase> INFO: %d/%d queries sent to AMI" % (i, len(channels_to_use))
        # AMI query
        DSIDS = AtlasAPI.list_datasets(getAmiClient(),
                                       patterns=["%s.%i%%.%%" % (campaign, mc)],
                                       fields=['type', 'events', 'ami_status', "physics_short",
                                               "dataset_number", "cross_section", "prodsys_status"],
                                       ### limit the query to at most 1000 * 50 rows
                                       limit=[1, 1000 * 50],
                                       type=data_type)
        for amiDS in DSIDS:
            DS = int(amiDS["dataset_number"])
            ami_entry = self.getMCchannel(dsid=DS, campaign=campaign)
            # a fresh AMImcEntry needs to be created
            if not ami_entry:
                physics_name = amiDS["physics_short"]
                try:
                    xS = float(amiDS["cross_section"])
                except Exception:
                    print "<AMIDataBase> WARNING: No x-section found for %s (%i) in AMI" % (physics_name, DS)
                    xS = 1.
                ami_entry = AMImcEntry(dsid=DS, xsec=xS, physics_name=physics_name, campaign=campaign)
                self.__mc_channels.append(ami_entry)
            ds_type = amiDS["type"]
            tag = self.__getDSTag(amiDS['ldn'])
            nevents = int(amiDS['events'])
            ami_entry.addDataset(data_type=ds_type, tag=tag, events=nevents, status="")
    return True
def makeContainer(c1):
    ### get run list
    x0 = AtlasAPI.list_datasets(client, patterns=[c1.project + '.%.physics_Main.merge.AOD%'],
                                fields=['run_number', 'ldn', 'events', 'total_size'],
                                order=None, limit=None, show_archived=False,
                                grl=c1.grl, data_period=c1.period)
    runs = set([a['run_number'] for a in x0])
    if len(runs) == 0:
        print('no run in period ' + c1.period + ' is listed in GRL:', c1.grl)
        return

    ### get AOD datasets
    x = AtlasAPI.list_datasets(client, patterns=[c1.project + '.%.physics_Main.merg%' + c1.derivation + '%'],
                               fields=['run_number', 'ldn', 'events', 'total_size'],
                               order=None, limit=None, show_archived=False,
                               grl=c1.grl, data_period=c1.period)

    ### get dataset info
    dic1 = {}
    for a in x:
        print(a['run_number'], a['ldn'])
        try:
            dic1[a['ldn'].split('_')[-1]].append((a['run_number'], a['ldn']))
        except KeyError:
            dic1[a['ldn'].split('_')[-1]] = [(a['run_number'], a['ldn'])]

    ### get the tags, sorted by popularity
    allTags = sorted(dic1.keys(), key=lambda k: len(dic1[k]), reverse=True)
    for tag in allTags:
        print(tag, ':', end='')
        for xx in dic1[tag]:
            print(xx[0], end='')
        print()

    ### use the most popular tags if not specified
    t_acceptTags = c1.acceptTags if c1.acceptTags else allTags

    #### get the list of datasets
    ds = []
    for t in t_acceptTags:
        for a in dic1.get(t, []):
            found = False
            for d in ds:
                if d[0] == a[0]:
                    found = True
                    break
            if not found:
                ds.append(a)

    ### warn when the given tag does not select any dataset
    if len(ds) == 0:
        print('No dataset in period', c1.period + ', exiting...')
        return

    #### find any missing runs
    for d in ds:
        print(d[0], d[1])
        runs.remove(d[0])
    print(runs)

    #### warn when there are any missing runs
    if len(runs) != 0:
        print('!' * 10)
        print('MISSING ', ' '.join(runs))

    ### prepare commands
    dlist = ','.join([d[1] for d in ds])
    superTag = 'period' + c1.period + ',' + c1.sTag + ds[0][1][-5:]
    comments = superTag + ',' + c1.derivation
    cmd = 'ami cmd COMAPopulateSuperProductionDataset -rucioRegistration="yes" -creationComment="' + comments + '" -selectionType="run_config" -superTag="' + superTag + '" -containedDatasets="' + dlist + '" -separator="," '
    print('command:', cmd)

    ### write out a script if asked
    if c1.outScript:
        with open(c1.outScript, 'a') as f1:
            f1.write(cmd + '\n')
        return

    ### create the container immediately if desired
    if not c1.autoCreate:
        while True:
            x = raw_input("create container: y[es]/N[o]/e[xit]")
            if x == 'e' or x == 'N':
                return
            elif x == 'y':
                break
    call(cmd, shell=True)
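# A hypothetical configuration object for makeContainer(); the attribute names
# mirror those used above (c1.project, c1.period, c1.derivation, ...), but the
# values are illustrative only.
class ContainerConfig(object):
    project = 'data15_13TeV'
    period = 'D'
    derivation = 'DAOD_EXOT2'
    grl = 'data15_13TeV.periodAllYear_StandardGRL_All_Good_25ns.xml'  # illustrative GRL name
    acceptTags = None   # None: use the most popular tags
    sTag = 'grp15_v01_'
    outScript = ''      # set to a filename to collect the ami commands instead of running them
    autoCreate = False  # False: prompt before creating the container

makeContainer(ContainerConfig())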
client = pyAMI.client.Client('atlas')
AtlasAPI.init()

inputDS = []
eventCounts = {}
inputFile = open(o.inFileName, "r")
for line in inputFile:
    if line.startswith("#"):
        continue
    words = line.split()
    if not len(words):
        continue
    dsName = words[0].rstrip("/")
    dsID = dsName.split(".")[1]
    print dsName
    try:
        dsInfo = AtlasAPI.list_datasets(client, patterns=dsName, fields=['events'])
        nEvents = dsInfo[0]['events']
    except:
        print "Skipping", dsName
        continue
    inputDS.append(dsName)
    eventCounts[dsName] = nEvents

for ds in inputDS:
    print ds, "\t", eventCounts[ds]
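# A note on the input format implied by the parsing above: o.inFileName is a
# text file with one dataset name per line (first whitespace-separated token
# is used), "#" comment lines are skipped, and a trailing "/" is tolerated.
# A hypothetical example file (dataset names are illustrative):
#
#   # ttbar nominal
#   mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669/
#   # single top
#   mc15_13TeV.410011.PowhegPythiaEvtGen_P2012_singletop_tchan_lept_top.merge.DAOD_TOPQ1.e3824_s2608_s2183_r7725_r7676_p2669/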
from pyAMI.client import Client
from pyAMI.atlas.api import list_datasets

client = Client('atlas')

#my_datasets = ["data15_13TeV.00280753.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281075.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279515.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280520.physics_Main.merge.DAOD_HIGG8D1.f632_m1504_p2432/","data15_13TeV.00279259.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280614.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281074.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281070.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280853.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280464.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276416.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280368.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280977.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279928.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276262.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00276954.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00278968.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279764.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276790.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280500.physics_Main.merge.DAOD_HIGG8D1.f631_m1504_p2432/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f638_m1511_p2432/","data15_13TeV.00279984.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276336.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280862.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279685.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280231.physics_Main.merge.DAOD_HIGG8D1.f630_m1504_p2432/","data15_13TeV.00278880.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276329.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00279932.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281317.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279169.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279867.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279284.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279345.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279598.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279279.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279813.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00278912.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276952.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00281385.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276778.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00276511.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280950.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280319.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280423.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00283074.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282712.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282631.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00276689.physics_Main.merge.DAOD_HIGG8D1.f623_m1480_p2432/","data15_13TeV.00283155.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282992.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282784.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00281411.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00284484.physics_Main.merge.DAOD_HIGG8D1.f644_m1518_p2432/","data15_13TeV.00284420.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00284427.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00284285.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00280673.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2436/","data15_13TeV.00284154.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283270.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2436/","data15_13TeV.00284213.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283780.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283429.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00284006.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283608.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/"]
my_datasets = ["data15_13TeV.00276262.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276329.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276336.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276416.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276511.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276689.physics_Main.merge.DAOD_HIGG8D1.f623_m1480_p2559/","data15_13TeV.00276778.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276790.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276952.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276954.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00278880.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00278912.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00278968.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279169.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279259.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279279.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279284.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279345.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279515.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279598.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279685.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279764.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279813.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279867.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279928.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279932.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00279984.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280231.physics_Main.merge.DAOD_HIGG8D1.f630_m1504_p2559/","data15_13TeV.00280319.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280368.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280423.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280464.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280500.physics_Main.merge.DAOD_HIGG8D1.f631_m1504_p2559/","data15_13TeV.00280520.physics_Main.merge.DAOD_HIGG8D1.f632_m1504_p2559/","data15_13TeV.00280614.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280673.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280753.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280853.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280862.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280950.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280977.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281070.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281074.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281075.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281317.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281385.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281411.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282631.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282712.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282784.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282992.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283074.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283155.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283270.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283429.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00283608.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00283780.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284006.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284154.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284213.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284285.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284420.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284427.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284484.physics_Main.merge.DAOD_HIGG8D1.f644_m1518_p2559/"]
#my_datasets = ["mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_HIGG8D1.e3698_s2608_s2183_r7267_r6282_p2501/"]
#my_datasets = ["mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_HIGG8D1.e3698_s2608_s2183_r7267_r6282_p2559/"]

total_events = 0
for dataset in my_datasets:
    print("Looking at dataset: {0}\n".format(dataset))
    samples = list_datasets(client, dataset, fields=['events'])
    for sample in samples:
        print("\t {0}".format(sample))
        total_events += int(sample['events'])
print("Total events: {0}".format(total_events))
    print(" => Done: %f x 10^30 cm^-2" % (integratedLumi))
    lumiPerRun[run] = integratedLumi

# now go to AMI to get the event yields for the datasets you're interested in
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

for run in d['Run']:
    pattern = "data15_13TeV.%08d.physics_Main.merge.DESDM_RPVLL" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns=pattern, fields=['events'], type='DESDM_RPVLL')
    #print(dslist)
    if len(dslist) > 0:
        print(dslist[0]['events'])
        selectedEventsPerRun[run] = dslist[0]['events']
    pattern = "data15_13TeV.%08d.physics_Main.merge.AOD" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns=pattern, fields=['events'], type='AOD')
    #print(dslist)
    if len(dslist) > 0:
        print(dslist[0]['events'])
        eventsPerRun[run] = dslist[0]['events']
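# With eventsPerRun and selectedEventsPerRun filled above, the DESDM_RPVLL
# selection fraction per run follows directly. A minimal sketch (the AMI
# 'events' field arrives as a string, hence the float() casts; runs missing
# from either dict are skipped):
for run in sorted(eventsPerRun):
    if run not in selectedEventsPerRun:
        continue
    total = float(eventsPerRun[run])
    if total > 0:
        print("run %d: RPVLL fraction %.4f" % (run, float(selectedEventsPerRun[run]) / total))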
def main():
    logging.basicConfig(format='%(levelname)s:%(message)s')

    import sys
    import time, datetime
    from pytz import timezone
    import argparse

    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
        import pyAMI.config
    except ImportError:
        logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
        return -1

    extraFieldDefaults = {}  # {"approx_crossSection":None,"approx_GenFiltEff":1.0}
    fieldDefaults = {"subprocessID": 0, "dataset_number": 0}
    # populate the fieldDefaults ... for all, assume 'None'
    for field in pyAMI.config.tables['datasets'].keys():
        if str(field) == "cross_section":
            continue  # special exception because this field only present in ...
        if str(field) in fieldDefaults.keys():
            continue
        if str(field).startswith("@"):
            continue
        fieldDefaults[str(field)] = None

    import commands
    # check the voms proxy
    status, out = commands.getstatusoutput("voms-proxy-info -fqan -exists")
    if status != 0:
        logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")
        return -1

    try:
        client = pyAMI.client.Client('atlas')
        AtlasAPI.init()
    except:
        logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
        return -1

    # need to collect the ami dataset parameter defaults
    paramExplains = []  # for the help message only
    paramUnits = dict()
    paramDefaults = {}

    res = client.execute('ListPhysicsParameterDefs', format='dom_object')
    for r in res.get_rows():  # r is an OrderedDict
        explainString = "%s: %s" % (r[u'PARAMNAME'], r[u'DESCRIPTION'])
        if r[u'UNITS'] != u'NULL':
            explainString += " (units: %s)" % r[u'UNITS']
            paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
        if r[u'HASDEFAULT'] == u'N':
            paramDefaults[str(r[u'PARAMNAME'])] = None
        else:
            explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
            if r[u'PARAMTYPE'] == u'number':
                paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])  # FIXME: assumes all numeric parameters are floats
            elif r[u'PARAMTYPE'] == u'string':
                paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
        paramExplains += [explainString]

    paramDefaults["crossSection_pb"] = None
    paramUnits["crossSection_pb"] = "pb"
    paramExplains += ["crossSection_pb: Same as crossSection except in pb units (units: pb)"]

    cern_time = timezone('UCT')
    current_time = datetime.datetime.fromtimestamp(time.time(), cern_time).strftime('%Y-%m-%d %H:%M:%S')

    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDS', nargs='+', default=[""],
                        help="List of datasets to retrieve parameters for")
    parser.add_argument('--inDsTxt', default="",
                        help="Alternative to --inDS, can specify the datasets from an input file")
    parser.add_argument('--fields', nargs='+',
                        help="List of parameters to extract. Available parameters are: \n\n  %s\n\nYou can also include any from:\n  %s\nYou can also do keyword_xxx to add a bool branch for keywords" % ("\n  ".join(paramExplains), ", ".join(fieldDefaults.keys() + extraFieldDefaults.keys())),
                        default=["dataset_number", "crossSection", "kFactor", "genFiltEff"])
    parser.add_argument('--timestamp', default=current_time,
                        help="The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
    parser.add_argument('--physicsGroups', nargs='+', default=["PMG,MCGN"],
                        help="Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, fall back on MCGN values if unavailable). Allowed groups are:\n   PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")
    parser.add_argument('--oldTimestamp', default="",
                        help="If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters")
    parser.add_argument('--explainFields', nargs='+', default=[],
                        help="The fields you would like explained .. will appear as comment lines after each row in the output")
    parser.add_argument('--explainInfo', nargs='+', default=['explanation', 'insert_time'],
                        help="Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby. Default is: explanation,insert_time")
    parser.add_argument('--outFile', default=sys.stdout, type=argparse.FileType('w'),
                        help="Where to print the output to. Leave blank to print to stdout")
    parser.add_argument('--delim', default="",
                        help="The delimiter character. Defaults to spaces, leading to a nicely formatted table")
    parser.add_argument('-v', action='store_true', help="Verbose output for debugging")

    args = parser.parse_args()

    if args.v:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)
    logging.debug(args.inDS)
    logging.debug(args.fields)
    logging.debug(args.timestamp)

    if args.timestamp == "the dawn of time":
        logging.error("Unfortunately we don't know any parameters from this time period... but we're working on it!")
        return 9999

    # split elements of fields by comma to get the full list
    args.fields = sum((y.split(',') for y in args.fields), [])
    args.fields = [x.strip() for x in args.fields]  # strip whitespace

    # look for keyword_ fields, these are special ...
    args.keywords = []
    for f in args.fields:
        if f.startswith("keyword_"):
            k = f[8:]
            # add each keyword to the extraFieldDefaults so it is recognised
            extraFieldDefaults["keyword_%s" % k] = bool(False)
            args.keywords += [k]

    # same for physics groups
    args.physicsGroups = sum((y.split(',') for y in args.physicsGroups), [])
    args.physicsGroups = [x.strip() for x in args.physicsGroups]

    # same for explainFields and explainInfo
    args.explainFields = sum((y.split(',') for y in args.explainFields), [])
    args.explainFields = [x.strip() for x in args.explainFields]
    args.explainInfo = sum((y.split(',') for y in args.explainInfo), [])
    args.explainInfo = [x.strip() for x in args.explainInfo]

    if args.inDsTxt != '':
        args.inDS = readDsFromFile(args.inDsTxt)

    # and the same for inDS
    args.inDS = sum((y.split(',') for y in args.inDS), [])
    args.inDS = [x.strip() for x in args.inDS]

    # Plan:
    # 1. Check field values are allowed; obtain default field values at the same time.
    # 2. For each entry in inDS: if it contains a wildcard, obtain the list of matching
    #    datasets, otherwise check the dataset exists. Obtain the datasetid and numEvents
    #    properties at the same time, in case we need them.
    # 3. For each of these datasets, get parameters from AMI matching the timestamp.
    #    Organize into fields and index by subprocessID.
    # 4. Output a line to the output file.

    # 1.
    # before adding all the ami parameters, identify which of the provided fields are:
    # 1) obtained from the list_datasets command (dsFields), or 2) actual parameters
    dsFields = [x for x in args.fields if x in fieldDefaults.keys() and x not in ["subprocessID", "ldn"]]
    extraFields = [x for x in args.fields if x in extraFieldDefaults.keys()]
    paramFields = [x for x in args.fields if x in paramDefaults.keys()]

    if len(paramFields) > 0 and args.physicsGroups == [""]:
        logging.error("You must specify at least one physics group. See -h for allowed groups")
        return -1

    # combine paramDefaults with fieldDefaults, and with the extra fields
    fieldDefaults.update(paramDefaults)
    fieldDefaults.update(extraFieldDefaults)

    for field in args.fields:
        if field not in fieldDefaults:
            logging.error("%s is not a recognised field. Allowed fields are:" % field)
            logging.error(fieldDefaults.keys())
            return -1

    if args.oldTimestamp != "":
        logging.info("oldTimestamp option specified. Running in diff mode...")
        args.explainFields = args.fields
        args.explainInfo = ["explanation", "insert_time", "physicsGroup", "createdby"]

    # 2.
    # replace all '*' with '%' and strip any trailing "/"
    args.inDS = [ds.replace("*", "%") for ds in args.inDS]
    args.inDS = [ds.rstrip("/") for ds in args.inDS]

    if len(args.inDS) == 0 or (len(args.inDS) == 1 and args.inDS[0] == ""):
        logging.error("No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")
        return -1

    logging.info("Fetching list of datasets from AMI (this may take a few minutes)...")

    # obtain list of datasets
    res = AtlasAPI.list_datasets(client, patterns=args.inDS, fields=dsFields + ['ldn'],
                                 ami_status="VALID")  # changed status from %, to only catch valid now: wb 08/2015
    logging.info("...Found %d datasets matching your selection" % len(res))
    if len(res) == 0:
        return 0

    # NOTE: Should we allow retrieval of the extra information: keyword, genfiltereff,
    # approx crossection, .. these all come from the GetDatasetInfo ami command

    dataset_values = dict()
    for r in res:
        mydict = dict()
        dataset_values[str(r['ldn'])] = mydict
        for field in r.items():
            if str(field[0]) == "ldn":
                continue
            if str(field[0]) not in args.fields:
                continue
            mydict[str(field[0])] = str(field[1])
        # if we have the 'extra fields or keywords' we need to execute AtlasAPI.get_dataset_info ..
        if len(extraFields) > 0 or len(args.keywords) > 0:
            info_res = AtlasAPI.get_dataset_info(client, str(r['ldn']))
            #print(info_res)
            if len(info_res) == 0:
                logging.error("Unable to retrieve dataset info for %s" % str(r['ldn']))
                return -1
            for field in extraFields:
                # ignore the keyword_ fields
                if field.startswith("keyword_"):
                    continue
                mydict[field] = float(info_res[0][unicode(field)]) if isfloat(info_res[0][unicode(field)]) else extraFieldDefaults[field]
            for k in args.keywords:
                mydict["keyword_%s" % k] = int((k in str(info_res[0][unicode('keyword')]).split(",")))

    # sort dataset_values as well as possible
    from collections import OrderedDict
    sorted_values = OrderedDict()
    for ds in args.inDS:
        if ds in dataset_values.keys():
            sorted_values[ds] = dataset_values[ds]
    for ds in sorted(dataset_values):
        if ds not in sorted_values.keys():
            sorted_values[ds] = dataset_values[ds]
    dataset_values = sorted_values
    logging.debug(dataset_values)

    #res = client.execute(['GetDatasetInfo

    for ds in args.inDS:
        if '%' not in ds and ds not in dataset_values.keys():
            logging.warning("Unknown dataset: %s" % ds)

    datasetsToQuery = ",".join(dataset_values.keys())

    # if using inDsTxt, retain any comment or blank lines in the structure of the output
    complete_values = OrderedDict()
    if args.inDsTxt != "":
        # read lines
        commentcount = 0
        import re
        txt = open(args.inDsTxt)
        for tmpLine in txt:
            # remove \n
            tmpLine = re.sub('\n', '', tmpLine)
            # remove white spaces
            tmpLine = tmpLine.strip()
            # skip comment or empty
            if tmpLine.startswith('#') or tmpLine == '':
                complete_values['comment%d' % (commentcount)] = tmpLine
                commentcount = commentcount + 1
                continue
            # append
            tmpLine = tmpLine.rstrip("/")
            if tmpLine in dataset_values.keys():
                complete_values[tmpLine] = dataset_values[tmpLine]
            else:
                print("cannot find %s" % tmpLine)
        # close file
        txt.close()
        dataset_values = complete_values

    logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.timestamp))

    # do this as one query, to be efficient
    if (args.timestamp == current_time):
        res = client.execute(['GetPhysicsParamsForDataset',
                              "--logicalDatasetName=%s" % datasetsToQuery,
                              "--timestamp='%s'" % args.timestamp], format='dom_object')
    else:
        res = client.execute(['GetPhysicsParamsForDataset',
                              "--logicalDatasetName=%s" % datasetsToQuery,
                              "--timestamp='%s'" % args.timestamp,
                              "--history=true"], format='dom_object')

    # organize the results by dataset
    parameterQueryResults = dict()
    for r in res.get_rows():
        if r[u'logicalDatasetName'] not in parameterQueryResults.keys():
            parameterQueryResults[r[u'logicalDatasetName']] = []
        parameterQueryResults[r[u'logicalDatasetName']] += [r]  # puts the row in the list for this dataset

    if args.oldTimestamp != "":
        logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.oldTimestamp))
        res2 = client.execute(['GetPhysicsParamsForDataset',
                               "--logicalDatasetName=%s" % datasetsToQuery,
                               "--timestamp='%s'" % args.oldTimestamp,
                               "--history=true"], format='dom_object')
        old_parameterQueryResults = dict()
        for r in res2.get_rows():
            if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys():
                old_parameterQueryResults[r[u'logicalDatasetName']] = []
            old_parameterQueryResults[r[u'logicalDatasetName']] += [r]  # puts the row in the list for this dataset

    headerString = ""
    doneHeader = False
    commentCache = ""
    commentCount = 0

    # the result is a list of lists (each inner list is one row)
    outputTable = []
    tableHeaders = []

    for ds in dataset_values.keys():
        if ds.startswith('comment'):
            if commentCount > 0:
                commentCache += "\n"
            commentCache += dataset_values[ds]
            commentCount = commentCount + 1
            continue
        # obtain the list of parameters for this dataset
        #if(args.timestamp==current_time):
        #    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp], format='dom_object')
        #else:
        #    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object')
        res = parameterQueryResults.get(ds, [])
        if args.oldTimestamp != "":
            res2 = old_parameterQueryResults.get(ds, [])

        # first we have to determine how many subprocesses this ds has
        dsSubprocesses = [0]  # always have the 0 subprocess
        for r in res:
            sp = int(r[u'subprocessID'])
            if sp not in dsSubprocesses:
                dsSubprocesses += [sp]

        # now for each subprocess we have to locate each required field value
        # (in paramFields), ranked by physicsGroup
        for sp in dsSubprocesses:
            paramVals = dict()
            paramVals2 = dict()
            groupsWithVals = dict()  # held for helpful output
            # need to keep explanations for requested fields
            explainInfo = dict()
            for i in args.explainFields:
                explainInfo[i] = dict()

            for param in paramFields:
                groupsWithVals[param] = []
                bestGroupIndex = len(args.physicsGroups)
                import copy
                paramVals[param] = copy.copy(fieldDefaults[param])
                for r in res:
                    if int(r[u'subprocessID']) != sp:
                        continue
                    if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                        continue
                    if str(r[u'physicsGroup']) not in args.physicsGroups:
                        groupsWithVals[param] += [(str(r[u'physicsGroup']), str(r[u'paramValue']))]
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                        logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                    paramVals[param] = str(r[u'paramValue'])
                    if param == "crossSection_pb":
                        paramVals[param] = str(float(paramVals[param]) * 1000.0)
                    bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))
                    # keep the explanation info for the requested fields
                    if param in explainInfo.keys():
                        for e in args.explainInfo:
                            if unicode(e) not in r:
                                logging.error("Unrecognised explainInfo field: %s" % e)
                                return -1
                            explainInfo[param][e] = str(r[unicode(e)])
                if args.oldTimestamp != "":
                    bestGroupIndex = len(args.physicsGroups)
                    paramVals2[param] = copy.copy(fieldDefaults[param])
                    for r in res2:
                        if int(r[u'subprocessID']) != sp:
                            continue
                        if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                            continue
                        if str(r[u'physicsGroup']) not in args.physicsGroups:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                            logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                        paramVals2[param] = str(r[u'paramValue'])
                        if param == "crossSection_pb":
                            paramVals2[param] = str(float(paramVals2[param]) * 1000.0)
                        bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))

            # at this stage, parameters reside in the paramVals dict or the dataset_values[ds] dict.
            # print them in the requested order .. if any is "None" then stop, because it doesn't
            # have a default value and we didn't find a value for it either
            rowString = ""
            rowList = []
            firstPrint = False
            for param in args.fields:
                val = None
                if param == "ldn":
                    val = ds
                elif param == "subprocessID":
                    val = sp
                elif param in dataset_values[ds].keys():
                    val = dataset_values[ds][param]
                else:
                    val = paramVals.get(param, None)
                if val == None:
                    if args.outFile != sys.stdout:
                        logging.warning("dataset %s (subprocess %d) does not have parameter %s, which has no default." % (ds, sp, param))
                    if len(groupsWithVals.get(param, [])) > 0:
                        logging.warning("The following physicsGroups have defined that parameter though:")
                        logging.warning(groupsWithVals[param])
                    val = "#UNKNOWN#"
                    #return -1
                #if isfloat(str(val)): val = "%.6g" % float(val)
                if args.oldTimestamp != "":
                    # diff val to old val
                    val2 = None
                    if param == "ldn":
                        val2 = ds
                    elif param == "subprocessID":
                        val2 = sp
                    elif param in dataset_values[ds].keys():
                        val2 = dataset_values[ds][param]
                    else:
                        val2 = paramVals2.get(param, None)
                    if val2 == None:
                        val2 = "#UNKNOWN#"
                    #if isfloat(str(val2)): val2 = "%.6g" % float(val)
                    if (str(val) != str(val2)):
                        if not firstPrint:
                            print("%s:" % ds)
                            firstPrint = True
                        print(" %s : %s ---> %s" % (param, str(val2), str(val)))
                        print(" insert_time : %s" % explainInfo[param]['insert_time'])
                        print(" explanation : %s" % explainInfo[param]['explanation'])
                        print(" createdby : %s" % explainInfo[param]['createdby'])
                        print(" physicsGroup : %s" % explainInfo[param]['physicsGroup'])
                    continue

                rowList += [str(val)]
                if rowString != "" and args.delim != "":
                    rowString += args.delim
                rowString += str(val)
                # inspect the type of str(val) to build up the header
                if not doneHeader:
                    headerString += param
                    if args.outFile != sys.stdout:
                        if type(fieldDefaults[param]) == bool:
                            headerString += "/O:"
                        elif type(fieldDefaults[param]) == int:
                            headerString += "/I:"
                        elif type(fieldDefaults[param]) == float:
                            headerString += "/D:"
                        elif isfloat(str(val)):
                            headerString += "/D:"
                        #elif isint(str(val)): headerString += "/I:"
                        # TO BE SAFE WE MAKE ALL NUMERIC FIELDS FLOATS, EXCEPT if the default value is type int
                        else:
                            headerString += "/C:"
                    else:
                        v = param
                        if param in paramUnits:
                            headerString += " [%s]" % paramUnits[param]
                            v += " [%s]" % paramUnits[param]
                        tableHeaders += [v]
                        headerString += " "
            if args.oldTimestamp != "":
                continue  # print nothing more in diff mode
            if not doneHeader:
                doneHeader = True
                if args.outFile != sys.stdout:
                    print(headerString[:-1], file=args.outFile)
            if commentCount > 0:
                if args.outFile != sys.stdout and args.delim != "":
                    print(commentCache, file=args.outFile)
                outputTable += [["COMMENT", commentCache]]
                commentCache = ''
                commentCount = 0
            if args.outFile != sys.stdout and args.delim != "":
                print(rowString, file=args.outFile)
            outputTable += [rowList]
            # also print the required explanations
            for (field, expl) in explainInfo.items():
                outString = "#%s: { " % field
                doneFirst = False
                for eField in args.explainInfo:
                    if doneFirst:
                        outString += " , "
                    if not eField in expl.keys():
                        outString += " %s: <NONE .. value is default>" % eField
                    else:
                        outString += "%s: %s" % (eField, expl[eField])
                    doneFirst = True
                outString += " }"
                #print(outString,file=args.outFile)
                outputTable += [["COMMENT", outString]]

    if args.oldTimestamp != "":
        args.outFile.close()
        return 0

    # print the table in a nicely formatted state
    if args.outFile == sys.stdout or args.delim == "":
        # determine column widths
        columnWidths = [0] * len(args.fields)
        for i in range(0, len(tableHeaders)):
            columnWidths[i] = len(tableHeaders[i])
        for r in outputTable:
            if len(r) > 0 and r[0] == "COMMENT":
                continue
            for i in range(0, len(r)):
                if len(r[i]) > columnWidths[i]:
                    columnWidths[i] = len(r[i])
        lineout = ""
        for i in range(0, len(tableHeaders)):
            lineout += tableHeaders[i].ljust(columnWidths[i]) + " "
        print(lineout)
        for r in outputTable:
            lineout = ""
            if len(r) > 0 and r[0] == "COMMENT":
                lineout = r[1]
            else:
                for i in range(0, len(r)):
                    lineout += r[i].ljust(columnWidths[i]) + " "
            print(lineout, file=args.outFile)

    # print the footer, which is the command to reproduce this output
    import os
    if args.outFile != sys.stdout:
        # remove comments from dataset_values
        datasetss = [x for x in dataset_values.keys() if not x.startswith("comment")]
        print("", file=args.outFile)
        print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject', 'UNKNOWN!'), os.environ.get('AtlasVersion', 'UNKNOWN!')), file=args.outFile)
        print("#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp, ",".join(args.physicsGroups), ",".join(args.fields), ",".join(datasetss)), file=args.outFile)
        logging.info("Results written to: %s" % args.outFile.name)
        args.outFile.close()
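# For quick interactive checks, the same AMI command this script wraps can be
# called directly. A minimal sketch, assuming a valid voms proxy and that
# print_function is in effect (as in the script above); the dataset name and
# timestamp are illustrative:
import pyAMI.client

client = pyAMI.client.Client('atlas')
res = client.execute(['GetPhysicsParamsForDataset',
                      "--logicalDatasetName=mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669",
                      "--timestamp='2016-01-01 00:00:00'"], format='dom_object')
for row in res.get_rows():
    print(row[u'paramName'], row[u'paramValue'], row[u'physicsGroup'])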
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)
    if (config.baseline or config.official or config.sample) and config.grl != "":
        print "--grl is incompatible with --baseline, --official and --sample"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # consistency checks
    if config.whichMC15 != '':
        if config.whichMC15 == 'week1' and config.prefix != 'mc15_week1':
            print 'prefix changed to mc15_week1 in agreement with whichMC15'
            config.prefix = 'mc15_week1'
        elif config.whichMC15 == '50ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'
        elif config.whichMC15 == '25ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'

    # data type is NTUP_SUSY for 2011/2012 and AOD from 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        elif 'mc15_13TeV' in config.prefix:
            import mc15_13TeV_MCSampleList as mcsl
        elif 'mc15_week1' in config.prefix:
            import mc15_13TeV_week1_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV, mc14_13TeV, mc15_13TeV and mc15_week1'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]
    elif config.grl != "":
        if not os.path.exists(config.grl):
            print 'Could not find GRL', config.grl
            sys.exit(1)
        doc = ET.parse(config.grl)
        for item in doc.findall('./NamedLumiRange/Metadata'):
            if item.attrib['Name'] == 'RunList':
                for r in item.text.split(','):
                    officialids.append(int(r))

    # get all datasets matching prefix & tag, then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    alldatasets = []
    if config.whichMC15 != '':
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        for tag in mc15_rtags[config.whichMC15]:
            dskey = prefix + datatype + tag + config.tag
            print 'Querying AMI for datasets matching pattern', dskey
            alldatasets += list_datasets(client, dskey)
    else:
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        dskey = config.prefix + datatype + config.tag
        print 'Querying AMI for datasets matching pattern', dskey
        alldatasets = list_datasets(client, dskey)

    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample or config.grl != "") and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])

    # write file
    coveredids = set()
    if not (config.suffix == ""):
        myoutputfile = 'datasets_' + config.suffix + '.txt'
    else:
        myoutputfile = 'datasets.txt'
    fout = open(myoutputfile, 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            if config.grl == "":
                generatorString = info['generatorName']
                version = info['version']
                if badDataset(dsname, generatorString, version):
                    continue
            availability = info['prodsysStatus']
            if config.onlyComplete and availability != u'ALL EVENTS AVAILABLE':
                print 'Skip incomplete dataset', dsname, availability
                continue
            nFiles = int(info['nFiles'])
            if nFiles > 0 and config.prefix.startswith('data'):
                fout.write(dsname + '\n')
            elif nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    datasetNumber = int(info[u'datasetNumber'])
                    coveredids.add(datasetNumber)
                    # confirmed with the AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5.
                    # There are sometimes problems in the propagation of these
                    # properties to the xAOD/derived datasets, so go back in
                    # parentage to find the information
                    xsec = info[u'crossSection']
                    if info.has_key(u'approx_GenFiltEff'):
                        effic = info[u'approx_GenFiltEff']
                    if config.datatype == '%TRUTH1%':
                        effic = 1
                    if ((xsec == u'NULL' or not info.has_key(u'approx_GenFiltEff')) and not (config.datatype == '%TRUTH1%')):
                        xsec, effic = genParamsFromParents(client, dsname, datasetNumber)
                        if not xsec:
                            xsec = 0
                        if not effic:
                            print 'No approx_GenFiltEff found for', dsname, 'set to 0 !!!!'
                            effic = 0
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset', dsname, 'in AMI, skip'
    fout.close()

    if len(coveredids) == 0:
        if not config.prefix.startswith('data'):
            print 'Could not extract any channel IDs from datasets found; this is OK for data but suspicious for MC'
    else:
        for id in officialids:
            if not id in coveredids:
                print 'No dataset found for channel', id
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # data type is NTUP_SUSY for 2011/2012 and AOD from 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV and mc14_13TeV'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]

    # get all datasets matching prefix & tag, then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    dskey = config.prefix + datatype + config.tag
    print 'Querying AMI for datasets matching pattern', dskey
    alldatasets = list_datasets(client, dskey)
    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample) and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])

    # write file
    fout = open('datasets.txt', 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            generatorString = info['generatorName']
            version = info['version']
            if badDataset(dsname, generatorString, version):
                continue
            availability = info['prodsysStatus']
            nFiles = int(info['nFiles'])
            if nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    #(xsec, effic) = get_dataset_xsec_effic(client, info['logicalDatasetName'])
                    # confirmed with the AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5
                    xsec = info[u'crossSection']
                    effic = info[u'approx_GenFiltEff']
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset', dsname, 'in AMI, skip'
    fout.close()