def get_sample_info(sample):
    global samplePattern
    weight = {'num events': 0.0,
              'errors': [],
              'cross section': -1.0,
              'filter efficiency': -1.0,
              'k-factor': -1.0,
              'rel uncert': -1.0}
    getWeights_logger.info("Processing: {0:s}".format(sample.name()))
    did = get_did(sample.name())
    gen_tag = get_generator_tag(sample.name())
    getWeights_logger.info("\tDID: {0:s}\n\tGen: {1:s}".format(did, gen_tag))
    # find the corresponding EVNT sample name
    res = api.list_datasets(client, patterns='{0:s}.{1:s}.%.evgen.EVNT.{2:s}'.format('mc15_13TeV', did, gen_tag))
    if len(res) != 1:
        return (did, weight)
    evnt_file_name = res[0]['ldn']
    getWeights_logger.info("\tEVNT file: {0:s}".format(evnt_file_name))
    try:
        for fname in sample.makeFileList():
            count = get_cutflow(fname)
            # get_cutflow() returns the filename if it can't open the file for
            # reading, so a failed float() conversion flags that file as an error
            try:
                weight['num events'] += float(count)
            except (ValueError, TypeError):
                weight['errors'].append(count)
        weight.update(get_info(evnt_file_name))
        return (did, weight)
    except Exception as e:
        # we crashed somewhere unexpected
        getWeights_logger.exception("{0}\nAn exception was caught!".format("-" * 20))
        return (did, weight)
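# The helpers get_did() and get_generator_tag() are not shown in this snippet.
# Below is a hypothetical sketch of what they are assumed to do, based on the
# standard "mc15_13TeV.<DID>.<short name>.<step>.<format>.<tags>" naming
# convention; the regex and field positions are illustrative assumptions, not
# the original implementation.
import re

def get_did(sample_name):
    # assumption: the dataset ID (DID) is the second dot-separated field
    return sample_name.split('.')[1]

def get_generator_tag(sample_name):
    # assumption: the evgen tag is the first e-tag in the final tag field
    match = re.search(r'\.(e\d+)_', sample_name)
    return match.group(1) if match else ''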
def getLdn(s, dsid, daodType, ptag, rtag=None):
    pattern = ""
    if not rtag:
        pattern = "mc15_13TeV." + dsid + "%.merge.DAOD_" + daodType + "%" + ptag + "%"
    else:
        pattern = "mc15_13TeV." + dsid + "%.merge.DAOD_" + daodType + "%" + rtag + "%" + ptag + "%"
    samples = AtlasAPI.list_datasets(s.client, patterns=[pattern])
    if len(samples) == 0:
        print "NO DAOD_%s DS found for %s" % (daodType, dsid)
        return None
    elif len(samples) != 1:
        print "More than one DAOD_%s DS found for %s" % (daodType, dsid)
        for sample in samples:
            print sample
        print "please set rtag to select the proper one"
        return None
    return samples[0]["ldn"]
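# A minimal usage sketch for getLdn(). It assumes only what the signature
# above implies: `s` is some object carrying an initialized pyAMI client as
# `s.client`. The Session class, DSID and tags here are illustrative.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

class Session(object):
    def __init__(self):
        self.client = pyAMI.client.Client('atlas')

AtlasAPI.init()
s = Session()

# resolve a unique DAOD_TOPQ1 dataset name for DSID 410000 at p-tag p2669;
# pass rtag explicitly if more than one reconstruction tag matches
ldn = getLdn(s, "410000", "TOPQ1", "p2669", rtag="r7725")
print ldn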
def main(fileList, samples):
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    dirDict = getDirNames(samples)
    listInput = open(fileList)
    inputDatasets = listInput.readlines()
    listToCheck = []
    for ds in inputDatasets:
        if "#" in ds:
            continue
        dsid = ds.split(".")[1]
        ds = ds.strip("\n")
        tmp = AtlasAPI.list_datasets(client, patterns=[ds], fields=['events'])
        try:
            inputEvents = int(tmp[0]['events'])
        except:
            print "WARNING: skipping", ds
            continue
        try:
            processedEvents = getNumberEvents(dirDict[dsid])
        except KeyError:
            print "WARNING: this sample may be missing:", dsid, ", not found in input samples", samples
            processedEvents = 0
        if inputEvents == 0:
            print "inputEvents==0 for", ds
            continue
        frac = float(processedEvents) / float(inputEvents)
        diff = inputEvents - processedEvents
        if diff > 0:
            listToCheck += [dsid]
            missing_sample_line = ' ----> Event numbers do NOT match (%s/%s). Please check your download for %s. Fraction of events downloaded %0.4f. Difference in events: %s' % (processedEvents, inputEvents, dsid, frac, diff)
            print missing_sample_line
    print "\nThe following list of samples is incomplete/missing:", ' '.join(map(str, listToCheck))
    print "============================================================================================================"
    for sp in listToCheck:
        for line in inputDatasets:
            if re.search(sp, line):
                print line,
    print "============================================================================================================"
def loadRuns(self, Y, derivations=[], project="13TeV"):
    ### import AMI
    getAmiClient()
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    periods = GetPeriodRunConverter().GetSubPeriods(Y, project=project)
    ### This pattern line is fragile: if the project changes to cosmics ("cos")
    ### or heavy ions ("hi"), the pattern may differ from what AMI expects
    Pattern = "data%i_%s.%%physics_Main.%%" % (Y, project)
    DSIDS = AtlasAPI.list_datasets(getAmiClient(),
                                   patterns=[Pattern],
                                   fields=['run_number', "period", 'type', 'events', 'ami_status'],
                                   period=",".join(periods),
                                   type=ClearFromDuplicates(["AOD"] + derivations))
    ### Read out the AMI query
    for entry in DSIDS:
        R = int(entry["run_number"])
        if not self.getRunElement(R):
            self.__runs += [AMIdataEntry(R)]
        runElement = self.getRunElement(R)
        flavour = entry["type"]
        tag = self.__getDSTag(entry['ldn'], flavour)
        nevents = int(entry['events'])
        runElement.addDataset(data_type=flavour, tag=tag, events=nevents, status="")
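# For reference, a stripped-down version of the same query outside the class:
# a minimal sketch assuming a valid grid proxy. The `period` keyword mirrors
# the usage in loadRuns() above; note that elsewhere in this collection the
# equivalent keyword appears as `data_period`, so check your pyAMI version.
# The period "D" and year 2015 are illustrative.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

# list all 2015 physics_Main AODs in data period D, with run numbers and event counts
datasets = AtlasAPI.list_datasets(client,
                                  patterns=["data15_13TeV.%physics_Main.%"],
                                  fields=['run_number', 'type', 'events'],
                                  period="D",
                                  type=["AOD"])
for entry in datasets:
    print entry['run_number'], entry['ldn'], entry['events']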
def main():
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDsTxt', action="store",
                        help="text file containing datasets to make PRW for (one per line) [REQUIRED]",
                        required=True)
    parser.add_argument('--outDS', action="store", default="",
                        help="Name of the output dataset", required=False)
    parser.add_argument('--forceStaged', action="store_true",
                        help="If set, grid jobs will be submitted with the forceStaged option")
    parser.add_argument('--skipNTUP_PILEUP', action="store_true",
                        help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles', nargs="*",
                        help="Existing PRW config files to check")
    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    # read datasets into list
    with open(args.inDsTxt) as f:
        datasets = f.read().splitlines()

    import ROOT
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  # trick the tool into going into counting mode

    # list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    print "Determining provenances of %d datasets ..." % len(datasets)

    aodDatasets = []
    ntupDatasets = []
    for dataset in datasets:
        dataset = dataset.strip("/")
        if dataset.startswith("#"):
            continue
        # strip the scope if it's there
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset) == 0:
            continue
        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset)
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        theParent = ""
        for ds in prov['node']:
            if ds[u'dataType'] != u'AOD':
                continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])
            break
        if theParent == "":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        # check input prw files, if any were specified
        isIncomplete = False
        if len(args.prwFiles):
            total = 0
            dsid = theParent.split(".")[1]
            for p in periodNumbers:
                if p == -1:
                    continue
                hist = out.GetInputHistogram(int(dsid), p)
                if hist:
                    total += hist.GetEntries()
            if total == theParentSize:
                print "INFO: %s is complete in your existing PRW files. Good!" % dataset
                continue
            if total > theParentSize:
                print "WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset, total, theParentSize)
                continue
            else:
                if total != 0:
                    print "WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset, total, theParentSize)
                    isIncomplete = True
        # before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            ntupDatasetName = theParent.replace("AOD", "NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("aod", "%")
            ntupDatasetName = ntupDatasetName.replace("merge", "%")
            # remove everything after the first rtag of the ami tag .. replace with wildcard
            first_rtag_pos = ntupDatasetName.index("_r", ntupDatasetName.index("NTUP_PILEUP"))
            try:
                next_underscore_pos = ntupDatasetName.index("_", first_rtag_pos + 1)
            except ValueError:
                next_underscore_pos = len(ntupDatasetName)
            ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%"
            res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status')
            foundNTUP = False
            for r in res:
                if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                    continue
                print "Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"]
                ntupDatasets += [r[u'ldn']]
                foundNTUP = True
                break
            if foundNTUP == True:
                continue
        aodDatasets += [theParent]

    if len(aodDatasets) > 0:
        if args.outDS == "":
            print "NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset"
            print ",".join(aodDatasets)
            return 1
        print "...submitting job to grid..."
        extraOpts = ""
        if args.forceStaged:
            extraOpts += "--forceStaged "
        mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets), args.outDS, extraOpts)
        print "Command: %s" % mycommand
        from subprocess import call
        if call(mycommand, shell=True) != 0:
            print "Problem executing command. Did you remember to do: lsetup panda"
            return 1
        print "... finished. Please monitor your job on the grid, and when it is finished, download the files!"

    if len(ntupDatasets):
        frucio_fn = 'rucio_downloads_%s.sh' % args.inDsTxt
        print "Please download existing config files from these datasets (see also output file %s):" % frucio_fn
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = "rucio download %s" % ds
                print command
                frucio.write(command + '\n')
        print ""

    if len(ntupDatasets) or len(aodDatasets):
        print "After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt
        print "Thank you for generating config files, you get a gold star!"
    else:
        print "Looks like you are ready for pileup reweighting!"

    return 0
# Get total number of events
n_aod_events = 0
for f in os.listdir(mc_dir + d):
    tf = ROOT.TFile(mc_dir + d + '/' + f, 'READ')
    n_aod_events += tf.Get('EventCountHist').GetBinContent(1)

# Compare to AMI value
did = d.split(".")[2]
tags = d.split(".")[3].split("_p")[0]

# Don't check data
if did.startswith("00"):
    continue

# Get all possible AODs for the output ntuple
base_files = AtlasAPI.list_datasets(client,
                                    patterns=['mc15_13TeV.' + did + '%merge.AOD%' + tags + '%'],
                                    type='AOD')
print "-------------------------------------------------------------------------"
print "Ntuple name:", d
print base_files[0]['ldn']
if len(base_files) > 1:
    print "Found more than one matching AOD."
    print "For file", f
    print "using", base_files[0]['ldn']
info = AtlasAPI.get_dataset_info(client, base_files[0]['ldn'])[0]
n_real_events = info.get('totalEvents')
if float(n_real_events) < float(n_aod_events):
    print "\033[91m Error in dsid", did
    bad_dids += [did]
def getMCDataSets(self, channels=[], campaign="mc16_13TeV", derivations=[]):
    getAmiClient()
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    data_type = ClearFromDuplicates(["AOD"] + derivations)
    channels_to_use = []
    # Check only the dsids which are non-existent or incomplete
    for mc in channels:
        ami_channel = self.getMCchannel(dsid=mc, campaign=campaign)
        if not ami_channel:
            channels_to_use.append(mc)
            continue
        # Check if the dsid is already complete w.r.t. all data formats
        to_append = False
        for data in data_type:
            if not ami_channel.hasDataType(data):
                to_append = True
            if to_append:
                break
        if to_append:
            channels_to_use.append(mc)
    Blocks = []
    # Try to group the queries into DSID blocks of thousands
    for mc in channels_to_use:
        FirstDigits = int(str(mc)[0:3])
        if FirstDigits not in Blocks:
            Blocks.append(FirstDigits)
    # Summarizing into blocks leads to a huge reduction of queries
    if len(Blocks) < len(channels_to_use):
        channels_to_use = Blocks
    print "<AMIDataBase> INFO: going to ask AMI about %d different things" % (len(channels_to_use))
    prompt = max(int(len(channels_to_use) / 10), 2)
    for i, mc in enumerate(channels_to_use):
        if i % prompt == 0:
            print "<AMIDataBase> INFO: %d/%d queries sent to AMI" % (i, len(channels_to_use))
        # AMI query
        DSIDS = AtlasAPI.list_datasets(getAmiClient(),
                                       patterns=["%s.%i%%.%%" % (campaign, mc)],
                                       fields=['type', 'events', 'ami_status', "physics_short",
                                               "dataset_number", "cross_section", "prodsys_status"],
                                       ### limit the query to at most 1000 * 50 rows
                                       limit=[1, 1000 * 50],
                                       type=data_type)
        for amiDS in DSIDS:
            DS = int(amiDS["dataset_number"])
            ami_entry = self.getMCchannel(dsid=DS, campaign=campaign)
            # a fresh AMImcEntry needs to be created
            if not ami_entry:
                physics_name = amiDS["physics_short"]
                try:
                    xS = float(amiDS["cross_section"])
                except Exception:
                    print "<AMIDataBase> WARNING: No x-section found for %s (%i) in AMI" % (physics_name, DS)
                    xS = 1.
                ami_entry = AMImcEntry(dsid=DS, xsec=xS, physics_name=physics_name, campaign=campaign)
                self.__mc_channels.append(ami_entry)
            ds_type = amiDS["type"]
            tag = self.__getDSTag(amiDS['ldn'])
            nevents = int(amiDS['events'])
            ami_entry.addDataset(data_type=ds_type, tag=tag, events=nevents, status="")
    return True
def makeContainer(c1):
    ### get run list
    x0 = AtlasAPI.list_datasets(client, patterns=[c1.project + '.%.physics_Main.merge.AOD%'],
                                fields=['run_number', 'ldn', 'events', 'total_size'],
                                order=None, limit=None, show_archived=False,
                                grl=c1.grl, data_period=c1.period)
    runs = set([a['run_number'] for a in x0])
    if len(runs) == 0:
        print('no run in period ' + c1.period + ' is listed in GRL:', c1.grl)
        return

    ### get AOD datasets
    x = AtlasAPI.list_datasets(client, patterns=[c1.project + '.%.physics_Main.merg%' + c1.derivation + '%'],
                               fields=['run_number', 'ldn', 'events', 'total_size'],
                               order=None, limit=None, show_archived=False,
                               grl=c1.grl, data_period=c1.period)

    ### get dataset info
    dic1 = {}
    for a in x:
        print(a['run_number'], a['ldn'])
        try:
            dic1[a['ldn'].split('_')[-1]].append((a['run_number'], a['ldn']))
        except KeyError:
            dic1[a['ldn'].split('_')[-1]] = [(a['run_number'], a['ldn'])]

    ### get the tags, sorted by popularity
    allTags = sorted(dic1.keys(), key=lambda k: len(dic1[k]), reverse=True)
    for tag in allTags:
        print(tag, ':', end='')
        for xx in dic1[tag]:
            print(xx[0], end='')
        print()

    ### use the most popular tags if not specified
    t_acceptTags = c1.acceptTags if c1.acceptTags else allTags

    #### get the list of datasets
    ds = []
    for t in t_acceptTags:
        for a in dic1.get(t, []):
            found = False
            for d in ds:
                if d[0] == a[0]:
                    found = True
                    break
            if not found:
                ds.append(a)

    ### warn when the given tag does not select any dataset
    if len(ds) == 0:
        print('No dataset in period', c1.period + ', exiting...')
        return

    #### find any missing runs
    for d in ds:
        print(d[0], d[1])
        runs.remove(d[0])
    print(runs)

    #### warn when there are any missing runs
    if len(runs) != 0:
        print('!' * 10)
        print('MISSING ', ' '.join(runs))

    ### prepare commands
    dlist = ','.join([d[1] for d in ds])
    superTag = 'period' + c1.period + ',' + c1.sTag + ds[0][1][-5:]
    comments = superTag + ',' + c1.derivation
    cmd = 'ami cmd COMAPopulateSuperProductionDataset -rucioRegistration="yes" -creationComment="' + comments + '" -selectionType="run_config" -superTag="' + superTag + '" -containedDatasets="' + dlist + '" -separator="," '
    print('command:', cmd)

    ### write out a script if asked
    if c1.outScript:
        with open(c1.outScript, 'a') as f1:
            f1.write(cmd + '\n')
        return

    ### create the container immediately if desired
    if not c1.autoCreate:
        while True:
            x = raw_input("create container: y[es]/N[o]/e[xit]")
            if x == 'e' or x == 'N':
                return
            elif x == 'y':
                break
    call(cmd, shell=True)
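# A hypothetical configuration object for makeContainer(); the attribute names
# mirror those used above (c1.project, c1.period, c1.derivation, ...), but the
# values are illustrative only.
class ContainerConfig(object):
    project = 'data15_13TeV'
    period = 'D'
    derivation = 'DAOD_EXOT2'
    grl = 'data15_13TeV.periodAllYear_StandardGRL_All_Good_25ns.xml'  # illustrative GRL name
    acceptTags = None   # None: use the most popular tags
    sTag = 'grp15_v01_'
    outScript = ''      # set to a filename to collect the ami commands instead of running them
    autoCreate = False  # False: prompt before creating the container

makeContainer(ContainerConfig())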
client = pyAMI.client.Client('atlas')
AtlasAPI.init()

inputDS = []
eventCounts = {}
inputFile = open(o.inFileName, "r")
for line in inputFile:
    if line.startswith("#"):
        continue
    words = line.split()
    if not len(words):
        continue
    dsName = words[0].rstrip("/")
    dsID = dsName.split(".")[1]
    print dsName
    try:
        dsInfo = AtlasAPI.list_datasets(client, patterns=dsName, fields=['events'])
        nEvents = dsInfo[0]['events']
    except:
        print "Skipping", dsName
        continue
    inputDS.append(dsName)
    eventCounts[dsName] = nEvents

for ds in inputDS:
    print ds, "\t", eventCounts[ds]
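# A note on the input format implied by the parsing above: o.inFileName is a
# text file with one dataset name per line (first whitespace-separated token
# is used), "#" comment lines are skipped, and a trailing "/" is tolerated.
# A hypothetical example file (dataset names are illustrative):
#
#   # ttbar nominal
#   mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669/
#   # single top
#   mc15_13TeV.410011.PowhegPythiaEvtGen_P2012_singletop_tchan_lept_top.merge.DAOD_TOPQ1.e3824_s2608_s2183_r7725_r7676_p2669/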
from pyAMI.client import Client
from pyAMI.atlas.api import list_datasets

client = Client('atlas')

#my_datasets = ["data15_13TeV.00280753.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281075.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279515.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280520.physics_Main.merge.DAOD_HIGG8D1.f632_m1504_p2432/","data15_13TeV.00279259.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280614.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281074.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281070.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280853.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280464.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276416.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280368.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280977.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279928.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276262.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00276954.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00278968.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279764.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276790.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280500.physics_Main.merge.DAOD_HIGG8D1.f631_m1504_p2432/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f638_m1511_p2432/","data15_13TeV.00279984.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276336.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280862.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279685.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00280231.physics_Main.merge.DAOD_HIGG8D1.f630_m1504_p2432/","data15_13TeV.00278880.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276329.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00279932.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00281317.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00279169.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279867.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279284.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279345.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279598.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279279.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00279813.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00278912.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2432/","data15_13TeV.00276952.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00281385.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00276778.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00276511.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2432/","data15_13TeV.00280950.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280319.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00280423.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00283074.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282712.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282631.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00276689.physics_Main.merge.DAOD_HIGG8D1.f623_m1480_p2432/","data15_13TeV.00283155.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282992.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00282784.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2432/","data15_13TeV.00281411.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2432/","data15_13TeV.00284484.physics_Main.merge.DAOD_HIGG8D1.f644_m1518_p2432/","data15_13TeV.00284420.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00284427.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00284285.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2432/","data15_13TeV.00280673.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2436/","data15_13TeV.00284154.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283270.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2436/","data15_13TeV.00284213.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283780.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283429.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00284006.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/","data15_13TeV.00283608.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2436/"]
my_datasets = ["data15_13TeV.00276262.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276329.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276336.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276416.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276511.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276689.physics_Main.merge.DAOD_HIGG8D1.f623_m1480_p2559/","data15_13TeV.00276778.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276790.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276952.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00276954.physics_Main.merge.DAOD_HIGG8D1.f620_m1480_p2559/","data15_13TeV.00278880.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00278912.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00278968.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279169.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279259.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279279.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279284.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279345.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279515.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279598.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279685.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279764.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279813.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279867.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279928.physics_Main.merge.DAOD_HIGG8D1.f628_m1497_p2559/","data15_13TeV.00279932.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00279984.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280231.physics_Main.merge.DAOD_HIGG8D1.f630_m1504_p2559/","data15_13TeV.00280319.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280368.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280423.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280464.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280500.physics_Main.merge.DAOD_HIGG8D1.f631_m1504_p2559/","data15_13TeV.00280520.physics_Main.merge.DAOD_HIGG8D1.f632_m1504_p2559/","data15_13TeV.00280614.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280673.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280753.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280853.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280862.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280950.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00280977.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281070.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281074.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281075.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281317.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281385.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00281411.physics_Main.merge.DAOD_HIGG8D1.f629_m1504_p2559/","data15_13TeV.00282625.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282631.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282712.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282784.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00282992.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283074.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283155.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283270.physics_Main.merge.DAOD_HIGG8D1.f640_m1511_p2559/","data15_13TeV.00283429.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00283608.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00283780.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284006.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284154.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284213.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284285.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284420.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284427.physics_Main.merge.DAOD_HIGG8D1.f643_m1518_p2559/","data15_13TeV.00284484.physics_Main.merge.DAOD_HIGG8D1.f644_m1518_p2559/"]
#my_datasets = ["mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_HIGG8D1.e3698_s2608_s2183_r7267_r6282_p2501/"]
#my_datasets = ["mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_HIGG8D1.e3698_s2608_s2183_r7267_r6282_p2559/"]

total_events = 0
for dataset in my_datasets:
    print("Looking at dataset: {0}\n".format(dataset))
    samples = list_datasets(client, dataset, fields=['events'])
    for sample in samples:
        print("\t {0}".format(sample))
        total_events += int(sample['events'])
print("Total events: {0}".format(total_events))
    print(" => Done: %f x 10^30 cm^-2" % (integratedLumi))
    lumiPerRun[run] = integratedLumi

# now go to AMI to get the event yields for the datasets you're interested in
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

for run in d['Run']:
    pattern = "data15_13TeV.%08d.physics_Main.merge.DESDM_RPVLL" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns=pattern, fields=['events'], type='DESDM_RPVLL')
    #print(dslist)
    if len(dslist) > 0:
        print(dslist[0]['events'])
        selectedEventsPerRun[run] = dslist[0]['events']
    pattern = "data15_13TeV.%08d.physics_Main.merge.AOD" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns=pattern, fields=['events'], type='AOD')
    #print(dslist)
    if len(dslist) > 0:
        print(dslist[0]['events'])
        eventsPerRun[run] = dslist[0]['events']
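# With eventsPerRun and selectedEventsPerRun filled above, the DESDM_RPVLL
# selection fraction per run follows directly. A minimal sketch (the AMI
# 'events' field arrives as a string, hence the float() casts; runs missing
# from either dict are skipped):
for run in sorted(eventsPerRun):
    if run not in selectedEventsPerRun:
        continue
    total = float(eventsPerRun[run])
    if total > 0:
        print("run %d: RPVLL fraction %.4f" % (run, float(selectedEventsPerRun[run]) / total))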
def main():
    logging.basicConfig(format='%(levelname)s:%(message)s')

    import sys
    import time, datetime
    from pytz import timezone
    import argparse

    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
        import pyAMI.config
    except ImportError:
        logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
        return -1

    extraFieldDefaults = {}  # {"approx_crossSection":None,"approx_GenFiltEff":1.0}
    fieldDefaults = {"subprocessID": 0, "dataset_number": 0}
    # populate the fieldDefaults ... for all, assume 'None'
    for field in pyAMI.config.tables['datasets'].keys():
        if str(field) == "cross_section":
            continue  # special exception because this field only present in ...
        if str(field) in fieldDefaults.keys():
            continue
        if str(field).startswith("@"):
            continue
        fieldDefaults[str(field)] = None

    import commands
    # check the voms proxy
    status, out = commands.getstatusoutput("voms-proxy-info -fqan -exists")
    if status != 0:
        logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")
        return -1

    try:
        client = pyAMI.client.Client('atlas')
        AtlasAPI.init()
    except:
        logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
        return -1

    # need to collect the ami dataset parameter defaults
    paramExplains = []  # for the help message only
    paramUnits = dict()
    paramDefaults = {}

    res = client.execute('ListPhysicsParameterDefs', format='dom_object')
    for r in res.get_rows():  # r is an OrderedDict
        explainString = "%s: %s" % (r[u'PARAMNAME'], r[u'DESCRIPTION'])
        if r[u'UNITS'] != u'NULL':
            explainString += " (units: %s)" % r[u'UNITS']
            paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
        if r[u'HASDEFAULT'] == u'N':
            paramDefaults[str(r[u'PARAMNAME'])] = None
        else:
            explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
            if r[u'PARAMTYPE'] == u'number':
                paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])  # FIXME: assumes all numeric parameters are floats
            elif r[u'PARAMTYPE'] == u'string':
                paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
        paramExplains += [explainString]

    paramDefaults["crossSection_pb"] = None
    paramUnits["crossSection_pb"] = "pb"
    paramExplains += ["crossSection_pb: Same as crossSection except in pb units (units: pb)"]

    cern_time = timezone('UCT')
    current_time = datetime.datetime.fromtimestamp(time.time(), cern_time).strftime('%Y-%m-%d %H:%M:%S')

    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDS', nargs='+', default=[""],
                        help="List of datasets to retrieve parameters for")
    parser.add_argument('--inDsTxt', default="",
                        help="Alternative to --inDS, can specify the datasets from an input file")
    parser.add_argument('--fields', nargs='+',
                        help="List of parameters to extract. Available parameters are: \n\n  %s\n\nYou can also include any from:\n  %s\nYou can also do keyword_xxx to add a bool branch for keywords" % ("\n  ".join(paramExplains), ", ".join(fieldDefaults.keys() + extraFieldDefaults.keys())),
                        default=["dataset_number", "crossSection", "kFactor", "genFiltEff"])
    parser.add_argument('--timestamp', default=current_time,
                        help="The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
    parser.add_argument('--physicsGroups', nargs='+', default=["PMG,MCGN"],
                        help="Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, fall back on MCGN values if unavailable). Allowed groups are:\n   PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")
    parser.add_argument('--oldTimestamp', default="",
                        help="If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters")
    parser.add_argument('--explainFields', nargs='+', default=[],
                        help="The fields you would like explained .. will appear as comment lines after each row in the output")
    parser.add_argument('--explainInfo', nargs='+', default=['explanation', 'insert_time'],
                        help="Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby. Default is: explanation,insert_time")
    parser.add_argument('--outFile', default=sys.stdout, type=argparse.FileType('w'),
                        help="Where to print the output to. Leave blank to print to stdout")
    parser.add_argument('--delim', default="",
                        help="The delimiter character. Defaults to spaces, leading to a nicely formatted table")
    parser.add_argument('-v', action='store_true', help="Verbose output for debugging")

    args = parser.parse_args()

    if args.v:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)
    logging.debug(args.inDS)
    logging.debug(args.fields)
    logging.debug(args.timestamp)

    if args.timestamp == "the dawn of time":
        logging.error("Unfortunately we don't know any parameters from this time period... but we're working on it!")
        return 9999

    # split elements of fields by comma to get the full list
    args.fields = sum((y.split(',') for y in args.fields), [])
    args.fields = [x.strip() for x in args.fields]  # strip whitespace

    # look for keyword_ fields, these are special ...
    args.keywords = []
    for f in args.fields:
        if f.startswith("keyword_"):
            k = f[8:]
            # add each keyword to the extraFieldDefaults so it is recognised
            extraFieldDefaults["keyword_%s" % k] = bool(False)
            args.keywords += [k]

    # same for physics groups
    args.physicsGroups = sum((y.split(',') for y in args.physicsGroups), [])
    args.physicsGroups = [x.strip() for x in args.physicsGroups]

    # same for explainFields and explainInfo
    args.explainFields = sum((y.split(',') for y in args.explainFields), [])
    args.explainFields = [x.strip() for x in args.explainFields]
    args.explainInfo = sum((y.split(',') for y in args.explainInfo), [])
    args.explainInfo = [x.strip() for x in args.explainInfo]

    if args.inDsTxt != '':
        args.inDS = readDsFromFile(args.inDsTxt)

    # and the same for inDS
    args.inDS = sum((y.split(',') for y in args.inDS), [])
    args.inDS = [x.strip() for x in args.inDS]

    # Plan:
    # 1. Check field values are allowed; obtain default field values at the same time.
    # 2. For each entry in inDS: if it contains a wildcard, obtain the list of matching
    #    datasets, otherwise check the dataset exists. Obtain the datasetid and numEvents
    #    properties at the same time, in case we need them.
    # 3. For each of these datasets, get parameters from AMI matching the timestamp.
    #    Organize into fields and index by subprocessID.
    # 4. Output a line to the output file.

    # 1.
    # before adding all the ami parameters, identify which of the provided fields are:
    # 1) obtained from the list_datasets command (dsFields), or 2) actual parameters
    dsFields = [x for x in args.fields if x in fieldDefaults.keys() and x not in ["subprocessID", "ldn"]]
    extraFields = [x for x in args.fields if x in extraFieldDefaults.keys()]
    paramFields = [x for x in args.fields if x in paramDefaults.keys()]

    if len(paramFields) > 0 and args.physicsGroups == [""]:
        logging.error("You must specify at least one physics group. See -h for allowed groups")
        return -1

    # combine paramDefaults with fieldDefaults, and with the extra fields
    fieldDefaults.update(paramDefaults)
    fieldDefaults.update(extraFieldDefaults)

    for field in args.fields:
        if field not in fieldDefaults:
            logging.error("%s is not a recognised field. Allowed fields are:" % field)
            logging.error(fieldDefaults.keys())
            return -1

    if args.oldTimestamp != "":
        logging.info("oldTimestamp option specified. Running in diff mode...")
        args.explainFields = args.fields
        args.explainInfo = ["explanation", "insert_time", "physicsGroup", "createdby"]

    # 2.
    # replace all '*' with '%' and strip any trailing "/"
    args.inDS = [ds.replace("*", "%") for ds in args.inDS]
    args.inDS = [ds.rstrip("/") for ds in args.inDS]

    if len(args.inDS) == 0 or (len(args.inDS) == 1 and args.inDS[0] == ""):
        logging.error("No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")
        return -1

    logging.info("Fetching list of datasets from AMI (this may take a few minutes)...")

    # obtain list of datasets
    res = AtlasAPI.list_datasets(client, patterns=args.inDS, fields=dsFields + ['ldn'],
                                 ami_status="VALID")  # changed status from %, to only catch valid now: wb 08/2015
    logging.info("...Found %d datasets matching your selection" % len(res))
    if len(res) == 0:
        return 0

    # NOTE: Should we allow retrieval of the extra information: keyword, genfiltereff,
    # approx crossection, .. these all come from the GetDatasetInfo ami command

    dataset_values = dict()
    for r in res:
        mydict = dict()
        dataset_values[str(r['ldn'])] = mydict
        for field in r.items():
            if str(field[0]) == "ldn":
                continue
            if str(field[0]) not in args.fields:
                continue
            mydict[str(field[0])] = str(field[1])
        # if we have the 'extra fields or keywords' we need to execute AtlasAPI.get_dataset_info ..
        if len(extraFields) > 0 or len(args.keywords) > 0:
            info_res = AtlasAPI.get_dataset_info(client, str(r['ldn']))
            #print(info_res)
            if len(info_res) == 0:
                logging.error("Unable to retrieve dataset info for %s" % str(r['ldn']))
                return -1
            for field in extraFields:
                # ignore the keyword_ fields
                if field.startswith("keyword_"):
                    continue
                mydict[field] = float(info_res[0][unicode(field)]) if isfloat(info_res[0][unicode(field)]) else extraFieldDefaults[field]
            for k in args.keywords:
                mydict["keyword_%s" % k] = int((k in str(info_res[0][unicode('keyword')]).split(",")))

    # sort dataset_values as well as possible
    from collections import OrderedDict
    sorted_values = OrderedDict()
    for ds in args.inDS:
        if ds in dataset_values.keys():
            sorted_values[ds] = dataset_values[ds]
    for ds in sorted(dataset_values):
        if ds not in sorted_values.keys():
            sorted_values[ds] = dataset_values[ds]
    dataset_values = sorted_values
    logging.debug(dataset_values)

    #res = client.execute(['GetDatasetInfo

    for ds in args.inDS:
        if '%' not in ds and ds not in dataset_values.keys():
            logging.warning("Unknown dataset: %s" % ds)

    datasetsToQuery = ",".join(dataset_values.keys())

    # if using inDsTxt, retain any comment or blank lines in the structure of the output
    complete_values = OrderedDict()
    if args.inDsTxt != "":
        # read lines
        commentcount = 0
        import re
        txt = open(args.inDsTxt)
        for tmpLine in txt:
            # remove \n
            tmpLine = re.sub('\n', '', tmpLine)
            # remove white spaces
            tmpLine = tmpLine.strip()
            # skip comment or empty
            if tmpLine.startswith('#') or tmpLine == '':
                complete_values['comment%d' % (commentcount)] = tmpLine
                commentcount = commentcount + 1
                continue
            # append
            tmpLine = tmpLine.rstrip("/")
            if tmpLine in dataset_values.keys():
                complete_values[tmpLine] = dataset_values[tmpLine]
            else:
                print("cannot find %s" % tmpLine)
        # close file
        txt.close()
        dataset_values = complete_values

    logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.timestamp))

    # do this as one query, to be efficient
    if (args.timestamp == current_time):
        res = client.execute(['GetPhysicsParamsForDataset',
                              "--logicalDatasetName=%s" % datasetsToQuery,
                              "--timestamp='%s'" % args.timestamp], format='dom_object')
    else:
        res = client.execute(['GetPhysicsParamsForDataset',
                              "--logicalDatasetName=%s" % datasetsToQuery,
                              "--timestamp='%s'" % args.timestamp,
                              "--history=true"], format='dom_object')

    # organize the results by dataset
    parameterQueryResults = dict()
    for r in res.get_rows():
        if r[u'logicalDatasetName'] not in parameterQueryResults.keys():
            parameterQueryResults[r[u'logicalDatasetName']] = []
        parameterQueryResults[r[u'logicalDatasetName']] += [r]  # puts the row in the list for this dataset

    if args.oldTimestamp != "":
        logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.oldTimestamp))
        res2 = client.execute(['GetPhysicsParamsForDataset',
                               "--logicalDatasetName=%s" % datasetsToQuery,
                               "--timestamp='%s'" % args.oldTimestamp,
                               "--history=true"], format='dom_object')
        old_parameterQueryResults = dict()
        for r in res2.get_rows():
            if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys():
                old_parameterQueryResults[r[u'logicalDatasetName']] = []
            old_parameterQueryResults[r[u'logicalDatasetName']] += [r]  # puts the row in the list for this dataset

    headerString = ""
    doneHeader = False
    commentCache = ""
    commentCount = 0

    # the result is a list of lists (each inner list is one row)
    outputTable = []
    tableHeaders = []

    for ds in dataset_values.keys():
        if ds.startswith('comment'):
            if commentCount > 0:
                commentCache += "\n"
            commentCache += dataset_values[ds]
            commentCount = commentCount + 1
            continue
        # obtain the list of parameters for this dataset
        #if(args.timestamp==current_time):
        #    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp], format='dom_object')
        #else:
        #    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object')
        res = parameterQueryResults.get(ds, [])
        if args.oldTimestamp != "":
            res2 = old_parameterQueryResults.get(ds, [])

        # first we have to determine how many subprocesses this ds has
        dsSubprocesses = [0]  # always have the 0 subprocess
        for r in res:
            sp = int(r[u'subprocessID'])
            if sp not in dsSubprocesses:
                dsSubprocesses += [sp]

        # now for each subprocess we have to locate each required field value
        # (in paramFields), ranked by physicsGroup
        for sp in dsSubprocesses:
            paramVals = dict()
            paramVals2 = dict()
            groupsWithVals = dict()  # held for helpful output
            # need to keep explanations for requested fields
            explainInfo = dict()
            for i in args.explainFields:
                explainInfo[i] = dict()

            for param in paramFields:
                groupsWithVals[param] = []
                bestGroupIndex = len(args.physicsGroups)
                import copy
                paramVals[param] = copy.copy(fieldDefaults[param])
                for r in res:
                    if int(r[u'subprocessID']) != sp:
                        continue
                    if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                        continue
                    if str(r[u'physicsGroup']) not in args.physicsGroups:
                        groupsWithVals[param] += [(str(r[u'physicsGroup']), str(r[u'paramValue']))]
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                        logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                    paramVals[param] = str(r[u'paramValue'])
                    if param == "crossSection_pb":
                        paramVals[param] = str(float(paramVals[param]) * 1000.0)
                    bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))
                    # keep the explanation info for the requested fields
                    if param in explainInfo.keys():
                        for e in args.explainInfo:
                            if unicode(e) not in r:
                                logging.error("Unrecognised explainInfo field: %s" % e)
                                return -1
                            explainInfo[param][e] = str(r[unicode(e)])
                if args.oldTimestamp != "":
                    bestGroupIndex = len(args.physicsGroups)
                    paramVals2[param] = copy.copy(fieldDefaults[param])
                    for r in res2:
                        if int(r[u'subprocessID']) != sp:
                            continue
                        if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                            continue
                        if str(r[u'physicsGroup']) not in args.physicsGroups:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                            logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                        paramVals2[param] = str(r[u'paramValue'])
                        if param == "crossSection_pb":
                            paramVals2[param] = str(float(paramVals2[param]) * 1000.0)
                        bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))

            # at this stage, parameters reside in the paramVals dict or the dataset_values[ds] dict.
            # print them in the requested order .. if any is "None" then stop, because it doesn't
            # have a default value and we didn't find a value for it either
            rowString = ""
            rowList = []
            firstPrint = False
            for param in args.fields:
                val = None
                if param == "ldn":
                    val = ds
                elif param == "subprocessID":
                    val = sp
                elif param in dataset_values[ds].keys():
                    val = dataset_values[ds][param]
                else:
                    val = paramVals.get(param, None)
                if val == None:
                    if args.outFile != sys.stdout:
                        logging.warning("dataset %s (subprocess %d) does not have parameter %s, which has no default." % (ds, sp, param))
                    if len(groupsWithVals.get(param, [])) > 0:
                        logging.warning("The following physicsGroups have defined that parameter though:")
                        logging.warning(groupsWithVals[param])
                    val = "#UNKNOWN#"
                    #return -1
                #if isfloat(str(val)): val = "%.6g" % float(val)
                if args.oldTimestamp != "":
                    # diff val to old val
                    val2 = None
                    if param == "ldn":
                        val2 = ds
                    elif param == "subprocessID":
                        val2 = sp
                    elif param in dataset_values[ds].keys():
                        val2 = dataset_values[ds][param]
                    else:
                        val2 = paramVals2.get(param, None)
                    if val2 == None:
                        val2 = "#UNKNOWN#"
                    #if isfloat(str(val2)): val2 = "%.6g" % float(val)
                    if (str(val) != str(val2)):
                        if not firstPrint:
                            print("%s:" % ds)
                            firstPrint = True
                        print(" %s : %s ---> %s" % (param, str(val2), str(val)))
                        print(" insert_time : %s" % explainInfo[param]['insert_time'])
                        print(" explanation : %s" % explainInfo[param]['explanation'])
                        print(" createdby : %s" % explainInfo[param]['createdby'])
                        print(" physicsGroup : %s" % explainInfo[param]['physicsGroup'])
                    continue

                rowList += [str(val)]
                if rowString != "" and args.delim != "":
                    rowString += args.delim
                rowString += str(val)
                # inspect the type of str(val) to build up the header
                if not doneHeader:
                    headerString += param
                    if args.outFile != sys.stdout:
                        if type(fieldDefaults[param]) == bool:
                            headerString += "/O:"
                        elif type(fieldDefaults[param]) == int:
                            headerString += "/I:"
                        elif type(fieldDefaults[param]) == float:
                            headerString += "/D:"
                        elif isfloat(str(val)):
                            headerString += "/D:"
                        #elif isint(str(val)): headerString += "/I:"
                        # TO BE SAFE WE MAKE ALL NUMERIC FIELDS FLOATS, EXCEPT if the default value is type int
                        else:
                            headerString += "/C:"
                    else:
                        v = param
                        if param in paramUnits:
                            headerString += " [%s]" % paramUnits[param]
                            v += " [%s]" % paramUnits[param]
                        tableHeaders += [v]
                        headerString += " "
            if args.oldTimestamp != "":
                continue  # print nothing more in diff mode
            if not doneHeader:
                doneHeader = True
                if args.outFile != sys.stdout:
                    print(headerString[:-1], file=args.outFile)
            if commentCount > 0:
                if args.outFile != sys.stdout and args.delim != "":
                    print(commentCache, file=args.outFile)
                outputTable += [["COMMENT", commentCache]]
                commentCache = ''
                commentCount = 0
            if args.outFile != sys.stdout and args.delim != "":
                print(rowString, file=args.outFile)
            outputTable += [rowList]
            # also print the required explanations
            for (field, expl) in explainInfo.items():
                outString = "#%s: { " % field
                doneFirst = False
                for eField in args.explainInfo:
                    if doneFirst:
                        outString += " , "
                    if not eField in expl.keys():
                        outString += " %s: <NONE .. value is default>" % eField
                    else:
                        outString += "%s: %s" % (eField, expl[eField])
                    doneFirst = True
                outString += " }"
                #print(outString,file=args.outFile)
                outputTable += [["COMMENT", outString]]

    if args.oldTimestamp != "":
        args.outFile.close()
        return 0

    # print the table in a nicely formatted state
    if args.outFile == sys.stdout or args.delim == "":
        # determine column widths
        columnWidths = [0] * len(args.fields)
        for i in range(0, len(tableHeaders)):
            columnWidths[i] = len(tableHeaders[i])
        for r in outputTable:
            if len(r) > 0 and r[0] == "COMMENT":
                continue
            for i in range(0, len(r)):
                if len(r[i]) > columnWidths[i]:
                    columnWidths[i] = len(r[i])
        lineout = ""
        for i in range(0, len(tableHeaders)):
            lineout += tableHeaders[i].ljust(columnWidths[i]) + " "
        print(lineout)
        for r in outputTable:
            lineout = ""
            if len(r) > 0 and r[0] == "COMMENT":
                lineout = r[1]
            else:
                for i in range(0, len(r)):
                    lineout += r[i].ljust(columnWidths[i]) + " "
            print(lineout, file=args.outFile)

    # print the footer, which is the command to reproduce this output
    import os
    if args.outFile != sys.stdout:
        # remove comments from dataset_values
        datasetss = [x for x in dataset_values.keys() if not x.startswith("comment")]
        print("", file=args.outFile)
        print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject', 'UNKNOWN!'), os.environ.get('AtlasVersion', 'UNKNOWN!')), file=args.outFile)
        print("#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp, ",".join(args.physicsGroups), ",".join(args.fields), ",".join(datasetss)), file=args.outFile)
        logging.info("Results written to: %s" % args.outFile.name)
        args.outFile.close()
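# For quick interactive checks, the same AMI command this script wraps can be
# called directly. A minimal sketch, assuming a valid voms proxy and that
# print_function is in effect (as in the script above); the dataset name and
# timestamp are illustrative:
import pyAMI.client

client = pyAMI.client.Client('atlas')
res = client.execute(['GetPhysicsParamsForDataset',
                      "--logicalDatasetName=mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.merge.DAOD_TOPQ1.e3698_s2608_s2183_r7725_r7676_p2669",
                      "--timestamp='2016-01-01 00:00:00'"], format='dom_object')
for row in res.get_rows():
    print(row[u'paramName'], row[u'paramValue'], row[u'physicsGroup'])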
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)
    if (config.baseline or config.official or config.sample) and config.grl != "":
        print "--grl is incompatible with --baseline, --official and --sample"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # consistency checks
    if config.whichMC15 != '':
        if config.whichMC15 == 'week1' and config.prefix != 'mc15_week1':
            print 'prefix changed to mc15_week1 in agreement with whichMC15'
            config.prefix = 'mc15_week1'
        elif config.whichMC15 == '50ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'
        elif config.whichMC15 == '25ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'

    # data type is NTUP_SUSY for 2011/2012 and AOD from 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        elif 'mc15_13TeV' in config.prefix:
            import mc15_13TeV_MCSampleList as mcsl
        elif 'mc15_week1' in config.prefix:
            import mc15_13TeV_week1_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV, mc14_13TeV, mc15_13TeV and mc15_week1'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]
    elif config.grl != "":
        if not os.path.exists(config.grl):
            print 'Could not find GRL', config.grl
            sys.exit(1)
        doc = ET.parse(config.grl)
        for item in doc.findall('./NamedLumiRange/Metadata'):
            if item.attrib['Name'] == 'RunList':
                for r in item.text.split(','):
                    officialids.append(int(r))

    # get all datasets matching prefix & tag, then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    alldatasets = []
    if config.whichMC15 != '':
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        for tag in mc15_rtags[config.whichMC15]:
            dskey = prefix + datatype + tag + config.tag
            print 'Querying AMI for datasets matching pattern', dskey
            alldatasets += list_datasets(client, dskey)
    else:
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        dskey = config.prefix + datatype + config.tag
        print 'Querying AMI for datasets matching pattern', dskey
        alldatasets = list_datasets(client, dskey)

    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample or config.grl != "") and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])

    # write file
    coveredids = set()
    if not (config.suffix == ""):
        myoutputfile = 'datasets_' + config.suffix + '.txt'
    else:
        myoutputfile = 'datasets.txt'
    fout = open(myoutputfile, 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            if config.grl == "":
                generatorString = info['generatorName']
                version = info['version']
                if badDataset(dsname, generatorString, version):
                    continue
            availability = info['prodsysStatus']
            if config.onlyComplete and availability != u'ALL EVENTS AVAILABLE':
                print 'Skip incomplete dataset', dsname, availability
                continue
            nFiles = int(info['nFiles'])
            if nFiles > 0 and config.prefix.startswith('data'):
                fout.write(dsname + '\n')
            elif nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    datasetNumber = int(info[u'datasetNumber'])
                    coveredids.add(datasetNumber)
                    # confirmed with the AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5.
                    # There are sometimes problems in the propagation of these
                    # properties to the xAOD/derived datasets, so go back in
                    # parentage to find the information
                    xsec = info[u'crossSection']
                    if info.has_key(u'approx_GenFiltEff'):
                        effic = info[u'approx_GenFiltEff']
                    if config.datatype == '%TRUTH1%':
                        effic = 1
                    if ((xsec == u'NULL' or not info.has_key(u'approx_GenFiltEff')) and not (config.datatype == '%TRUTH1%')):
                        xsec, effic = genParamsFromParents(client, dsname, datasetNumber)
                        if not xsec:
                            xsec = 0
                        if not effic:
                            print 'No approx_GenFiltEff found for', dsname, 'set to 0 !!!!'
                            effic = 0
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset', dsname, 'in AMI, skip'
    fout.close()

    if len(coveredids) == 0:
        if not config.prefix.startswith('data'):
            print 'Could not extract any channel IDs from datasets found; this is OK for data but suspicious for MC'
    else:
        for id in officialids:
            if not id in coveredids:
                print 'No dataset found for channel', id
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # data type is NTUP_SUSY for 2011/2012 and AOD from 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV and mc14_13TeV'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]

    # get all datasets matching prefix & tag, then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    dskey = config.prefix + datatype + config.tag
    print 'Querying AMI for datasets matching pattern', dskey
    alldatasets = list_datasets(client, dskey)
    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample) and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])

    # write file
    fout = open('datasets.txt', 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            generatorString = info['generatorName']
            version = info['version']
            if badDataset(dsname, generatorString, version):
                continue
            availability = info['prodsysStatus']
            nFiles = int(info['nFiles'])
            if nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    #(xsec, effic) = get_dataset_xsec_effic(client, info['logicalDatasetName'])
                    # confirmed with the AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5
                    xsec = info[u'crossSection']
                    effic = info[u'approx_GenFiltEff']
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset', dsname, 'in AMI, skip'
    fout.close()