def getDatasetInfo(dataset, debug=False):
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    results = AtlasAPI.get_dataset_info(client, dataset)
    if len(results) != 1:
        raise SystemExit('\n***EXIT*** no valid results for dataset %s' % dataset)
    eff = None
    for name, value in results[0].iteritems():
        if name == 'totalEvents':
            nevents = value
        elif name == 'crossSection':
            xsec = float(value) * 1e6  #xsec is in nb, hence the 1e6 factor to get it in fb
        elif name == 'datasetNumber':
            dsid = value
        elif name == 'genFiltEff':
            eff = float(value)
    #if genFiltEff is not available, use approx_GenFiltEff
    if eff is None:
        for name, value in results[0].iteritems():
            if name == 'approx_GenFiltEff':
                eff = float(value)
    print '%s %e %e %s' % (dsid, xsec, eff, nevents)
    if debug:
        for name, value in results[0].iteritems():
            print ' %s %s' % ((name + ':').ljust(24), value)
        print ''
    return dsid, xsec, eff, nevents
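# A hedged usage sketch for getDatasetInfo above: it assumes the surrounding
# script has the pyAMI imports shown below and that a valid grid certificate /
# VOMS proxy is available. The dataset name is a placeholder, not a real sample.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

dsid, xsec, eff, nevents = getDatasetInfo(
    'mc15_13TeV.999999.placeholder_sample.merge.AOD.e0000_s0000_r0000',  # placeholder name
    debug=True)
print 'effective cross-section [fb]:', xsec * eff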
def main(fileList, samples):
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    dirDict = getDirNames(samples)
    listInput = open(fileList)
    inputDatasets = listInput.readlines()
    listToCheck = []
    for ds in inputDatasets:
        if "#" in ds:
            continue
        dsid = ds.split(".")[1]
        ds = ds.strip("\n")
        tmp = AtlasAPI.list_datasets(client, patterns=[ds], fields=['events'])
        try:
            inputEvents = int(tmp[0]['events'])
        except:
            print "WARNING: skipping", ds
            continue
        try:
            processedEvents = getNumberEvents(dirDict[dsid])
        except KeyError:
            print "WARNING: this sample may be missing:", dsid, ", not found in input samples", samples
            processedEvents = 0
        if inputEvents == 0:
            print "inputEvents==0 for", ds
            continue
        frac = float(processedEvents) / float(inputEvents)
        diff = inputEvents - processedEvents
        if diff > 0:
            listToCheck += [dsid]
            missing_sample_line = ' ----> Event numbers do NOT match (%s/%s). Please check your download for %s. Fraction of Events downloaded %0.4f. Difference in events: %s' % (processedEvents, inputEvents, dsid, frac, diff)
            print missing_sample_line
    print "\nThe following list of samples is incomplete/missing:", ' '.join(map(str, listToCheck))
    print "============================================================================================================"
    for sp in listToCheck:
        for line in inputDatasets:
            if re.search(sp, line):
                print line,
    print "============================================================================================================"
def getAmiClient():
    global m_AMIClient
    if m_AMIClient:
        return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
    except ImportError:
        print 'No AMI setup is found please SETUP AMI using "localSetupPyAMI"'
        sys.exit(1)
    if os.getenv("RUCIO_ACCOUNT") is None:
        print "No RUCIO ACCOUNT is available.. please define a rucio Account"
        exit(1)
    while CheckRemainingProxyTime() < 600:
        print "VOMS-PROXY is running out, renewing..."
    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
def main():
    opts = parse_opts()
    files = load_files(opts)
    print "Initializing PyAMI"
    ami_client = pyAMI.client.Client("atlas")
    atlas_api.init()
    print "Checking dataset %s with %d files" % (opts.dataset, len(files))
    daod_events, cbk_events = get_local_events(files)
    good = True
    if not check_derivation_stats(ami_client, opts.dataset, daod_events):
        good = False
    if not check_sample_stats(ami_client, opts.dataset, cbk_events):
        good = False
    if not good:
        sys.exit(1)
def getAmiClient():
    global m_AMIClient
    if m_AMIClient:
        return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
    except ImportError:
        logging.error('No AMI setup is found please SETUP AMI using "localSetupPyAMI"')
        sys.exit(1)
    if not RUCIO_ACCOUNT:
        logging.error("No RUCIO ACCOUNT is available.. please define a rucio Account")
        exit(1)
    while CheckRemainingProxyTime() < 600:
        logging.info("VOMS-PROXY is running out, renewing...")
    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
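# A minimal usage sketch for the cached-client helper above, assuming the
# module-level cache m_AMIClient and the helpers CheckRemainingProxyTime /
# RUCIO_ACCOUNT are defined elsewhere in the same module. The dataset pattern
# queried here is a placeholder.
import pyAMI.atlas.api as AtlasAPI

m_AMIClient = None  # module-level cache expected by getAmiClient()

client = getAmiClient()
for ds in AtlasAPI.list_datasets(client,
                                 patterns=['mc15_13TeV.%.DAOD_EXOT5.%'],  # placeholder pattern
                                 fields=['ldn', 'events']):
    print ds['ldn'], ds['events']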
def main():
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDsTxt', action="store",
                        help="text file containing datasets to make PRW for (one per line) [REQUIRED]",
                        required=True)
    parser.add_argument('--outDS', action="store", default="",
                        help="Name of the output dataset", required=False)
    parser.add_argument('--forceStaged', action="store_true",
                        help="If set, grid jobs will be submitted with forceStaged option")
    parser.add_argument('--skipNTUP_PILEUP', action="store_true",
                        help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles', nargs="*",
                        help="Existing PRW Config files to check")
    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    #read datasets into list
    with open(args.inDsTxt) as f:
        datasets = f.read().splitlines()

    import ROOT
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  #trick tool into going into counting mode
    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    print "Determining provenances of %d datasets ..." % len(datasets)

    aodDatasets = []
    ntupDatasets = []
    for dataset in datasets:
        dataset = dataset.strip("/")
        #strip the scope if it's there
        if dataset.startswith("#"):
            continue
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset) == 0:
            continue
        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset)
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        theParent = ""
        for ds in prov['node']:
            if ds[u'dataType'] != u'AOD':
                continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])
            break
        if theParent == "":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue

        #check input prw files, if we specified
        isIncomplete = False
        if len(args.prwFiles):
            total = 0
            dsid = theParent.split(".")[1]
            for p in periodNumbers:
                if p == -1:
                    continue
                hist = out.GetInputHistogram(int(dsid), p)
                if hist:
                    total += hist.GetEntries()
            if total == theParentSize:
                print "INFO: %s is complete in your existing PRW files. Good!" % dataset
                continue
            if total > theParentSize:
                print "WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (dataset, total, theParentSize)
                continue
            else:
                if total != 0:
                    print "WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (dataset, total, theParentSize)
                    isIncomplete = True

        #before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            ntupDatasetName = theParent.replace("AOD", "NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("aod", "%")
            ntupDatasetName = ntupDatasetName.replace("merge", "%")
            #remove everything after first rtag of ami tag .. replace with wildcard
            first_rtag_pos = ntupDatasetName.index("_r", ntupDatasetName.index("NTUP_PILEUP"))
            try:
                next_underscore_pos = ntupDatasetName.index("_", first_rtag_pos + 1)
            except ValueError:
                next_underscore_pos = len(ntupDatasetName)
            ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%"
            res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status')
            foundNTUP = False
            for r in res:
                if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                    continue
                print "Found existing NTUP_PILEUP ... please download: %s" % r[u"ldn"]
                ntupDatasets += [r[u'ldn']]
                foundNTUP = True
                break
            if foundNTUP == True:
                continue

        aodDatasets += [theParent]

    if len(aodDatasets) > 0:
        if args.outDS == "":
            print "NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset"
            print ",".join(aodDatasets)
            return 1
        print "...submitting job to grid..."

        extraOpts = ""
        if args.forceStaged:
            extraOpts += "--forceStaged "

        mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (",".join(aodDatasets), args.outDS, extraOpts)

        print "Command: %s" % mycommand

        from subprocess import call
        if call(mycommand, shell=True) != 0:
            print "Problem executing command. Did you remember to do: lsetup panda"
            return 1

        print "... finished. Please monitor your job on the grid, and when it is finished, download the files!"

    if len(ntupDatasets):
        frucio_fn = 'rucio_downloads_%s.sh' % args.inDsTxt
        print "Please download existing config files from these datasets (see also output file %s):" % frucio_fn
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = "rucio download %s" % ds
                print command
                frucio.write(command + '\n')
        print ""

    if len(ntupDatasets) or len(aodDatasets):
        print "After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt
        print "Thank you for generating config files, you get a gold star!"
    else:
        print "Looks like you are ready for pileup reweighting!"

    return 0
#!/usr/bin/env python
import ROOT
import os
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

# Check if it's likely that we have a buggy number of AOD events
# To start, just checking AMI for the full original number, then seeing if it's higher.

# Set up pyAMI
client = pyAMI.client.Client('atlas')
AtlasAPI.init()

# Dir for MC files
mc_dir = "/afs/cern.ch/work/t/tholmes/ZMETSamples/eos/atlas/user/l/longjon/Ntuples/v00-13/"
#mc_dir = "/afs/cern.ch/work/t/tholmes/ZMETSamples/eos/atlas/user/t/tholmes/Ntuples/v00-11/"

# expect to have the form user.zmarshal.361443.e4133_s2608_s2183_r6869_r6282_p2419.zmetjets-v00-11_Sep2815_ntupleOutput.root
# with files in that dir

bad_dids = []
for d in os.listdir(mc_dir):
    if not d.endswith(".root"):
        continue
    if not "_p2411" in d:
        continue
    #if not "zmarshal.361" in d: continue
    print d

    # Get total number of events
    n_aod_events = 0
    for f in os.listdir(mc_dir + d):
        tf = ROOT.TFile(mc_dir + d + '/' + f, 'READ')
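# The per-file loop above is truncated in this snippet. Below is a hypothetical
# sketch (not the original code) of one way the event counting could be finished:
# sum the entries of a named tree in every local file, for later comparison with
# AMI's totalEvents. The tree name "outputTree" and this helper are illustrative
# assumptions only; ROOT and os are already imported above.
def count_local_events(directory, tree_name="outputTree"):
    total = 0
    for fname in os.listdir(directory):
        tf = ROOT.TFile(directory + '/' + fname, 'READ')
        tree = tf.Get(tree_name)
        if tree:
            total += tree.GetEntries()
        tf.Close()
    return total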
o, a = parser.parse_args()

try:
    import pyAMI.client
except:
    print "Failed to load pyAMI.client, you need to set up local PyAMI"
    print "do"
    print " > localSetupPyAMI"
    import sys
    sys.exit(-1)
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

inputDS = []
eventCounts = {}
inputFile = open(o.inFileName, "r")
for line in inputFile:
    if line.startswith("#"):
        continue
    words = line.split()
    if not len(words):
        continue
    dsName = words[0].rstrip("/")
    dsID = dsName.split(".")[1]
    print dsName
    try:
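        # The original snippet is truncated at the "try:" above. What follows is an
        # illustrative sketch (not the original code) of how the per-dataset lookup
        # could continue, reusing the AtlasAPI.list_datasets pattern from the earlier
        # check script; the exact fields and error handling here are assumptions.
        info = AtlasAPI.list_datasets(client, patterns=[dsName], fields=['events'])
        eventCounts[dsID] = int(info[0]['events'])
        inputDS.append(dsName)
    except Exception:
        print "WARNING: could not retrieve event count for", dsName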
# set verbosity for python printing
if args.verbose < 4:
    getWeights_logger.setLevel(20 - args.verbose * 5)
else:
    getWeights_logger.setLevel(logging.NOTSET + 1)

try:
    # get PyAMI
    import pyAMI.client
    import pyAMI.atlas.api as api
except ImportError:
    getWeights_logger.exception("You must set up PyAMI first. lsetup pyami will do the trick. Make sure you have a valid certificate (voms-proxy-init -voms atlas) or run `ami auth` to log in.")
    sys.exit(0)

# INIT ATLAS API
api.init()
# INSTANTIATE THE PYAMI CLIENT FOR ATLAS
client = pyAMI.client.Client('atlas')

try:
    import timing

    # check submission directory
    if args.force_overwrite:
        getWeights_logger.info("removing {0:s}".format(args.output_filename))
        try:
            os.remove(args.output_filename)
        except OSError:
            pass
    else:
def main():
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile', action="store",
                        help="OPTIONAL Name of the output prw file containing valid configs",
                        required=False)
    parser.add_argument('--outputSuspect', action="store_true",
                        help="allow for suspect channels to be included in the output prw file",
                        default=False)
    parser.add_argument('--inDsTxt', action="store",
                        help="text file containing datasets to make PRW for (one per line)",
                        required=True)
    parser.add_argument('prwFiles', nargs="+", help="PRW Config files to scan")
    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    #read datasets into list
    datasets = []
    for txtFile in args.inDsTxt.split(","):
        with open(txtFile) as f:
            datasets += f.read().splitlines()

    print "Determining provenances of %d datasets ..." % len(datasets)

    aodDatasets = dict()
    for dataset in datasets:
        #strip the scope if it's there
        if dataset.startswith("#"):
            continue
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset) == 0:
            continue
        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset)
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        theParent = ""
        for ds in prov['node']:
            if ds[u'dataType'] != u'AOD':
                continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])
            break
        if theParent == "":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        #extract the dsid ...
        theParent = theParent.split(".")[1]
        if theParent in aodDatasets:
            aodDatasets[theParent] += theParentSize
        else:
            aodDatasets[theParent] = theParentSize

    #aodDatasets is now a chanNum -> eventNumber pairing ...

    import ROOT
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  #trick tool into going into counting mode
    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    for dsid, nevents in aodDatasets.iteritems():
        #get the sum of weights from the tool
        total = 0
        for p in periodNumbers:
            if p == -1:
                continue
            hist = out.GetInputHistogram(int(dsid), p)
            if hist:
                total += hist.GetEntries()
        if total == nevents:
            print "channel %s is ok" % dsid
        elif total < nevents:
            print "channel %s is incomplete (missing %d events from config files)" % (dsid, nevents - total)
            out.RemoveChannel(int(dsid))
        elif total > nevents:
            print "channel %s is suspect! (config files have additional %d events)" % (dsid, total - nevents)
            if not args.outputSuspect:
                out.RemoveChannel(int(dsid))

    if args.outPRWFile:
        out.Initialize()
        out.WriteToFile(args.outPRWFile)

    return 0
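# A condensed, standalone sketch of the counting-mode trick used above: load a PRW
# config file into TPileupReweighting, then sum the per-period input-histogram
# entries for one channel and compare with the expected AOD event count. The file
# name, DSID and expected count below are placeholders for illustration only.
import ROOT

tool = ROOT.CP.TPileupReweighting("check")
tool.AddConfigFile("my_prw_config.root")   # placeholder PRW config file
tool.ResetCountingMode()                   # trick tool into counting mode, as above

dsid = 361443                              # placeholder channel number
expected = 1000000                         # placeholder expected AOD event count

total = 0
for period in tool.GetPeriodNumbers():
    if period == -1:
        continue
    hist = tool.GetInputHistogram(dsid, period)
    if hist:
        total += hist.GetEntries()

print "channel %d: found %d of %d expected events" % (dsid, total, expected)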
def __init__(s):
    s.client = pyAMI.client.Client("atlas")
    AtlasAPI.init()
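# The __init__ above evidently belongs to a small wrapper class. Below is a
# minimal, hypothetical version of such a wrapper with the imports it relies on
# made explicit; the class name and the query method are illustrative only.
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

class AmiWrapper(object):
    def __init__(s):
        s.client = pyAMI.client.Client("atlas")
        AtlasAPI.init()

    def info(s, dataset):
        # thin pass-through to the AMI dataset-info call used elsewhere in these snippets
        return AtlasAPI.get_dataset_info(s.client, dataset)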
def main():
    logging.basicConfig(format='%(levelname)s:%(message)s')
    import time, datetime
    from pytz import timezone
    import argparse

    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
        import pyAMI.config
    except ImportError:
        logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
        return -1

    extraFieldDefaults = {}  #{"approx_crossSection":None,"approx_GenFiltEff":1.0}

    fieldDefaults = {"subprocessID": 0, "dataset_number": 0}
    #populate the fieldDefaults ... for all, assume 'None'
    for field in pyAMI.config.tables['datasets'].keys():
        if str(field) == "cross_section":
            continue  #special exception because this field only present in
        if str(field) in fieldDefaults.keys():
            continue
        if str(field).startswith("@"):
            continue
        fieldDefaults[str(field)] = None

    import commands
    #check the voms proxy
    status, out = commands.getstatusoutput("voms-proxy-info -fqan -exists")
    if status != 0:
        logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")
        return -1

    try:
        client = pyAMI.client.Client('atlas')
        AtlasAPI.init()
    except:
        logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
        return -1

    #need to collect the ami dataset parameter defaults
    paramExplains = []  #for the help message only
    paramUnits = dict()
    paramDefaults = {}

    res = client.execute('ListPhysicsParameterDefs', format='dom_object')
    for r in res.get_rows():  #r is OrderedDict
        explainString = "%s: %s" % (r[u'PARAMNAME'], r[u'DESCRIPTION'])
        if r[u'UNITS'] != u'NULL':
            explainString += " (units: %s)" % r[u'UNITS']
            paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
        if r[u'HASDEFAULT'] == u'N':
            paramDefaults[str(r[u'PARAMNAME'])] = None
        else:
            explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
            if r[u'PARAMTYPE'] == u'number':
                paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])  #FIXME: Assumes all parameters are floats
            elif r[u'PARAMTYPE'] == u'string':
                paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
        paramExplains += [explainString]

    paramDefaults["crossSection_pb"] = None
    paramUnits["crossSection_pb"] = "pb"
    paramExplains += ["crossSection_pb: Same as crossSection except in pb units (units: pb)"]

    cern_time = timezone('UCT')
    current_time = datetime.datetime.fromtimestamp(time.time(), cern_time).strftime('%Y-%m-%d %H:%M:%S')

    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDS', nargs='+', default=[""],
                        help="List of datasets to retrieve parameters for")
    parser.add_argument('--inDsTxt', default="",
                        help="Alternative to --inDS, can specify the datasets from an input file")
    parser.add_argument('--fields', nargs='+',
                        help="List of parameters to extract. Available parameters are: \n\n %s\n\nYou can also include any from:\n %s\nYou can also do keyword_xxx to add a bool branch for keywords" % ("\n ".join(paramExplains), ", ".join(fieldDefaults.keys() + extraFieldDefaults.keys())),
                        default=["dataset_number", "crossSection", "kFactor", "genFiltEff"])
    parser.add_argument('--timestamp', default=current_time,
                        help="The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
    parser.add_argument('--physicsGroups', nargs='+', default=["PMG,MCGN"],
                        help="Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. 
try to use PMG values, fallback on MCGN values if unavailable). Allowed groups are:\n PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI" ) parser.add_argument( '--oldTimestamp', default="", help= "If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters" ) parser.add_argument( '--explainFields', nargs='+', default=[], help= "The fields you would like explained .. will appear as comment lines after each row in the output" ) parser.add_argument( '--explainInfo', nargs='+', default=['explanation', 'insert_time'], help= "Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby. Default is: explanation,insert_time" ) parser.add_argument( '--outFile', default=sys.stdout, type=argparse.FileType('w'), help="Where to print the output to. Leave blank to print to stdout") parser.add_argument( '--delim', default="", help= "The delimiter character. Defaults to spaces leading to nice formatting table" ) parser.add_argument('-v', action='store_true', help="Verbose output for debugging") args = parser.parse_args() if args.v: logging.getLogger().setLevel(logging.DEBUG) else: logging.getLogger().setLevel(logging.INFO) logging.debug(args.inDS) logging.debug(args.fields) logging.debug(args.timestamp) if args.timestamp == "the dawn of time": logging.error( "Unfortunately we don't know any parameters from this time period... but we're working on it!" ) return 9999 #split elements of fields by comma to get full list args.fields = sum((y.split(',') for y in args.fields), []) args.fields = [x.strip() for x in args.fields] #strips whitespace #look for keyword_ fields, these are special ... args.keywords = [] for f in args.fields: if f.startswith("keyword_"): k = f[8:] #and then add each keyword to the extraFieldDefaults so it is recognised thusly extraFieldDefaults["keyword_%s" % k] = bool(False) args.keywords += [k] #same for physics groups args.physicsGroups = sum((y.split(',') for y in args.physicsGroups), []) args.physicsGroups = [x.strip() for x in args.physicsGroups] #strips whitespace #same for explainFields and explainInfo args.explainFields = sum((y.split(',') for y in args.explainFields), []) args.explainFields = [x.strip() for x in args.explainFields] #strips whitespace args.explainInfo = sum((y.split(',') for y in args.explainInfo), []) args.explainInfo = [x.strip() for x in args.explainInfo] #strips whitespace if args.inDsTxt != '': args.inDS = readDsFromFile(args.inDsTxt) #and same for inDS args.inDS = sum((y.split(',') for y in args.inDS), []) args.inDS = [x.strip() for x in args.inDS] #strips whitespace #1. check field values are allowed, we obtain default field values at same time.. #2. For each entry in inDS, if contains wildcard we obtain list of DS, otherwise check DS exists. During this time we obtain the datasetid and numEvents properties, incase we need them #3. For each of these DS, get parameters from ami matching the timestamp. Organize into fields and index by subprocessID #4. Output a line to our output file #1. #before adding all the ami parameters, identify which of provided fields are: 1). Obtained from list_datasets command (dsFields) 2). 
actual parameters dsFields = [ x for x in args.fields if x in fieldDefaults.keys() and x not in ["subprocessID", "ldn"] ] extraFields = [x for x in args.fields if x in extraFieldDefaults.keys()] paramFields = [x for x in args.fields if x in paramDefaults.keys()] if len(paramFields) > 0 and args.physicsGroups == [""]: logging.error( "You must specify at least one physics group. See -h for allowed groups" ) return -1 #combine paramDefaults with fieldDefaults fieldDefaults.update(paramDefaults) #and with extra fields fieldDefaults.update(extraFieldDefaults) for field in args.fields: if field not in fieldDefaults: logging.error("%s is not a recognised field. Allowed fields are:" % field) logging.error(fieldDefaults.keys()) return -1 if args.oldTimestamp != "": logging.info("oldTimestamp option specified. Running in diff mode...") args.explainFields = args.fields args.explainInfo = [ "explanation", "insert_time", "physicsGroup", "createdby" ] #2. #replace all '*' with '%' and strip "/" args.inDS = [ds.replace("*", "%") for ds in args.inDS] args.inDS = [ds.rstrip("/") for ds in args.inDS] if len(args.inDS) == 0 or (len(args.inDS) == 1 and args.inDS[0] == ""): logging.error( "No datasets provided. Please specify datasets with the --inDS or --inDsTxt options" ) return -1 logging.info( "Fetching list of datasets from AMI (this may take a few minutes)...") #obtain list of datasets res = AtlasAPI.list_datasets( client, patterns=args.inDS, fields=dsFields + ['ldn'], ami_status="VALID" ) #changed status from %, to only catch valid now: wb 08/2015 logging.info("...Found %d datasets matching your selection" % len(res)) if len(res) == 0: return 0 #NOTE: Should we allow retrieval of the extra information: keyword, genfiltereff, approx crossection, .. these all come from GetDatasetInfo ami command dataset_values = dict() for r in res: mydict = dict() dataset_values[str(r['ldn'])] = mydict for field in r.items(): if str(field[0]) == "ldn": continue if str(field[0]) not in args.fields: continue mydict[str(field[0])] = str(field[1]) #also if we have the 'extra fields or keywords' we will need to execute AtlasAPI.get_dataset_info .. 
if len(extraFields) > 0 or len(args.keywords) > 0: info_res = AtlasAPI.get_dataset_info(client, str(r['ldn'])) #print(info_res) if len(info_res) == 0: logging.error("Unable to retrieve dataset info for %s" % str(r['ldn'])) return -1 for field in extraFields: #ignore the keyword_ fields if field.startswith("keyword_"): continue mydict[field] = float(info_res[0][unicode(field)]) if isfloat( info_res[0][unicode(field)]) else extraFieldDefaults[field] for k in args.keywords: mydict["keyword_%s" % k] = int( (k in str(info_res[0][unicode('keyword')]).split(","))) #sort dataset_values as well as possible from collections import OrderedDict sorted_values = OrderedDict() for ds in args.inDS: if ds in dataset_values.keys(): sorted_values[ds] = dataset_values[ds] for ds in sorted(dataset_values): if ds not in sorted_values.keys(): sorted_values[ds] = dataset_values[ds] dataset_values = sorted_values logging.debug(dataset_values) #res = client.execute(['GetDatasetInfo for ds in args.inDS: if '%' not in ds and ds not in dataset_values.keys(): logging.warning("Unknown dataset: %s" % ds) datasetsToQuery = ",".join(dataset_values.keys()) #if using inDsTxt, retain any comment or blank lines in structure of output complete_values = OrderedDict() if args.inDsTxt != "": # read lines commentcount = 0 import re txt = open(args.inDsTxt) for tmpLine in txt: # remove \n tmpLine = re.sub('\n', '', tmpLine) # remove white spaces tmpLine = tmpLine.strip() # skip comment or empty if tmpLine.startswith('#') or tmpLine == '': complete_values['comment%d' % (commentcount)] = tmpLine commentcount = commentcount + 1 continue # append tmpLine = tmpLine.rstrip("/") if tmpLine in dataset_values.keys(): complete_values[tmpLine] = dataset_values[tmpLine] else: print("cannot find %s" % tmpLine) # close file txt.close() dataset_values = complete_values logging.info( "Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.timestamp)) #do as one query, to be efficient if (args.timestamp == current_time): res = client.execute([ 'GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.timestamp ], format='dom_object') else: res = client.execute([ 'GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.timestamp, "--history=true" ], format='dom_object') #organize results by dataset parameterQueryResults = dict() for r in res.get_rows(): if r[u'logicalDatasetName'] not in parameterQueryResults.keys(): parameterQueryResults[r[u'logicalDatasetName']] = [] parameterQueryResults[r[u'logicalDatasetName']] += [ r ] #puts row in the list for this dataset if args.oldTimestamp != "": logging.info( "Obtaining %s for selected datasets at timestamp=%s... 
(please be patient)" % (args.fields, args.oldTimestamp)) res2 = client.execute([ 'GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.oldTimestamp, "--history=true" ], format='dom_object') old_parameterQueryResults = dict() for r in res2.get_rows(): if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys( ): old_parameterQueryResults[r[u'logicalDatasetName']] = [] old_parameterQueryResults[r[u'logicalDatasetName']] += [ r ] #puts row in the list for this dataset headerString = "" doneHeader = False commentCache = "" commentCount = 0 #result is a list of lists (each list is 1 row) outputTable = [] tableHeaders = [] for ds in dataset_values.keys(): if ds.startswith('comment'): if commentCount > 0: commentCache += "\n" commentCache += dataset_values[ds] commentCount = commentCount + 1 continue #obtain list of parameters for this dataset #if(args.timestamp==current_time): # res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp], format='dom_object') #else: # res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object') res = parameterQueryResults.get(ds, []) if args.oldTimestamp != "": res2 = old_parameterQueryResults.get(ds, []) #first we have to determine how many subprocesses this ds has dsSubprocesses = [0] #always have the 0 subprocess for r in res: sp = int(r[u'subprocessID']) if sp not in dsSubprocesses: dsSubprocesses += [sp] #now for each subprocess we have to locate each required field value (in paramFields) #rank by physicsGroup for sp in dsSubprocesses: paramVals = dict() paramVals2 = dict() groupsWithVals = dict() #held for helpful output #need to keep explanations for requested fields explainInfo = dict() for i in args.explainFields: explainInfo[i] = dict() for param in paramFields: groupsWithVals[param] = [] bestGroupIndex = len(args.physicsGroups) import copy paramVals[param] = copy.copy(fieldDefaults[param]) for r in res: if int(r[u'subprocessID']) != sp: continue if str(r[u'paramName']) != param and not ( param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"): continue if str(r[u'physicsGroup']) not in args.physicsGroups: groupsWithVals[param] += [(str(r[u'physicsGroup']), str(r[u'paramValue']))] continue if args.physicsGroups.index(str( r[u'physicsGroup'])) > bestGroupIndex: continue if args.physicsGroups.index(str( r[u'physicsGroup'])) == bestGroupIndex: logging.warning( "Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" 
% (param, str(r[u'physicsGroup']), ds, sp)) paramVals[param] = str(r[u'paramValue']) if param == "crossSection_pb": paramVals[param] = str( float(paramVals[param]) * 1000.0) bestGroupIndex = args.physicsGroups.index( str(r[u'physicsGroup'])) #keep the explanation info for the requested fields if param in explainInfo.keys(): for e in args.explainInfo: if unicode(e) not in r: logging.error( "Unrecognised explainInfo field: %s" % e) return -1 explainInfo[param][e] = str(r[unicode(e)]) if args.oldTimestamp != "": bestGroupIndex = len(args.physicsGroups) paramVals2[param] = copy.copy(fieldDefaults[param]) for r in res2: if int(r[u'subprocessID']) != sp: continue if str(r[u'paramName']) != param and not ( param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"): continue if str(r[u'physicsGroup']) not in args.physicsGroups: continue if args.physicsGroups.index(str( r[u'physicsGroup'])) > bestGroupIndex: continue if args.physicsGroups.index(str( r[u'physicsGroup'])) == bestGroupIndex: logging.warning( "Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp)) paramVals2[param] = str(r[u'paramValue']) if param == "crossSection_pb": paramVals2[param] = str( float(paramVals2[param]) * 1000.0) bestGroupIndex = args.physicsGroups.index( str(r[u'physicsGroup'])) #at this stage, parameters reside in paramVals dict or dataset_values[ds] dict #print them in the requested order .. if any is "None" then stop, because it doesn't have a default value and didn't find a value for it either rowString = "" rowList = [] firstPrint = False for param in args.fields: val = None if param == "ldn": val = ds elif param == "subprocessID": val = sp elif param in dataset_values[ds].keys(): val = dataset_values[ds][param] else: val = paramVals.get(param, None) if val == None: if args.outFile != sys.stdout: logging.warning( "dataset %s (subprocess %d) does not have parameter %s, which has no default." 
% (ds, sp, param)) if len(groupsWithVals.get(param, [])) > 0: logging.warning( "The follow physicsGroups have defined that parameter though:" ) logging.warning(groupsWithVals[param]) val = "#UNKNOWN#" #return -1 #if isfloat(str(val)): val = "%.6g" % float(val) if args.oldTimestamp != "": #diff val to old val val2 = None if param == "ldn": val2 = ds elif param == "subprocessID": val2 = sp elif param in dataset_values[ds].keys(): val2 = dataset_values[ds][param] else: val2 = paramVals2.get(param, None) if val2 == None: val2 = "#UNKNOWN#" #if isfloat(str(val2)): val2 = "%.6g" % float(val) if (str(val) != str(val2)): if not firstPrint: print("%s:" % ds) firstPrint = True print(" %s : %s ---> %s" % (param, str(val2), str(val))) print(" insert_time : %s" % explainInfo[param]['insert_time']) print(" explanation : %s" % explainInfo[param]['explanation']) print(" createdby : %s" % explainInfo[param]['createdby']) print(" physicsGroup : %s" % explainInfo[param]['physicsGroup']) continue rowList += [str(val)] if rowString != "" and args.delim != "": rowString += args.delim rowString += str(val) #inspect the type of str(val) to build up the header if not doneHeader: headerString += param if args.outFile != sys.stdout: if type(fieldDefaults[param]) == bool: headerString += "/O:" elif type(fieldDefaults[param]) == int: headerString += "/I:" elif type(fieldDefaults[param]) == float: headerString += "/D:" elif isfloat(str(val)): headerString += "/D:" #elif isint(str(val)): headerString += "/I:" TO BE SAFE WE MAKE ALL NUMERIC FIELDS FLOATS, EXCEPT if the default value is type int else: headerString += "/C:" else: v = param if param in paramUnits: headerString += " [%s]" % paramUnits[param] v += " [%s]" % paramUnits[param] tableHeaders += [v] headerString += " " if args.oldTimestamp != "": continue #print nothing more for diff mode if not doneHeader: doneHeader = True if args.outFile != sys.stdout: print(headerString[:-1], file=args.outFile) if commentCount > 0: if args.outFile != sys.stdout and args.delim != "": print(commentCache, file=args.outFile) outputTable += [["COMMENT", commentCache]] commentCache = '' commentCount = 0 if args.outFile != sys.stdout and args.delim != "": print(rowString, file=args.outFile) outputTable += [rowList] #also print the required explanations for (field, expl) in explainInfo.items(): outString = "#%s: { " % field doneFirst = False for eField in args.explainInfo: if doneFirst: outString += " , " if not eField in expl.keys(): outString += " %s: <NONE .. 
value is default>" % eField else: outString += "%s: %s" % (eField, expl[eField]) doneFirst = True outString += " }" #print(outString,file=args.outFile) outputTable += [["COMMENT", outString]] if args.oldTimestamp != "": args.outFile.close() return 0 #print the table in nicely formatted state if args.outFile == sys.stdout or args.delim == "": #determine column widths columnWidths = [0] * len(args.fields) for i in range(0, len(tableHeaders)): columnWidths[i] = len(tableHeaders[i]) for r in outputTable: if len(r) > 0 and r[0] == "COMMENT": continue for i in range(0, len(r)): if len(r[i]) > columnWidths[i]: columnWidths[i] = len(r[i]) lineout = "" for i in range(0, len(tableHeaders)): lineout += tableHeaders[i].ljust(columnWidths[i]) + " " print(lineout) for r in outputTable: lineout = "" if len(r) > 0 and r[0] == "COMMENT": lineout = r[1] else: for i in range(0, len(r)): lineout += r[i].ljust(columnWidths[i]) + " " print(lineout, file=args.outFile) #print the footer, which is the command to reproduce this output import os if args.outFile != sys.stdout: #remove comment from dataset_values datasetss = [ x for x in dataset_values.keys() if not x.startswith("comment") ] print("", file=args.outFile) print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject', 'UNKNOWN!'), os.environ.get('AtlasVersion', 'UNKNOWN!')), file=args.outFile) print( "#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp, ",".join(args.physicsGroups), ",".join( args.fields), ",".join(datasetss)), file=args.outFile) logging.info("Results written to: %s" % args.outFile.name) args.outFile.close()