def get_ami_events(ami_client, dataset):
    results = atlas_api.get_dataset_info(ami_client, dataset)
    if len(results) != 1:
        print "WARNING: %d results returned from AMI, expected 1" % len(results)
    return int(results[0]["totalEvents"])
def getDatasetInfo(dataset, debug=False):
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    results = AtlasAPI.get_dataset_info(client, dataset)
    if len(results) != 1:
        raise SystemExit('\n***EXIT*** no valid results for dataset %s' % dataset)
    eff = None
    for name, value in results[0].iteritems():
        if name == 'totalEvents':
            nevents = value
        elif name == 'crossSection':
            xsec = float(value)*1e6  # xsec is in nb, hence the 1e6 factor to get it in fb
        elif name == 'datasetNumber':
            dsid = value
        elif name == 'genFiltEff':
            eff = float(value)
    # if genFiltEff is not available, use approx_GenFiltEff
    if eff is None:
        for name, value in results[0].iteritems():
            if name == 'approx_GenFiltEff':
                eff = float(value)
    print '%s %e %e %s' % (dsid, xsec, eff, nevents)
    if debug:
        for name, value in results[0].iteritems():
            print ' %s %s' % ((name + ':').ljust(24), value)
        print ''
    return dsid, xsec, eff, nevents
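A minimal usage sketch for the helper above; the dataset name is a placeholder and the module-level imports are assumed, so treat it as illustrative rather than the script's actual driver:

import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

# Placeholder logical dataset name; substitute a real EVNT/AOD container.
dsid, xsec_fb, filt_eff, nevents = getDatasetInfo(
    "mc15_13TeV.999999.placeholder_sample.evgen.EVNT.e0000")

# Equivalent luminosity, L = N / (sigma * filter efficiency); sigma is returned
# in fb above, so L comes out in fb^-1.
if xsec_fb > 0 and filt_eff > 0:
    print("equivalent luminosity: %g fb^-1" % (float(nevents) / (xsec_fb * filt_eff)))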
def getSampleWeight(dsid):
    fname = ""
    files = os.listdir(file_loc)
    for f in files:
        if dsid in f:
            fname = f.strip("_ntupleOutput\.root\/").strip("user\.tholmes\.")
    fname = "m" + fname
    info = AtlasAPI.get_dataset_info(client, fname)[0]
    xs = float(info.get('approx_crossSection')) * float(info.get('approx_GenFiltEff'))
    return xs
def encodeDSInfo(s, ldn):
    datasetinfo = AtlasAPI.get_dataset_info(s.client, ldn)
    neventsAMI = float(datasetinfo[0]["totalEvents"])
    crosssectionAMI = float(datasetinfo[0]["crossSection"])
    if "approx_GenFiltEff" in datasetinfo[0].keys():
        filtereffAMI = float(datasetinfo[0]["approx_GenFiltEff"])
    else:
        filtereffAMI = None
    return {"events": neventsAMI, "xsec": crosssectionAMI, "fit_eff": filtereffAMI, "ldn": ldn}
def getNumberAmi(dsName):
    try:
        InfoDict = AtlasAPI.get_dataset_info(client, dsName)
    except pyAMI.exception.Error as bla:
        print "Failed to query DS %s on AMI: Exception was \"%s\"" % (dsName, bla)
        return -1
    try:
        n_Events = InfoDict[0]["totalEvents"]
    except KeyError:
        print "Unable to decipher the InfoDict: "
        print InfoDict
        return -1
    return int(n_Events)
def getXS(dsid):
    xs = getCrossSection(int(dsid), verbose=False)
    if xs == -1:
        #print "using pyami for dsid", dsid
        fname = ""
        files = os.listdir(file_loc)
        for f in files:
            if dsid in f:
                fname = f.strip("_ntupleOutput\.root\/").strip("user\.tholmes\.")
        fname = "m" + fname
        info = AtlasAPI.get_dataset_info(client, fname)[0]
        #if (info.get('crossSection_unit') != 'nano barn'): print 'WARNING: Cross section listed in', info.get('crossSection_unit'), "!!"
        xs = 1000 * float(info.get('approx_crossSection')) * float(info.get('approx_GenFiltEff'))
    return xs
def genParamsFromParents(client, datasetName, datasetNumber):
    from pyAMI.atlas.api import get_dataset_info
    approx_GenFiltEff = None
    xsec = None
    prov = pyAMI.atlas.api.get_dataset_prov(client, datasetName)
    for parent in prov['node']:
        # minbias overlays are also parents so need to
        # check the channel number
        if int(parent[u'logicalDatasetName'].split(".")[1]) == datasetNumber:
            parentinfo = get_dataset_info(client, parent[u'logicalDatasetName'])[0]
            if parentinfo.has_key(u'approx_GenFiltEff'):
                approx_GenFiltEff = parentinfo[u'approx_GenFiltEff']
                pass
            if parentinfo.has_key(u'crossSection') and parentinfo[u'crossSection'] != u'NULL':
                xsec = float(parentinfo[u'crossSection'])
                pass
            if approx_GenFiltEff and xsec:
                break
            pass
    return (xsec, approx_GenFiltEff)
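A sketch of how this parent lookup can back-fill metadata when a derived dataset's own record is incomplete; the DAOD name below is a placeholder, and the NULL-cross-section fallback mirrors the dataset-list writer later in this collection:

import pyAMI.client
import pyAMI.atlas.api
from pyAMI.atlas.api import get_dataset_info

client = pyAMI.client.Client('atlas')
pyAMI.atlas.api.init()

# Placeholder derived-dataset name.
ldn = 'mc15_13TeV.999999.placeholder.deriv.DAOD_SUSY1.e0000_s0000_r0000_p0000'
info = get_dataset_info(client, ldn)[0]

xsec = info.get(u'crossSection')
effic = info.get(u'approx_GenFiltEff')
if xsec in (None, u'NULL') or effic is None:
    # Fall back to the EVNT parent when the derived dataset lacks the values.
    xsec, effic = genParamsFromParents(client, ldn, int(info[u'datasetNumber']))
print("%s %s" % (xsec, effic))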
import argparse
import json, ast
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

parser = argparse.ArgumentParser(description='Helper to get nFiles of a list of datasets')
# parser.add_argument('datasets', type=str, help='file containing list of datasets')
parser.add_argument('file', type=argparse.FileType('r'), nargs='+')
args = parser.parse_args()

client = pyAMI.client.Client('atlas')
AtlasAPI.init()

print "INFO - Checking AMI status of datasets"
for f in args.file:
    print "INFO - Checking %s" % f
    for dataset in f:
        dataset = dataset.rstrip()
        if dataset[:1] == '#':
            continue
        if dataset[:2] == 'mc' or dataset[:4] == 'data':
            try:
                result = AtlasAPI.get_dataset_info(client, dataset=dataset)
                result = ast.literal_eval(json.dumps(result))
                print(result[0]['nFiles'])
            except:
                print("ERROR - Not in AMI - %s" % dataset)
for prov in dsProv["node"]:
    if prov['dataType'] == "EVNT":
        thisProvDSName = prov['logicalDatasetName']
        thisProvDSID = thisProvDSName.split(".")[1]
        if thisProvDSID == dsID:
            print "\tUsing ", thisProvDSName
            inputDS.append(thisProvDSName)

def getUnitSF(unit):
    if unit == "nano barn":
        return 1000
    print "Unknown unit..."
    return 1.0

fh_out = open(args.output, 'w') if args.output != None else None

for ds in inputDS:
    dsList = AtlasAPI.get_dataset_info(client, dataset=ds)
    dsInfo = dsList[0]
    #print dsInfo['logicalDatasetName']
    #print "\tcross section", dsInfo["crossSection_mean"]
    #print "\tfilter Eff.", dsInfo["GenFiltEff_mean"]
    print "totalEvents:", dsInfo['totalEvents']
    unit = dsInfo['crossSection_unit']
    getSF = getUnitSF(unit)
    if fh_out == None:
        print dsInfo['datasetNumber'], " ", dsInfo['physicsShort'], " ", float(dsInfo["crossSection_mean"]) * getSF, " 1. ", float(dsInfo["GenFiltEff_mean"]), " 1."
    else:
        fh_out.write("%s\t%s\t%e\t1.\t%e\t1.\n" % (dsInfo['datasetNumber'], dsInfo['physicsShort'], float(dsInfo["crossSection_mean"]) * getSF, float(dsInfo["GenFiltEff_mean"])))
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)
    if (config.baseline or config.official or config.sample) and config.grl != "":
        print "--grl is incompatible with --baseline, --official and --sample"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # consistency checks
    if config.whichMC15 != '':
        if config.whichMC15 == 'week1' and config.prefix != 'mc15_week1':
            print 'prefix changed to mc15_week1 in agreement with whichMC15'
            config.prefix = 'mc15_week1'
        elif config.whichMC15 == '50ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'
        elif config.whichMC15 == '25ns' and config.prefix != 'mc15_13TeV':
            print 'prefix changed to mc15_13TeV in agreement with whichMC15'
            config.prefix = 'mc15_13TeV'

    # data type is NTUP_SUSY for 2011/2012 and AOD for 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        elif 'mc15_13TeV' in config.prefix:
            import mc15_13TeV_MCSampleList as mcsl
        elif 'mc15_week1' in config.prefix:
            import mc15_13TeV_week1_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV, mc14_13TeV, mc15_13TeV and mc15_week1'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]
    elif config.grl != "":
        if not os.path.exists(config.grl):
            print 'Could not find GRL', config.grl
            sys.exit(1)
            pass
        doc = ET.parse(config.grl)
        for item in doc.findall('./NamedLumiRange/Metadata'):
            if item.attrib['Name'] == 'RunList':
                for r in item.text.split(','):
                    officialids.append(int(r))
                    pass

    # get all datasets matching prefix & tag and then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    alldatasets = []
    if config.whichMC15 != '':
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        for tag in mc15_rtags[config.whichMC15]:
            dskey = prefix + datatype + tag + config.tag
            print 'Querying AMI for datasets matching pattern', dskey
            alldatasets += list_datasets(client, dskey)
    else:
        prefix = config.prefix
        if prefix == 'mc15_week1':
            prefix = 'mc15_13TeV'
        dskey = config.prefix + datatype + config.tag
        print 'Querying AMI for datasets matching pattern', dskey
        alldatasets = list_datasets(client, dskey)

    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample or config.grl != "") and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
        pass
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])
        pass

    # write file
    coveredids = set()
    if not (config.suffix == ""):
        myoutputfile = 'datasets_' + config.suffix + '.txt'
    else:
        myoutputfile = 'datasets.txt'
    fout = open(myoutputfile, 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            if config.grl == "":
                generatorString = info['generatorName']
                version = info['version']
                if badDataset(dsname, generatorString, version):
                    continue
            availability = info['prodsysStatus']
            if config.onlyComplete and availability != u'ALL EVENTS AVAILABLE':
                print 'Skip incomplete dataset', dsname, availability
                continue
            nFiles = int(info['nFiles'])
            if nFiles > 0 and config.prefix.startswith('data'):
                fout.write(dsname + '\n')
            elif nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    datasetNumber = int(info[u'datasetNumber'])
                    coveredids.add(datasetNumber)
                    # confirmed with AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5
                    # there are sometimes problems in the propagation of these
                    # properties to the xAOD/derived datasets so go back in
                    # parentage to find the information
                    xsec = info[u'crossSection']
                    if info.has_key(u'approx_GenFiltEff'):
                        effic = info[u'approx_GenFiltEff']
                    if config.datatype == '%TRUTH1%':
                        effic = 1
                    if ((xsec == u'NULL' or not info.has_key(u'approx_GenFiltEff')) and not (config.datatype == '%TRUTH1%')):
                        xsec, effic = genParamsFromParents(client, dsname, datasetNumber)
                        if not xsec:
                            xsec = 0
                        if not effic:
                            print 'No approx_GenFiltEff found for', dsname, 'set to 0 !!!!'
                            effic = 0
                        pass
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset ', dsname, 'in AMI, skip'
    fout.close()

    if len(coveredids) == 0:
        if not config.prefix.startswith('data'):
            print 'Could not extract any channel IDs from datasets found, this is OK for data but suspicious for MC'
    else:
        for id in officialids:
            if not id in coveredids:
                print 'No dataset found for channel ', id
                pass
for a in open('stupid.txt'):
    b = a.strip().replace('/', '')
    if len(b) == 0:
        continue
    # Take AOD instead of DAOD (not the same original number of events)
    b = b.replace('DAOD_HIGG4D2', 'AOD')
    b = b.replace('_p2419', '')
    # b = b.rstrip(b[-6:])
    # print b
    id = b.split('.')[1]
    if id in Ids:
        continue
    Ids.append(id)
    infos = AtlasAPI.get_dataset_info(client, b)[0]
    xsec = float(infos['crossSection']) * 1000
    filtstr = infos['approx_GenFiltEff']
    if filtstr.find('N/A') >= 0:
        # print 'No filter efficiency available'
        # print infos
        filt = 1
    else:
        filt = float(filtstr)
    kfac = 1.0
    nevts = float(infos['totalEvents'])
    lumi = nevts / 1000. / (xsec * kfac * filt)
    print id, '\t', '%s' % int(nevts), '\t', '{0}*{1}*{2}'.format(xsec, kfac, filt), '\t', '{0:1.2f}'.format(lumi), '\t', b
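The same equivalent-luminosity arithmetic pulled out into a small helper, as a sketch; like the loop above it assumes AMI reports crossSection in nb (hence the factor 1000 to pb) and treats an 'N/A' filter efficiency as 1:

def equivalent_lumi_fb(infos, kfac=1.0):
    """Equivalent luminosity in fb^-1 from one get_dataset_info() record."""
    xsec = float(infos['crossSection']) * 1000  # nb -> pb
    filtstr = infos['approx_GenFiltEff']
    filt = 1.0 if 'N/A' in filtstr else float(filtstr)
    nevts = float(infos['totalEvents'])
    # N / sigma[pb] is in pb^-1; the extra factor 1/1000 quotes the result in fb^-1
    return nevts / 1000. / (xsec * kfac * filt)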
runList = []
client = pyAMI.client.Client('atlas')
AtlasAPI.init()
for key in f.GetListOfKeys():
    if 'h_cutflow' in key.GetName():
        runList.append(key.GetName().split('_')[2])
runList.sort()

with open(args.output, 'wb') as csvfile:
    cwriter = csv.writer(csvfile, delimiter=',')
    cwriter.writerow([
        'run', 'luminosity', 'CBC selected', 'Initial', 'GRL', 'event cleaning',
        'trigger', 'pT_lead', 'n_fatjet==3', 'n_fatjet==3 && b-tag',
        'n_fatjet==4 && MJ < 600', 'n_fatjet==4 && MJ < 600 && b-tag',
        'n_fatjet >= 5 && MJ < 600', 'n_fatjet >= 5 && MJ < 600 && b-tag'
    ])
    for run in runList:
        h = f.Get('h_cutflow_' + run)
        row = [h.GetBinContent(i) for i in range(1, h.GetNbinsX() + 1)]
        row.insert(0, '')
        row.insert(0, int(run))
        dsName = 'data15_13TeV.00' + run + '.physics_Main.merge.DAOD_EXOT3.r7562_p2521_p2614'
        d = AtlasAPI.get_dataset_info(client, dsName)[0]
        totalEvents = int(d['totalEvents'])
        print 'DSID: %s, AMI = %i, CBC = %i, Initial = %i' % (run, totalEvents, row[2], row[3])
        cwriter.writerow(row)
def SampleHandler_QueryAmi(samples):
    # set up an AMI client
    # This is the basic minimum - and it will look for an encrypted file with your user credentials
    # If it does not find that it will try for a VOMS proxy
    # Make the encrypted file by running the ami command
    #   ami auth
    # first.
    # In the pyAMI doc you will find an example of how to get your program to request
    # the user to make a file.
    # https://atlas-ami.cern.ch/AMI/pyAMI/examples/api.html
    amiClient = pyAMI.client.Client('atlas')

    # Extract from your mail
    ###############
    # The quantities in the class MetaDataSample are pretty much all I need:
    # * whether it is data or MC
    # * the luminosity of the sample
    # * the k-factor of the sample (only for MC)
    # * the number of events in the sample
    # * the cross section of the sample (only for MC)
    # * the filter efficiency
    data = ROOT.SH.MetaDataQuery()
    data.messages = 'done by ami query'

    # I am assuming that "samples" is a list of dataset names, and that
    # the user already checked that they exist and are valid
    for sample in samples:
        sample_noscope = sample.split(':')[-1]
        mydata = ROOT.SH.MetaDataSample(sample)

        # The first question you ask is it data or mc.
        # Actually you should be able to tell this without ambiguity from the name
        # without going to the trouble of a request to AMI.
        # description: 1 for data, 0 for MC, or -1 if this is not known.
        mydata.source = 'https://atlas-ami.cern.ch/AMI/pyAMI/'
        mydata.unknown = 0
        if sample.startswith("mc"):
            mydata.isData = 0
            pass
        elif sample.startswith("data"):
            mydata.isData = 1
            pass
        else:
            mydata.isData = -1
            pass

        # You are calling the AMI functions with tid suffixes.
        # AMI does not specifically catalogue TID datasets so
        # I am stripping off the suffix.
        # Normally users should not be concerned with these datasets
        # but only with the containers.
        # However if you are really only interested in the output of a particular
        # prodsys task then we can do it - but it would be more complex
        # as we need to redo the event and cross section calculations
        # just for those tasks.
        if sample.find("_tid") >= 0:  # find() returns -1 when the suffix is absent
            print("Stripping tid suffix from " + sample)
            sample = sample.split("_tid")[0]
            pass

        # All datasets should have the number of events.
        # have to convert this to a long int I suppose?
        amiinfo = get_dataset_info(amiClient, sample_noscope)[0]
        mydata.nevents = long(amiinfo['totalEvents'])

        # AMI does not yet have a function for getting luminosity.
        # It IS on the todo list, as luminosity info per run is available
        # in COMA, and AMI has access to the information in principle
        # So this is in part a place holder
        # I do not know anything about k-factor. We have no such parameter sent to us.
        # This should be taken up with the MC people I suppose.
        if (mydata.isData == 1):
            # get luminosity for the run
            mydata.crossSection = -1
            mydata.filterEfficiency = -1
            pass
        else:
            mydata.luminosity = -1
            # MC - can get cross-section and filter efficiency
            xsec = float(amiinfo['approx_crossSection'])
            effic = float(amiinfo['approx_GenFiltEff'])
            # + conversion string to float.
            mydata.crossSection = xsec
            mydata.filterEfficiency = effic
            if mydata.crossSection > 0 and mydata.filterEfficiency > 0:
                mydata.luminosity = float(float(mydata.nevents) / (mydata.crossSection * mydata.filterEfficiency))
                pass
            pass
        data.addSample(mydata)
        # print "cross section = "+str(mydata.crossSection)+", filter efficiency = "+str(mydata.filterEfficiency)+", nEvents= "+str(mydata.nevents)
        pass
    return data
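A usage sketch for the query function above; the sample name is a placeholder, and the SampleHandler (ROOT.SH) bindings are assumed to be loaded:

samples = [
    "mc15_13TeV.999999.placeholder_sample.merge.AOD.e0000_s0000_r0000",  # placeholder
]
meta = SampleHandler_QueryAmi(samples)
# Each MetaDataSample now carries nevents, crossSection, filterEfficiency and,
# for MC with positive values, the equivalent luminosity computed above.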
    ROOT.SH.ScanDir().sampleDepth(0).samplePattern(args.eosDataSet).scanEOS(sh_all, base)
else:
    raise Exception("What just happened?")

if args.xsecFromAMI:
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    from pyAMI.atlas.api import get_dataset_info
    client = pyAMI.client.Client("atlas")
    AtlasAPI.init()
    evntName = line.rstrip()
    if "merge.DAOD_EXOT3" in evntName:
        evntName = evntName.replace("merge.DAOD_EXOT3", "evgen.EVNT")
        evntName = evntName[0:evntName.find("evgen.EVNT") + 16]
    # d = AtlasAPI.get_dataset_info(client, line.rstrip())[0]
    d = AtlasAPI.get_dataset_info(client, evntName)[0]
    filtEff = 1
    xsec = 1
    if "genFiltEff" in d:
        if d["genFiltEff"] != "NULL":
            filtEff = float(d["genFiltEff"])
    elif "GenFiltEff_mean" in d:
        if d["GenFiltEff_mean"] != "NULL":
            filtEff = float(d["GenFiltEff_mean"])
    if "crossSection" in d:
        if d["crossSection"] != "NULL":
            xsec = float(d["crossSection"])
    dsid = str(d["datasetNumber"])
    sh_all.setMetaString(line.rstrip().rstrip("/"), "dsid", dsid)
    sh_all.setMetaDouble(line.rstrip().rstrip("/"), "weight_xs", filtEff * xsec)
    print(
    # Don't check data
    if did.startswith("00"):
        continue

    # Get all possible AODs for the output ntuple
    base_files = AtlasAPI.list_datasets(
        client,
        patterns=['mc15_13TeV.' + did + '%merge.AOD%' + tags + '%'],
        type='AOD')

    print "-------------------------------------------------------------------------"
    print "Ntuple name:", d
    print base_files[0]['ldn']
    if len(base_files) > 1:
        print "Found more than one matching AOD."
        print "For file", f
        print "using", base_files[0]['ldn']

    info = AtlasAPI.get_dataset_info(client, base_files[0]['ldn'])[0]
    n_real_events = info.get('totalEvents')
    if float(n_real_events) < float(n_aod_events):
        print "\033[91m Error in dsid", did
        bad_dids += [did]
        print "\t Real number of AOD events:", n_real_events
        print "\t EventCountHist number: ", n_aod_events
        print "\t Ratio: ", float(n_aod_events) / float(n_real_events)
        print '\033[0m'

print "All dids with bad weights:"
print bad_dids
raise Exception("What just happened?") if args.xsecFromAMI: import pyAMI.client import pyAMI.atlas.api as AtlasAPI from pyAMI.atlas.api import get_dataset_info client = pyAMI.client.Client('atlas') AtlasAPI.init() evntName = line.rstrip() if 'merge.DAOD_EXOT3' in evntName: evntName = evntName.replace( 'merge.DAOD_EXOT3', 'evgen.EVNT') evntName = evntName[0:evntName. find('evgen.EVNT') + 16] # d=AtlasAPI.get_dataset_info(client,line.rstrip())[0] d = AtlasAPI.get_dataset_info( client, evntName)[0] filtEff = 1 xsec = 1 if 'genFiltEff' in d: if d['genFiltEff'] != 'NULL': filtEff = float(d['genFiltEff']) elif 'GenFiltEff_mean' in d: if d['GenFiltEff_mean'] != 'NULL': filtEff = float(d['GenFiltEff_mean']) if 'crossSection' in d: if d['crossSection'] != 'NULL': xsec = float(d['crossSection']) dsid = str(d['datasetNumber']) sh_all.setMetaString(line.rstrip().rstrip('/'), 'dsid', dsid) sh_all.setMetaDouble(line.rstrip().rstrip('/'),
inDS = inDS.split(":")[1] items = inDS.split(".") tag = items[-1].split("_")[0] evntDS = ".".join(items[:-3] + ['evgen', 'EVNT', tag]) tokens = dataset.split(".") mystr = tokens[2].split("_")[-1] numbers = "" for character in mystr: if character.isdigit() : numbers = numbers+character tag = "JZ{0}W".format(numbers) info = AtlasAPI.get_dataset_info(client, evntDS)[0] infoDict = {} infoDict['crossSection'] = info['crossSection_max'] infoDict['filterEff'] = info['GenFiltEff_mean'] infoDict['nEvt'] = info['totalEvents'] print infoDict mydict[tag] = infoDict outfile = "pyAMIInfoForFiles_{0}.py".format(outFileTag) w = open(outfile, "w") w.write("amiInfoDict = {\n") for key, val in sorted(mydict.items()) : w.write("'{0}'".format(key)+" : "+ "{" ) for ikey,ival in val.items() :
def main():
    logging.basicConfig(format='%(levelname)s:%(message)s')

    import time, datetime
    from pytz import timezone
    import argparse

    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
        import pyAMI.config
    except ImportError:
        logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
        return -1

    extraFieldDefaults = {}  #{"approx_crossSection":None,"approx_GenFiltEff":1.0}

    fieldDefaults = {"subprocessID": 0, "dataset_number": 0}
    #populate the fieldDefaults ... for all, assume 'None'
    for field in pyAMI.config.tables['datasets'].keys():
        if str(field) == "cross_section":
            continue  #special exception because this field only present in
        if str(field) in fieldDefaults.keys():
            continue
        if str(field).startswith("@"):
            continue
        fieldDefaults[str(field)] = None

    import commands
    #check the voms proxy
    status, out = commands.getstatusoutput("voms-proxy-info -fqan -exists")
    if status != 0:
        logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")
        return -1

    try:
        client = pyAMI.client.Client('atlas')
        AtlasAPI.init()
    except:
        logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
        return -1

    #need to collect the ami dataset parameter defaults
    paramExplains = []  #for the help message only
    paramUnits = dict()
    paramDefaults = {}

    res = client.execute('ListPhysicsParameterDefs', format='dom_object')
    for r in res.get_rows():  #r is OrderedDict
        explainString = "%s: %s" % (r[u'PARAMNAME'], r[u'DESCRIPTION'])
        if r[u'UNITS'] != u'NULL':
            explainString += " (units: %s)" % r[u'UNITS']
            paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
        if r[u'HASDEFAULT'] == u'N':
            paramDefaults[str(r[u'PARAMNAME'])] = None
        else:
            explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
            if r[u'PARAMTYPE'] == u'number':
                paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])  #FIXME: Assumes all parameters are floats
            elif r[u'PARAMTYPE'] == u'string':
                paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
        paramExplains += [explainString]

    paramDefaults["crossSection_pb"] = None
    paramUnits["crossSection_pb"] = "pb"
    paramExplains += ["crossSection_pb: Same as crossSection except in pb units (units: pb)"]

    cern_time = timezone('UCT')
    current_time = datetime.datetime.fromtimestamp(time.time(), cern_time).strftime('%Y-%m-%d %H:%M:%S')

    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--inDS', nargs='+', default=[""],
                        help="List of datasets to retrieve parameters for")
    parser.add_argument('--inDsTxt', default="",
                        help="Alternative to --inDS, can specify the datasets from an input file")
    parser.add_argument('--fields', nargs='+',
                        help="List of parameters to extract. Available parameters are: \n\n %s\n\nYou can also include any from:\n %s\nYou can also do keyword_xxx to add a bool branch for keywords" % ("\n ".join(paramExplains), ", ".join(fieldDefaults.keys() + extraFieldDefaults.keys())),
                        default=["dataset_number", "crossSection", "kFactor", "genFiltEff"])
    parser.add_argument('--timestamp', default=current_time,
                        help="The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
    parser.add_argument('--physicsGroups', nargs='+', default=["PMG,MCGN"],
                        help="Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, fallback on MCGN values if unavailable). Allowed groups are:\n PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")
    parser.add_argument('--oldTimestamp', default="",
                        help="If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters")
    parser.add_argument('--explainFields', nargs='+', default=[],
                        help="The fields you would like explained .. will appear as comment lines after each row in the output")
    parser.add_argument('--explainInfo', nargs='+', default=['explanation', 'insert_time'],
                        help="Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby. Default is: explanation,insert_time")
    parser.add_argument('--outFile', default=sys.stdout, type=argparse.FileType('w'),
                        help="Where to print the output to. Leave blank to print to stdout")
    parser.add_argument('--delim', default="",
                        help="The delimiter character. Defaults to spaces leading to nice formatting table")
    parser.add_argument('-v', action='store_true', help="Verbose output for debugging")

    args = parser.parse_args()

    if args.v:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)
    logging.debug(args.inDS)
    logging.debug(args.fields)
    logging.debug(args.timestamp)

    if args.timestamp == "the dawn of time":
        logging.error("Unfortunately we don't know any parameters from this time period... but we're working on it!")
        return 9999

    #split elements of fields by comma to get full list
    args.fields = sum((y.split(',') for y in args.fields), [])
    args.fields = [x.strip() for x in args.fields]  #strips whitespace
    #look for keyword_ fields, these are special ...
    args.keywords = []
    for f in args.fields:
        if f.startswith("keyword_"):
            k = f[8:]
            #and then add each keyword to the extraFieldDefaults so it is recognised thusly
            extraFieldDefaults["keyword_%s" % k] = bool(False)
            args.keywords += [k]

    #same for physics groups
    args.physicsGroups = sum((y.split(',') for y in args.physicsGroups), [])
    args.physicsGroups = [x.strip() for x in args.physicsGroups]  #strips whitespace

    #same for explainFields and explainInfo
    args.explainFields = sum((y.split(',') for y in args.explainFields), [])
    args.explainFields = [x.strip() for x in args.explainFields]  #strips whitespace
    args.explainInfo = sum((y.split(',') for y in args.explainInfo), [])
    args.explainInfo = [x.strip() for x in args.explainInfo]  #strips whitespace

    if args.inDsTxt != '':
        args.inDS = readDsFromFile(args.inDsTxt)
    #and same for inDS
    args.inDS = sum((y.split(',') for y in args.inDS), [])
    args.inDS = [x.strip() for x in args.inDS]  #strips whitespace

    #1. check field values are allowed, we obtain default field values at same time..
    #2. For each entry in inDS, if contains wildcard we obtain list of DS, otherwise check DS exists. During this time we obtain the datasetid and numEvents properties, in case we need them
    #3. For each of these DS, get parameters from ami matching the timestamp. Organize into fields and index by subprocessID
    #4. Output a line to our output file

    #1.
    #before adding all the ami parameters, identify which of provided fields are: 1). Obtained from list_datasets command (dsFields) 2). actual parameters
    dsFields = [x for x in args.fields if x in fieldDefaults.keys() and x not in ["subprocessID", "ldn"]]
    extraFields = [x for x in args.fields if x in extraFieldDefaults.keys()]
    paramFields = [x for x in args.fields if x in paramDefaults.keys()]

    if len(paramFields) > 0 and args.physicsGroups == [""]:
        logging.error("You must specify at least one physics group. See -h for allowed groups")
        return -1

    #combine paramDefaults with fieldDefaults
    fieldDefaults.update(paramDefaults)
    #and with extra fields
    fieldDefaults.update(extraFieldDefaults)

    for field in args.fields:
        if field not in fieldDefaults:
            logging.error("%s is not a recognised field. Allowed fields are:" % field)
            logging.error(fieldDefaults.keys())
            return -1

    if args.oldTimestamp != "":
        logging.info("oldTimestamp option specified. Running in diff mode...")
        args.explainFields = args.fields
        args.explainInfo = ["explanation", "insert_time", "physicsGroup", "createdby"]

    #2.
    #replace all '*' with '%' and strip "/"
    args.inDS = [ds.replace("*", "%") for ds in args.inDS]
    args.inDS = [ds.rstrip("/") for ds in args.inDS]

    if len(args.inDS) == 0 or (len(args.inDS) == 1 and args.inDS[0] == ""):
        logging.error("No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")
        return -1

    logging.info("Fetching list of datasets from AMI (this may take a few minutes)...")

    #obtain list of datasets
    res = AtlasAPI.list_datasets(client, patterns=args.inDS, fields=dsFields + ['ldn'], ami_status="VALID")  #changed status from %, to only catch valid now: wb 08/2015
    logging.info("...Found %d datasets matching your selection" % len(res))

    if len(res) == 0:
        return 0

    #NOTE: Should we allow retrieval of the extra information: keyword, genFiltEff, approx crossSection, .. these all come from GetDatasetInfo ami command

    dataset_values = dict()
    for r in res:
        mydict = dict()
        dataset_values[str(r['ldn'])] = mydict
        for field in r.items():
            if str(field[0]) == "ldn":
                continue
            if str(field[0]) not in args.fields:
                continue
            mydict[str(field[0])] = str(field[1])
        #also if we have the 'extra fields or keywords' we will need to execute AtlasAPI.get_dataset_info ..
        if len(extraFields) > 0 or len(args.keywords) > 0:
            info_res = AtlasAPI.get_dataset_info(client, str(r['ldn']))
            #print(info_res)
            if len(info_res) == 0:
                logging.error("Unable to retrieve dataset info for %s" % str(r['ldn']))
                return -1
            for field in extraFields:
                #ignore the keyword_ fields
                if field.startswith("keyword_"):
                    continue
                mydict[field] = float(info_res[0][unicode(field)]) if isfloat(info_res[0][unicode(field)]) else extraFieldDefaults[field]
            for k in args.keywords:
                mydict["keyword_%s" % k] = int((k in str(info_res[0][unicode('keyword')]).split(",")))

    #sort dataset_values as well as possible
    from collections import OrderedDict
    sorted_values = OrderedDict()
    for ds in args.inDS:
        if ds in dataset_values.keys():
            sorted_values[ds] = dataset_values[ds]
    for ds in sorted(dataset_values):
        if ds not in sorted_values.keys():
            sorted_values[ds] = dataset_values[ds]
    dataset_values = sorted_values
    logging.debug(dataset_values)

    #res = client.execute(['GetDatasetInfo
    for ds in args.inDS:
        if '%' not in ds and ds not in dataset_values.keys():
            logging.warning("Unknown dataset: %s" % ds)

    datasetsToQuery = ",".join(dataset_values.keys())

    #if using inDsTxt, retain any comment or blank lines in structure of output
    complete_values = OrderedDict()
    if args.inDsTxt != "":
        #read lines
        commentcount = 0
        import re
        txt = open(args.inDsTxt)
        for tmpLine in txt:
            #remove \n
            tmpLine = re.sub('\n', '', tmpLine)
            #remove white spaces
            tmpLine = tmpLine.strip()
            #skip comment or empty
            if tmpLine.startswith('#') or tmpLine == '':
                complete_values['comment%d' % (commentcount)] = tmpLine
                commentcount = commentcount + 1
                continue
            #append
            tmpLine = tmpLine.rstrip("/")
            if tmpLine in dataset_values.keys():
                complete_values[tmpLine] = dataset_values[tmpLine]
            else:
                print("cannot find %s" % tmpLine)
        #close file
        txt.close()
        dataset_values = complete_values

    logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.timestamp))

    #do as one query, to be efficient
    if (args.timestamp == current_time):
        res = client.execute(['GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.timestamp], format='dom_object')
    else:
        res = client.execute(['GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.timestamp, "--history=true"], format='dom_object')

    #organize results by dataset
    parameterQueryResults = dict()
    for r in res.get_rows():
        if r[u'logicalDatasetName'] not in parameterQueryResults.keys():
            parameterQueryResults[r[u'logicalDatasetName']] = []
        parameterQueryResults[r[u'logicalDatasetName']] += [r]  #puts row in the list for this dataset

    if args.oldTimestamp != "":
        logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)" % (args.fields, args.oldTimestamp))
        res2 = client.execute(['GetPhysicsParamsForDataset', "--logicalDatasetName=%s" % datasetsToQuery, "--timestamp='%s'" % args.oldTimestamp, "--history=true"], format='dom_object')
        old_parameterQueryResults = dict()
        for r in res2.get_rows():
            if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys():
                old_parameterQueryResults[r[u'logicalDatasetName']] = []
            old_parameterQueryResults[r[u'logicalDatasetName']] += [r]  #puts row in the list for this dataset

    headerString = ""
    doneHeader = False
    commentCache = ""
    commentCount = 0

    #result is a list of lists (each list is 1 row)
    outputTable = []
    tableHeaders = []

    for ds in dataset_values.keys():
        if ds.startswith('comment'):
            if commentCount > 0:
                commentCache += "\n"
            commentCache += dataset_values[ds]
            commentCount = commentCount + 1
            continue
        #obtain list of parameters for this dataset
        #if(args.timestamp==current_time):
        #   res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp], format='dom_object')
        #else:
        #   res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% ds,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object')
        res = parameterQueryResults.get(ds, [])
        if args.oldTimestamp != "":
            res2 = old_parameterQueryResults.get(ds, [])

        #first we have to determine how many subprocesses this ds has
        dsSubprocesses = [0]  #always have the 0 subprocess
        for r in res:
            sp = int(r[u'subprocessID'])
            if sp not in dsSubprocesses:
                dsSubprocesses += [sp]

        #now for each subprocess we have to locate each required field value (in paramFields)
        #rank by physicsGroup
        for sp in dsSubprocesses:
            paramVals = dict()
            paramVals2 = dict()
            groupsWithVals = dict()  #held for helpful output
            #need to keep explanations for requested fields
            explainInfo = dict()
            for i in args.explainFields:
                explainInfo[i] = dict()

            for param in paramFields:
                groupsWithVals[param] = []
                bestGroupIndex = len(args.physicsGroups)
                import copy
                paramVals[param] = copy.copy(fieldDefaults[param])
                for r in res:
                    if int(r[u'subprocessID']) != sp:
                        continue
                    if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                        continue
                    if str(r[u'physicsGroup']) not in args.physicsGroups:
                        groupsWithVals[param] += [(str(r[u'physicsGroup']), str(r[u'paramValue']))]
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                        continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                        logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                    paramVals[param] = str(r[u'paramValue'])
                    if param == "crossSection_pb":
                        paramVals[param] = str(float(paramVals[param]) * 1000.0)
                    bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))
                    #keep the explanation info for the requested fields
                    if param in explainInfo.keys():
                        for e in args.explainInfo:
                            if unicode(e) not in r:
                                logging.error("Unrecognised explainInfo field: %s" % e)
                                return -1
                            explainInfo[param][e] = str(r[unicode(e)])
                if args.oldTimestamp != "":
                    bestGroupIndex = len(args.physicsGroups)
                    paramVals2[param] = copy.copy(fieldDefaults[param])
                    for r in res2:
                        if int(r[u'subprocessID']) != sp:
                            continue
                        if str(r[u'paramName']) != param and not (param == "crossSection_pb" and str(r[u'paramName']) == "crossSection"):
                            continue
                        if str(r[u'physicsGroup']) not in args.physicsGroups:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex:
                            continue
                        if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex:
                            logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!" % (param, str(r[u'physicsGroup']), ds, sp))
                        paramVals2[param] = str(r[u'paramValue'])
                        if param == "crossSection_pb":
                            paramVals2[param] = str(float(paramVals2[param]) * 1000.0)
                        bestGroupIndex = args.physicsGroups.index(str(r[u'physicsGroup']))

            #at this stage, parameters reside in paramVals dict or dataset_values[ds] dict
            #print them in the requested order .. if any is "None" then stop, because it doesn't have a default value and didn't find a value for it either
            rowString = ""
            rowList = []
            firstPrint = False
            for param in args.fields:
                val = None
                if param == "ldn":
                    val = ds
                elif param == "subprocessID":
                    val = sp
                elif param in dataset_values[ds].keys():
                    val = dataset_values[ds][param]
                else:
                    val = paramVals.get(param, None)
                if val == None:
                    if args.outFile != sys.stdout:
                        logging.warning("dataset %s (subprocess %d) does not have parameter %s, which has no default." % (ds, sp, param))
                    if len(groupsWithVals.get(param, [])) > 0:
                        logging.warning("The following physicsGroups have defined that parameter though:")
                        logging.warning(groupsWithVals[param])
                    val = "#UNKNOWN#"
                    #return -1
                #if isfloat(str(val)): val = "%.6g" % float(val)
                if args.oldTimestamp != "":
                    #diff val to old val
                    val2 = None
                    if param == "ldn":
                        val2 = ds
                    elif param == "subprocessID":
                        val2 = sp
                    elif param in dataset_values[ds].keys():
                        val2 = dataset_values[ds][param]
                    else:
                        val2 = paramVals2.get(param, None)
                    if val2 == None:
                        val2 = "#UNKNOWN#"
                    #if isfloat(str(val2)): val2 = "%.6g" % float(val)
                    if (str(val) != str(val2)):
                        if not firstPrint:
                            print("%s:" % ds)
                            firstPrint = True
                        print(" %s : %s ---> %s" % (param, str(val2), str(val)))
                        print(" insert_time : %s" % explainInfo[param]['insert_time'])
                        print(" explanation : %s" % explainInfo[param]['explanation'])
                        print(" createdby : %s" % explainInfo[param]['createdby'])
                        print(" physicsGroup : %s" % explainInfo[param]['physicsGroup'])
                    continue

                rowList += [str(val)]
                if rowString != "" and args.delim != "":
                    rowString += args.delim
                rowString += str(val)
                #inspect the type of str(val) to build up the header
                if not doneHeader:
                    headerString += param
                    if args.outFile != sys.stdout:
                        if type(fieldDefaults[param]) == bool:
                            headerString += "/O:"
                        elif type(fieldDefaults[param]) == int:
                            headerString += "/I:"
                        elif type(fieldDefaults[param]) == float:
                            headerString += "/D:"
                        elif isfloat(str(val)):
                            headerString += "/D:"
                        #elif isint(str(val)): headerString += "/I:"  TO BE SAFE WE MAKE ALL NUMERIC FIELDS FLOATS, EXCEPT if the default value is type int
                        else:
                            headerString += "/C:"
                    else:
                        v = param
                        if param in paramUnits:
                            headerString += " [%s]" % paramUnits[param]
                            v += " [%s]" % paramUnits[param]
                        tableHeaders += [v]
                        headerString += " "

            if args.oldTimestamp != "":
                continue  #print nothing more for diff mode

            if not doneHeader:
                doneHeader = True
                if args.outFile != sys.stdout:
                    print(headerString[:-1], file=args.outFile)

            if commentCount > 0:
                if args.outFile != sys.stdout and args.delim != "":
                    print(commentCache, file=args.outFile)
                outputTable += [["COMMENT", commentCache]]
                commentCache = ''
                commentCount = 0
            if args.outFile != sys.stdout and args.delim != "":
                print(rowString, file=args.outFile)
            outputTable += [rowList]

            #also print the required explanations
            for (field, expl) in explainInfo.items():
                outString = "#%s: { " % field
                doneFirst = False
                for eField in args.explainInfo:
                    if doneFirst:
                        outString += " , "
                    if not eField in expl.keys():
                        outString += " %s: <NONE .. value is default>" % eField
                    else:
                        outString += "%s: %s" % (eField, expl[eField])
                    doneFirst = True
                outString += " }"
                #print(outString,file=args.outFile)
                outputTable += [["COMMENT", outString]]

    if args.oldTimestamp != "":
        args.outFile.close()
        return 0

    #print the table in nicely formatted state
    if args.outFile == sys.stdout or args.delim == "":
        #determine column widths
        columnWidths = [0] * len(args.fields)
        for i in range(0, len(tableHeaders)):
            columnWidths[i] = len(tableHeaders[i])
        for r in outputTable:
            if len(r) > 0 and r[0] == "COMMENT":
                continue
            for i in range(0, len(r)):
                if len(r[i]) > columnWidths[i]:
                    columnWidths[i] = len(r[i])
        lineout = ""
        for i in range(0, len(tableHeaders)):
            lineout += tableHeaders[i].ljust(columnWidths[i]) + " "
        print(lineout)
        for r in outputTable:
            lineout = ""
            if len(r) > 0 and r[0] == "COMMENT":
                lineout = r[1]
            else:
                for i in range(0, len(r)):
                    lineout += r[i].ljust(columnWidths[i]) + " "
            print(lineout, file=args.outFile)

    #print the footer, which is the command to reproduce this output
    import os
    if args.outFile != sys.stdout:
        #remove comment from dataset_values
        datasetss = [x for x in dataset_values.keys() if not x.startswith("comment")]
        print("", file=args.outFile)
        print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject', 'UNKNOWN!'), os.environ.get('AtlasVersion', 'UNKNOWN!')), file=args.outFile)
        print("#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp, ",".join(args.physicsGroups), ",".join(args.fields), ",".join(datasetss)), file=args.outFile)
        logging.info("Results written to: %s" % args.outFile.name)

    args.outFile.close()
import os

import pyAMI.atlas.api as ami
import pyAMI.client
from dotenv import load_dotenv

load_dotenv()
CERT_PATH = os.getenv('CERT_FILE')
KEY_PATH = os.getenv('KEY_FILE')

client = pyAMI.client.Client(
    'atlas',
    cert_file=CERT_PATH,
    key_file=KEY_PATH,
    ignore_proxy=True,
    verbose=True
)

# extract data
# df = uproot.concatenate('../data/wminmunu_MC.root:sumWeights')

# result = client.execute('list datasets --dataset-number 301170 -f cross_section,nfiles,physics_short,events,total_size', format='dict_object')
kwargs = {'dataset-number': 301170}
result = ami.get_dataset_info(client, **kwargs)
print(result)
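For comparison, the other snippets in this collection pass a logical dataset name to get_dataset_info; a sketch of the same certificate-based client doing that (the LDN below is a placeholder):

# Query by logical dataset name, as the other examples here do.
ldn = 'mc16_13TeV.301170.placeholder_sample.merge.AOD.e0000_s0000_r0000'
result = ami.get_dataset_info(client, ldn)
print(result[0]['totalEvents'])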
def main():
    # configurable options
    config = parseCmdLine(sys.argv[1:])
    if (config.baseline or config.official) and config.sample:
        print "--baseline, --official and --sample are mutually exclusive"
        sys.exit(1)

    # AMI client connection
    client = pyAMI.client.Client('atlas')
    pyAMI.client.endpoint = config.server
    pyAMI.atlas.api.init()

    # data type is NTUP_SUSY for 2011/2012 and AOD for 2014 on
    datatype = config.datatype
    if 'mc11_' in config.prefix or 'mc12_' in config.prefix or 'data11_' in config.prefix or 'data12_' in config.prefix:
        datatype = '%.merge.NTUP_SUSY%'

    # make list of official datasets (baseline+alt)
    officialids = []
    if config.official or config.baseline or config.sample:
        if 'mc12_8TeV' in config.prefix or 'mc14_8TeV' in config.prefix:
            import mc12_8TeV_MCSampleList as mcsl
        elif 'mc14_13TeV' in config.prefix:
            import mc14_13TeV_MCSampleList as mcsl
        else:
            print '--official is only supported for mc12_8TeV, mc14_8TeV and mc14_13TeV'
            sys.exit(1)
        if config.sample:
            officialids = mcsl.__dict__[str(config.sample)]
        else:
            officialids = mcsl.__dict__["lbaseline"]
            if config.official:
                officialids += mcsl.__dict__["lalt"]

    # get all datasets matching prefix & tag and then filter them
    from pyAMI.atlas.api import get_dataset_info, list_datasets
    dskey = config.prefix + datatype + config.tag
    print 'Querying AMI for datasets matching pattern', dskey
    alldatasets = list_datasets(client, dskey)
    acceptedDS = []
    for DSlist in alldatasets:
        dsname = DSlist['ldn']
        cut = False
        for filter in filters:
            if filter in dsname.split('.')[2]:
                cut = True
        if (config.official or config.baseline or config.sample) and not int(dsname.split('.')[1]) in officialids:
            cut = True
        if config.signal:
            cut = True
            for pattern in lsignals:
                if pattern in dsname:
                    cut = False
        if cut:
            continue
        acceptedDS.append(dsname)
        pass
    acceptedDS.sort()

    # get information for all accepted datasets
    dsinfos = []
    for dsname in acceptedDS:
        dsinfos.append(get_dataset_info(client, dsname)[0])
        pass

    # write file
    fout = open('datasets.txt', 'w')
    for info in dsinfos:
        try:
            dsname = info['logicalDatasetName']
            generatorString = info['generatorName']
            version = info['version']
            if badDataset(dsname, generatorString, version):
                continue
            availability = info['prodsysStatus']
            nFiles = int(info['nFiles'])
            if nFiles > 0:
                period = 'MC'
                xsec = 0.
                effic = 1.
                if info.has_key('period'):
                    period = info['period']
                else:
                    #(xsec, effic) = get_dataset_xsec_effic(client, info.info['logicalDatasetName'])
                    # confirmed with AMI team that this should be enough, no need
                    # to re-implement get_dataset_xsec_effic for PyAMI5
                    xsec = info[u'crossSection']
                    effic = info[u'approx_GenFiltEff']
                nevts = info['totalEvents']
                nfiles = info['nFiles']
                if not dsname.endswith('/'):
                    dsname += '/'
                fout.write("%s %s %s %s %s %s\n" % (dsname, nevts, nfiles, period, xsec, effic))
        except KeyError as prop:
            print 'Missing property', prop, 'for dataset ', dsname, 'in AMI, skip'
    fout.close()
    pass