def getAMIProv(sample):
    """Look up event counts for a dataset and its parent AOD in AMI.

    Uses the pyAMI API to fetch the dataset provenance and returns the
    event counts in a dictionary.  The provenance lists every dataset this
    one was produced from or that was produced from it; 'distance' is the
    number of production steps away (-1 = immediate parent, 0 = the dataset
    itself, positive = descendants).

    --sample should be the full dataset name as you would search for it in
    AMI with *NO* trailing /.
    """
    counts = {}
    # Normalise the name: drop trailing slashes and any leading "scope:".
    cleaned = sample.rstrip('/')
    cleaned = cleaned.split(':', 1)[-1]
    provenance = AtlasAPI.get_dataset_prov(client, cleaned)
    for node in provenance['node']:
        # AOD one step upstream: the file immediately before the derivation.
        if node['dataType'] == 'AOD' and node['distance'] == '-1':
            counts['AOD_AMI'] = int(node['events'])
        # The derivation sample itself (distance 0); higher distances are
        # files made *from* this one.
        if node['dataType'] == 'DAOD_HIGG1D1' and node['distance'] == '0':
            counts['DAOD_AMI'] = int(node['events'])
    return counts
def getAMIProv(sample):
    """Return AMI event counts for a dataset and its immediate parent AOD.

    Queries the pyAMI provenance of *sample* (every ancestor/descendant
    dataset, tagged with a 'distance' from the dataset: -1 means the file
    the dataset was produced from, 0 the dataset itself, +1 a file produced
    from it) and collects event counts into a dictionary.

    --sample should be the full dataset name as you would search for it in
    AMI with *NO* trailing /.
    """
    amiEvents = {}
    # Strip trailing slashes, then anything before a ':' (the scope) if present.
    name = sample.rstrip('/').split(':', 1)[-1]
    provenance = AtlasAPI.get_dataset_prov(client, name)
    for entry in provenance['node']:
        dtype = entry['dataType']
        if dtype == 'AOD' and entry['distance'] == '-1':
            # The file immediately before the derivation.
            amiEvents['AOD_AMI'] = int(entry['events'])
        elif dtype == 'DAOD_HIGG1D1' and entry['distance'] == '0':
            # The actual sample; higher distances are files made from this one.
            amiEvents['DAOD_AMI'] = int(entry['events'])
    return amiEvents
def getOriginalEVNT(s, ldn):
    """Return the logical name of an evgen.EVNT ancestor of *ldn*, or None.

    Scans the full AMI provenance; because the loop does not break, the
    *last* matching EVNT entry encountered wins.
    """
    result = None
    provenance = AtlasAPI.get_dataset_prov(s.client, ldn)
    for node in provenance["node"]:
        candidate = node["logicalDatasetName"]
        if "evgen.EVNT" in candidate:
            result = candidate
    return result
def getOriginalAOD(s, ldn):
    """Return the logical name of the first .AOD. dataset in *ldn*'s provenance, or None."""
    provenance = AtlasAPI.get_dataset_prov(s.client, ldn)
    # First match wins (early return replaces the original break).
    for node in provenance["node"]:
        name = node["logicalDatasetName"]
        if ".AOD." in name:
            return name
    return None
def main(): from argparse import RawTextHelpFormatter parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) parser.add_argument( '--inDsTxt', action="store", help= "text file containing datasets to make PRW for (one per line) [REQUIRED]", required=True) parser.add_argument('--outDS', action="store", default="", help="Name of the output dataset", required=False) parser.add_argument( '--forceStaged', action="store_true", help="If set, grid jobs will be submitted with forceStaged option") parser.add_argument( '--skipNTUP_PILEUP', action="store_true", help="If set, will not check for existing NTUP_PILEUP datasets") parser.add_argument('prwFiles', nargs="*", help="Exosting PRW Config files to check") args = parser.parse_args() try: import pyAMI.atlas.api as atlasAPI import pyAMI.client except ImportError: print "Could not import pyAMI ... please do: lsetup pyAMI" print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)" return 1 client = pyAMI.client.Client('atlas') atlasAPI.init() #read datasets into list with open(args.inDsTxt) as f: datasets = f.read().splitlines() import ROOT out = ROOT.CP.TPileupReweighting("out") for f in args.prwFiles: out.AddConfigFile(f) out.ResetCountingMode() #trick tool into going into counting mode #list of known period numbers periodNumbers = out.GetPeriodNumbers() print "Determining provenances of %d datasets ..." % len(datasets) aodDatasets = [] ntupDatasets = [] for dataset in datasets: dataset = dataset.strip("/") #strip the scope if it's there if dataset.startswith("#"): continue dataset = dataset.rsplit(":")[-1].strip() if len(dataset) == 0: continue print "Doing %s" % dataset prov = atlasAPI.get_dataset_prov(client, dataset) if 'node' not in prov: print "ERROR: Could not determine provenance of %s, skipping!" 
% dataset continue theParent = "" for ds in prov['node']: if ds[u'dataType'] != u'AOD': continue theParent = str(ds[u'logicalDatasetName']) theParentSize = int(ds[u'events']) break if theParent == "": print "ERROR: Could not determine provenance of %s, skipping!" % dataset continue #check input prw files, if we specified isIncomplete = False if len(args.prwFiles): total = 0 dsid = theParent.split(".")[1] for p in periodNumbers: if p == -1: continue hist = out.GetInputHistogram(int(dsid), p) if hist: total += hist.GetEntries() if total == theParentSize: print "INFO: %s is complete in your existing PRW files. Good!" % dataset continue if total > theParentSize: print "WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % ( dataset, total, theParentSize) continue else: if total != 0: print "WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % ( dataset, total, theParentSize) isIncomplete = True #before adding the dataset, see if we can find an NTUP_PILEUP for it if not args.skipNTUP_PILEUP: ntupDatasetName = theParent.replace("AOD", "NTUP_PILEUP") ntupDatasetName = ntupDatasetName.replace("aod", "%") ntupDatasetName = ntupDatasetName.replace("merge", "%") #remove everything after first rtag of ami tag .. replace with wildcard first_rtag_pos = ntupDatasetName.index( "_r", ntupDatasetName.index("NTUP_PILEUP")) try: next_underscore_pos = ntupDatasetName.index( "_", first_rtag_pos + 1) except ValueError: next_underscore_pos = len(ntupDatasetName) ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%" res = atlasAPI.list_datasets(client, ntupDatasetName, fields='ldn,prodsys_status') foundNTUP = False for r in res: if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and ( isIncomplete or r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"): continue print "Found existing NTUP_PILEUP ... 
please download: %s" % r[ u"ldn"] ntupDatasets += [r[u'ldn']] foundNTUP = True break if foundNTUP == True: continue aodDatasets += [theParent] if len(aodDatasets) > 0: if args.outDS == "": print "NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset" print ",".join(aodDatasets) return 1 print "...submitting job to grid..." extraOpts = "" if args.forceStaged: extraOpts += "--forceStaged " mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % ( ",".join(aodDatasets), args.outDS, extraOpts) print "Command: %s" % mycommand from subprocess import call if call(mycommand, shell=True) != 0: print "Problem executing command. Did you remember to do: lsetup panda" return 1 print "... finished. Please monitor your job on the grid, and when it is finished, download the files!" if len(ntupDatasets): frucio_fn = 'rucio_downloads_%s.sh' % args.inDsTxt print "Please download existing config files from these datasets (see also output file %s):" % frucio_fn with open(frucio_fn, 'w') as frucio: for ds in ntupDatasets: command = "rucio download %s" % ds print command frucio.write(command + '\n') print "" if len(ntupDatasets) or len(aodDatasets): print "After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt print "Thank you for generating config files, you get a gold star!" else: print "Looks like you are ready for pileup reweighting!" return 0
inputDS = [] inputFile = open(args.input,"r") for line in inputFile: if line.startswith("#"): continue line = line.strip() if line=='': continue parts=line.split(':') dsName = parts[-1].rstrip("/") dsID = dsName.split(".")[1] print dsName dsProv = AtlasAPI.get_dataset_prov(client,dataset=dsName) for prov in dsProv["node"]: if prov['dataType'] == "EVNT": thisProvDSName = prov['logicalDatasetName'] thisProvDSID = thisProvDSName.split(".")[1] if thisProvDSID == dsID: print "\tUsing ",thisProvDSName inputDS.append(thisProvDSName) def getUnitSF(unit): if unit == "nano barn": return 1000 print "Unknown unit..." return 1.0 fh_out=open(args.output,'w') if args.output!=None else None
def getParent(client, sample):
    """Return the logical dataset name of *sample*'s immediate parent.

    Takes node[1] of the AMI provenance — the immediate first parent, which
    should be an AOD for a DxAOD.  Raises IndexError/KeyError if the
    provenance has no such entry.
    """
    # Fix: the local was named 'dict', shadowing the builtin.
    prov = AtlasAPI.get_dataset_prov(client, sample)
    # get the immediate first parent - this should be an AOD for a DxAOD
    parent = prov['node'][1]['logicalDatasetName']
    return parent
inputDS = [] inputFile = open(args.input, "r") for line in inputFile: if line.startswith("#"): continue line = line.strip() if line == '': continue parts = line.split(':') dsName = parts[-1].rstrip("/") dsID = dsName.split(".")[1] print dsName dsProv = AtlasAPI.get_dataset_prov(client, dataset=dsName) for prov in dsProv["node"]: if prov['dataType'] == "EVNT": thisProvDSName = prov['logicalDatasetName'] thisProvDSID = thisProvDSName.split(".")[1] if thisProvDSID == dsID: print "\tUsing ", thisProvDSName inputDS.append(thisProvDSName) def getUnitSF(unit): if unit == "nano barn": return 1000 print "Unknown unit..." return 1.0
def main():
    """Check PRW config files for completeness against AMI event counts.

    Reads datasets from --inDsTxt (comma-separated list of text files),
    sums the expected AOD events per DSID from AMI, compares against the
    entries counted by TPileupReweighting from the given prwFiles, and
    optionally writes a cleaned config to --outPRWFile with incomplete
    (and, unless --outputSuspect, suspect) channels removed.
    Returns 0 on success, 1 if pyAMI cannot be imported.
    """
    from argparse import RawTextHelpFormatter
    parser = argparse.ArgumentParser(description=__doc__,formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile',action="store",help="OPTIONAL Name of the output prw file containing valid configs",required=False)
    parser.add_argument('--outputSuspect',action="store_true",help="allow for suspect channels to be included in the output prw file",default=False)
    parser.add_argument('--inDsTxt',action="store",help="text file containing datasets to make PRW for (one per line)",required=True)
    parser.add_argument('prwFiles',nargs="+",help="PRW Config files to scan")
    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    #read datasets into list (several txt files may be given, comma-separated)
    datasets=[]
    for txtFile in args.inDsTxt.split(","):
        with open(txtFile) as f:
            datasets += f.read().splitlines()

    print "Determining provenances of %d datasets ..." % len(datasets)

    # Map DSID -> total expected events, accumulated over all input datasets.
    aodDatasets=dict()
    for dataset in datasets:
        #strip the scope if it's there; '#' lines are comments
        if dataset.startswith("#"): continue
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset)==0: continue
        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset )
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        # Find the first AOD ancestor and its event count.
        theParent=""
        for ds in prov['node']:
            if ds[u'dataType']!=u'AOD': continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])
            break
        if theParent=="":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        #extract the dsid ... (second dot-separated field of the name)
        theParent = theParent.split(".")[1]
        if theParent in aodDatasets: aodDatasets[theParent] += theParentSize
        else: aodDatasets[theParent] = theParentSize
    #aodDatasets is now a chanNum -> eventNumber pairing ...

    import ROOT
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode() #trick tool into going into counting mode

    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    for dsid,nevents in aodDatasets.iteritems():
        #get the sum of weights from the tool
        total=0;
        for p in periodNumbers:
            if p==-1: continue
            hist = out.GetInputHistogram(int(dsid),p)
            if hist: total += hist.GetEntries()
        if total==nevents: print "channel %s is ok" % dsid
        elif total<nevents:
            # Missing events: drop the channel from the output config.
            print "channel %s is incomplete (missing %d events from config files)" % (dsid,nevents-total)
            out.RemoveChannel(int(dsid))
        elif total>nevents:
            # More events counted than AMI expects: suspect (overmerged?).
            print "channel %s is suspect! (config files have additional %d events)" % (dsid,total-nevents)
            if not args.outputSuspect:
                out.RemoveChannel(int(dsid))

    if args.outPRWFile:
        out.Initialize();
        out.WriteToFile(args.outPRWFile);

    return 0