Example #1
def getAMIProv(sample):
    """Query AMI for a dataset's provenance and return its event counts.

    This function uses the pyAMI API to get all needed information
    from the database and returns the event counts in a dictionary.
    The provenance is all datasets that this dataset was produced from,
    or that were used to produce it. The 'distance' indicates how many
    steps away from the dataset an entry is, e.g. -1 is the dataset that
    was used to produce this one, and 1 is a dataset that was produced
    FROM this one.

    --sample should be the full dataset name as you would search for it
      in AMI, with *NO* trailing /.
    """
    amiEvents = {}
    # Get the provenance from AMI
    sample = sample.rstrip('/')  # Remove any trailing slashes
    sample = sample.split(':', 1)[-1]  # Remove anything before a ':' if it's there.
    prov = AtlasAPI.get_dataset_prov(client, sample)
    # Loop over all of the datasets in the provenance
    for dataset in prov['node']:
        # Get nevents from the AOD that this sample was produced from
        if dataset['dataType'] == 'AOD':
            if dataset['distance'] == '-1': # The dataset immediately before the derivation
                amiEvents['AOD_AMI'] = int(dataset['events'])
        # Get the nEvents from the derivation sample.
        if dataset['dataType'] == 'DAOD_HIGG1D1':
            if dataset['distance'] == '0': # The actual sample. Any higher numbers are datasets made from this one.
                amiEvents['DAOD_AMI'] = int(dataset['events'])

    return amiEvents
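
A minimal usage sketch for the function above, assuming pyAMI is available and that the AtlasAPI and client globals the function relies on are initialised as in the later examples (the dataset name below is a placeholder):

import pyAMI.client
import pyAMI.atlas.api as AtlasAPI

client = pyAMI.client.Client('atlas')  # the global used by getAMIProv
AtlasAPI.init()                        # load the ATLAS AMI configuration

# Placeholder name; substitute a real DAOD_HIGG1D1 dataset.
counts = getAMIProv('mc16_13TeV.999999.SomeSample.deriv.DAOD_HIGG1D1.e0000_s0000_r0000_p0000')
print(counts)  # e.g. {'AOD_AMI': ..., 'DAOD_AMI': ...}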
Example #3
    def getOriginalEVNT(s, ldn):
        """Return the EVNT dataset in the provenance of `ldn`, or None if none is found."""
        provs = AtlasAPI.get_dataset_prov(s.client, ldn)

        evnt = None

        # Keep the last matching entry in the provenance.
        for sample in provs["node"]:
            if "evgen.EVNT" in sample["logicalDatasetName"]:
                evnt = sample["logicalDatasetName"]

        return evnt
Example #4
    def getOriginalAOD(s, ldn):
        """Return the first AOD dataset in the provenance of `ldn`, or None if none is found."""
        provs = AtlasAPI.get_dataset_prov(s.client, ldn)

        aod = None

        for sample in provs["node"]:
            if ".AOD." in sample["logicalDatasetName"]:
                aod = sample["logicalDatasetName"]
                break

        return aod
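
Both helpers above assume the shape of the dictionary returned by get_dataset_prov: its 'node' entry is a list of provenance records. A sketch of the fields these examples rely on (field names are taken from the snippets; the values are hypothetical):

prov = {
    'node': [
        {
            'logicalDatasetName': 'mc16_13TeV.999999.SomeSample.evgen.EVNT.e0000',  # placeholder
            'dataType': 'EVNT',  # e.g. EVNT, AOD, DAOD_HIGG1D1, NTUP_PILEUP
            'distance': '-2',    # steps from the queried dataset; negative means upstream
            'events': '10000',   # event count, returned as a string
        },
        # ... one record per dataset in the provenance chain ...
    ],
}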
Example #5
def main():
    import argparse
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=RawTextHelpFormatter)

    parser.add_argument(
        '--inDsTxt',
        action="store",
        help="text file containing datasets to make PRW for (one per line) [REQUIRED]",
        required=True)
    parser.add_argument('--outDS',
                        action="store",
                        default="",
                        help="Name of the output dataset",
                        required=False)
    parser.add_argument(
        '--forceStaged',
        action="store_true",
        help="If set, grid jobs will be submitted with forceStaged option")
    parser.add_argument(
        '--skipNTUP_PILEUP',
        action="store_true",
        help="If set, will not check for existing NTUP_PILEUP datasets")
    parser.add_argument('prwFiles',
                        nargs="*",
                        help="Existing PRW Config files to check")

    args = parser.parse_args()

    try:
        import pyAMI.atlas.api as atlasAPI
        import pyAMI.client
    except ImportError:
        print "Could not import pyAMI ... please do: lsetup pyAMI"
        print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
        return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    #read datasets into list
    with open(args.inDsTxt) as f:
        datasets = f.read().splitlines()

    import ROOT

    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
        out.AddConfigFile(f)
    out.ResetCountingMode()  #trick tool into going into counting mode
    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()

    print "Determining provenances of %d datasets ..." % len(datasets)

    aodDatasets = []
    ntupDatasets = []
    for dataset in datasets:
        dataset = dataset.strip("/")
        # skip comment lines
        if dataset.startswith("#"): continue
        # strip the scope if it's there
        dataset = dataset.rsplit(":")[-1].strip()
        if len(dataset) == 0: continue

        print "Doing %s" % dataset
        prov = atlasAPI.get_dataset_prov(client, dataset)
        if 'node' not in prov:
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue
        theParent = ""
        for ds in prov['node']:
            if ds[u'dataType'] != u'AOD': continue
            theParent = str(ds[u'logicalDatasetName'])
            theParentSize = int(ds[u'events'])

            break
        if theParent == "":
            print "ERROR: Could not determine provenance of %s, skipping!" % dataset
            continue

        #check input prw files, if we specified
        isIncomplete = False
        if len(args.prwFiles):
            total = 0
            dsid = theParent.split(".")[1]
            for p in periodNumbers:
                if p == -1: continue
                hist = out.GetInputHistogram(int(dsid), p)
                if hist: total += hist.GetEntries()

            if total == theParentSize:
                print "INFO: %s is complete in your existing PRW files. Good!" % dataset
                continue
            if total > theParentSize:
                print "WARNING: %s is suspect in your existing PRW files, has %d events when expected %d ... please check you didn't overmerge" % (
                    dataset, total, theParentSize)
                continue
            else:
                if total != 0:
                    print "WARNING: %s is incomplete (%d events when expected %d) ... will try to find centrally produced NTUP_PILEUP or prepare to generate" % (
                        dataset, total, theParentSize)
                    isIncomplete = True

        #before adding the dataset, see if we can find an NTUP_PILEUP for it
        if not args.skipNTUP_PILEUP:
            ntupDatasetName = theParent.replace("AOD", "NTUP_PILEUP")
            ntupDatasetName = ntupDatasetName.replace("aod", "%")
            ntupDatasetName = ntupDatasetName.replace("merge", "%")
            #remove everything after first rtag of ami tag .. replace with wildcard
            first_rtag_pos = ntupDatasetName.index(
                "_r", ntupDatasetName.index("NTUP_PILEUP"))
            try:
                next_underscore_pos = ntupDatasetName.index(
                    "_", first_rtag_pos + 1)
            except ValueError:
                next_underscore_pos = len(ntupDatasetName)
            ntupDatasetName = ntupDatasetName[:next_underscore_pos] + "%"
            res = atlasAPI.list_datasets(client,
                                         ntupDatasetName,
                                         fields='ldn,prodsys_status')
            foundNTUP = False
            for r in res:
                if r[u'prodsys_status'] != "ALL EVENTS AVAILABLE" and (
                        isIncomplete or
                        r[u'prodsys_status'] != "EVENTS PARTIALLY AVAILABLE"):
                    continue
                print "Found existing NTUP_PILEUP ... please download: %s" % r[
                    u"ldn"]
                ntupDatasets += [r[u'ldn']]
                foundNTUP = True
                break
            if foundNTUP: continue

        aodDatasets += [theParent]

    if len(aodDatasets) > 0:
        if args.outDS == "":
            print "NTUP_PILEUP need generating for the following datasets, please specify the --outDS option to give a name to the output dataset"
            print ",".join(aodDatasets)
            return 1
        print "...submitting job to grid..."

        extraOpts = ""
        if args.forceStaged: extraOpts += "--forceStaged "

        mycommand = """pathena --inDS="%s" --outDS="%s" PileupReweighting/generatePRW_jobOptions.py %s--mergeOutput --nGBPerJob=MAX --addNthFieldOfInDSToLFN=2,6""" % (
            ",".join(aodDatasets), args.outDS, extraOpts)

        print "Command: %s" % mycommand

        from subprocess import call
        if call(mycommand, shell=True) != 0:
            print "Problem executing command. Did you remember to do: lsetup panda"
            return 1

        print "... finished. Please monitor your job on the grid, and when it is finished, download the files!"

    if len(ntupDatasets):
        frucio_fn = 'rucio_downloads_%s.sh' % args.inDsTxt
        print "Please download existing config files from these datasets (see also output file %s):" % frucio_fn
        with open(frucio_fn, 'w') as frucio:
            for ds in ntupDatasets:
                command = "rucio download %s" % ds
                print command
                frucio.write(command + '\n')
        print ""

    if len(ntupDatasets) or len(aodDatasets):
        print "After downloading, you are advised to check they are complete: checkPRW.py --inDsTxt=%s <downloaded files> " % args.inDsTxt
        print "Thank you for generating config files, you get a gold star!"
    else:
        print "Looks like you are ready for pileup reweighting!"

    return 0
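
To make the NTUP_PILEUP name construction in the function above concrete, here is the same string manipulation traced on a hypothetical AOD dataset name (invented for illustration):

name = "mc16_13TeV.999999.SomeSample.merge.AOD.e0000_s0000_r9364_r9315"

name = name.replace("AOD", "NTUP_PILEUP")  # swap the data type
name = name.replace("aod", "%")            # wildcard any lowercase 'aod' token
name = name.replace("merge", "%")          # wildcard the production step
# cut after the first r-tag of the AMI tags and wildcard the rest
first_rtag_pos = name.index("_r", name.index("NTUP_PILEUP"))
try:
    next_underscore_pos = name.index("_", first_rtag_pos + 1)
except ValueError:
    next_underscore_pos = len(name)
name = name[:next_underscore_pos] + "%"

print(name)  # mc16_13TeV.999999.SomeSample.%.NTUP_PILEUP.e0000_s0000_r9364%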
Example #6
inputDS = []

inputFile = open(args.input, "r")

for line in inputFile:
    if line.startswith("#"): continue
    line = line.strip()
    if line == '': continue

    parts = line.split(':')
    dsName = parts[-1].rstrip("/")
    dsID = dsName.split(".")[1]
    print dsName

    dsProv = AtlasAPI.get_dataset_prov(client, dataset=dsName)
    for prov in dsProv["node"]:
        if prov['dataType'] == "EVNT":
            thisProvDSName = prov['logicalDatasetName']
            thisProvDSID = thisProvDSName.split(".")[1]
            if thisProvDSID == dsID:
                print "\tUsing ", thisProvDSName
                inputDS.append(thisProvDSName)


def getUnitSF(unit):
    # Scale factor for cross-section units; 1 nano barn = 1000 pico barn,
    # so the working unit is presumably pico barn.
    if unit == "nano barn":
        return 1000
    print "Unknown unit..."
    return 1.0


fh_out = open(args.output, 'w') if args.output is not None else None
Example #7
def getParent(client, sample):
    prov = AtlasAPI.get_dataset_prov(client, sample)
    # Get the immediate first parent - this should be an AOD for a DxAOD.
    # Note: this assumes the parent is the second entry of the 'node' list.
    parent = prov['node'][1]['logicalDatasetName']
    return parent
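
Indexing 'node' by position is fragile if the ordering of the provenance records is not guaranteed. A sketch of a variant (the helper name is ours) that selects the parent via the 'distance' and 'dataType' fields instead, as Example #1 does:

def getParentByDistance(client, sample):
    # Pick the AOD one step upstream ('distance' == '-1') instead of
    # assuming its position in the 'node' list.
    prov = AtlasAPI.get_dataset_prov(client, sample)
    for ds in prov['node']:
        if ds['dataType'] == 'AOD' and ds['distance'] == '-1':
            return ds['logicalDatasetName']
    return None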
Example #9
def main():
    import argparse
    from argparse import RawTextHelpFormatter

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--outPRWFile', action="store", required=False,
                        help="OPTIONAL Name of the output prw file containing valid configs")
    parser.add_argument('--outputSuspect', action="store_true", default=False,
                        help="allow for suspect channels to be included in the output prw file")
    parser.add_argument('--inDsTxt', action="store", required=True,
                        help="text file containing datasets to make PRW for (one per line)")
    parser.add_argument('prwFiles', nargs="+", help="PRW Config files to scan")

    args = parser.parse_args()

    try:
      import pyAMI.atlas.api as atlasAPI
      import pyAMI.client
    except ImportError:
      print "Could not import pyAMI ... please do: lsetup pyAMI"
      print "Also ensure you have a valid certificate (voms-proxy-init -voms atlas)"
      return 1

    client = pyAMI.client.Client('atlas')
    atlasAPI.init()

    #read datasets into list
    datasets=[]
    for txtFile in args.inDsTxt.split(","):
      with open(txtFile) as f: datasets += f.read().splitlines()

    
    print "Determining provenances of %d datasets ..." % len(datasets)
    
    aodDatasets=dict()
    for dataset in datasets:
      #strip the scope if it's there
      if dataset.startswith("#"): continue
      dataset = dataset.rsplit(":")[-1].strip()
      if len(dataset)==0: continue
      
      print "Doing %s" % dataset
      prov = atlasAPI.get_dataset_prov(client, dataset )
      if 'node' not in prov:
          print "ERROR: Could not determine provenance of %s, skipping!" % dataset
          continue
      theParent=""
      for ds in prov['node']:
        if ds[u'dataType']!=u'AOD': continue
        theParent = str(ds[u'logicalDatasetName'])
        theParentSize = int(ds[u'events'])
        break
      if theParent=="":
          print "ERROR: Could not determine provenance of %s, skipping!" % dataset
          continue
      #extract the dsid ...
      theParent = theParent.split(".")[1]
      
      if theParent in aodDatasets: aodDatasets[theParent] += theParentSize
      else: aodDatasets[theParent] = theParentSize
    
    #aodDatasets is now a chanNum -> eventNumber pairing ...
    
    import ROOT
    
    out = ROOT.CP.TPileupReweighting("out")
    for f in args.prwFiles:
      out.AddConfigFile(f)
    out.ResetCountingMode() #trick tool into going into counting mode 
    
    #list of known period numbers
    periodNumbers = out.GetPeriodNumbers()
    
    for dsid,nevents in aodDatasets.iteritems():
      #get the sum of weights from the tool
      
      total = 0
      for p in periodNumbers:
        if p==-1: continue
        hist = out.GetInputHistogram(int(dsid),p)
        if hist: total += hist.GetEntries()
      
      if total==nevents:
        print "channel %s is ok" % dsid
      elif total<nevents:
        print "channel %s is incomplete (missing %d events from config files)" % (dsid,nevents-total)
        out.RemoveChannel(int(dsid))
      elif total>nevents:
        print "channel %s is suspect! (config files have additional %d events)" % (dsid,total-nevents)
        if not args.outputSuspect:
          out.RemoveChannel(int(dsid))

    if args.outPRWFile:
        out.Initialize()
        out.WriteToFile(args.outPRWFile)

    return 0
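
The snippet defines main() but never invokes it; a conventional entry point (an assumption about how the original script is run) would be:

if __name__ == "__main__":
    import sys
    sys.exit(main())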