コード例 #1
0
ファイル: Sample.py プロジェクト: slehti/TauFW
 def _getnevents(self,
                 das=True,
                 refresh=False,
                 tree='Events',
                 limit=-1,
                 checkfiles=False,
                 verb=0):
     """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False).

     Arguments:
       das:        if True (default), ask DAS for the total per dataset path,
                   unless checkfiles is set
       refresh:    recount even if a positive number of events is already stored
       tree:       name of the tree passed to getnevents for each file
       limit:      passed on to self.getfiles; the total is only stored in
                   self.nevents if limit<0
       checkfiles: count per file, even if das=True
       verb:       verbosity level

     Returns:
       Tuple (nevents, filenevts) with the total number of events and the
       dictionary self.filenevts, which is filled per file name when counting
       per file.
     """
     nevents = self.nevents
     filenevts = self.filenevts
     if nevents <= 0 or refresh:
         # Recount from scratch: continuing from the stored value would
         # double count on refresh, or carry over a negative placeholder.
         nevents = 0
         if checkfiles or (self.storage and not das):
             # get number of events per file
             files = self.getfiles(url=True,
                                   das=das,
                                   refresh=refresh,
                                   limit=limit,
                                   verb=verb)
             for fname in files:
                 nevts = getnevents(fname, tree)
                 filenevts[fname] = nevts  # cache
                 nevents += nevts
                 LOG.verb(
                     "_getnevents: Found %d events in %r." % (nevts, fname),
                     verb, 3)
         else:  # get number of events from DAS
             for daspath in self.paths:
                 nevents += getdasnevents(daspath,
                                          instance=self.instance,
                                          verb=verb - 1)
         if limit < 0:  # do not store a partial count
             self.nevents = nevents
     return nevents, filenevts
コード例 #2
0
ファイル: Sample.py プロジェクト: whahmad/TauFW
 def getfiles(self, refresh=False, url=True, verb=0):
     """Return the list of ROOT files of this sample.

     The cached list self.files is returned as is, unless the sample is
     refreshable and the cache is empty or refresh is requested. In that case
     the list is rebuilt for every path in self.paths, from the storage
     system if self.storage is set, and from DAS otherwise. The sorted result
     is stored in self.files.
     """
     cached = self.files
     if not self.refreshable or (cached and not refresh):
         return cached  # nothing to (re)derive

     found = []
     for daspath in self.paths:
         if self.storage:  # ask the storage system
             sepath = repkey(self.storage, PATH=daspath).replace('//', '/')
             storage = getstorage(sepath, verb=verb - 1)
             candidates = storage.getfiles(url=url, verb=verb - 1)
         else:  # ask DAS
             dascmd = 'dasgoclient --query="file dataset=%s instance=%s"' % (
                 daspath, self.instance)
             LOG.verb(repr(dascmd), verb)
             cmdout = execute(dascmd, verb=verb - 1)
             candidates = cmdout.split(os.linesep)
         for entry in candidates:
             fname = entry.strip()
             if not fname.endswith('.root'):
                 continue  # only keep ROOT files
             if any(bad.endswith(fname) for bad in self.blacklist):
                 continue  # blacklisted
             if url and self.url not in fname and 'root://' not in fname:
                 fname = self.url + fname  # prepend missing URL
             found.append(fname)
     found.sort()  # keep list order reproducible
     self.files = found
     return found
コード例 #3
0
def dasgoclient(query, **kwargs):
    """Help function to call dasgoclient and retrieve data set information.

    Arguments:
      query: DAS query string, placed inside --query="..."

    Keyword arguments:
      verb:     verbosity level (default 0)
      instance: if non-empty, appended to the query as ' instance=...'
      limit:    if larger than 0, passed as --limit option
      opts:     extra command line options, appended after stripping spaces

    Returns:
      The output returned by execute() for the assembled command.

    Raises:
      CalledProcessError: re-raised after logging hints if the command fails.
    """
    verbosity = kwargs.get('verb', 0)
    instance = kwargs.get('instance', "")
    limit = kwargs.get('limit', 0)
    option = kwargs.get('opts', "")
    if instance:
        query += " instance=%s" % (instance)
    dascmd = 'dasgoclient --query="%s"' % (query)
    if limit > 0:
        dascmd += " --limit=%d" % (limit)
    if option:
        dascmd += " " + option.strip()
    LOG.verb(repr(dascmd), verbosity)
    try:  # only the command execution is guarded
        cmdout = execute(dascmd, verb=verbosity - 1)
    except CalledProcessError:
        print("")  # blank line before the error, in Python 2 and 3 alike
        LOG.error(
            "Failed to call 'dasgoclient' command. Please make sure:\n"
            "  1) 'dasgoclient' command exists.\n"
            "  2) You have a valid VOMS proxy. Use 'voms-proxy-init -voms cms -valid 200:0' or 'source utils/setupVOMS.sh'.\n"
            "  3) The DAS dataset in '%s' exists!\n" % (dascmd))
        raise  # bare raise keeps the original traceback
    return cmdout
コード例 #4
0
ファイル: Sample.py プロジェクト: yihui-lai/TauFW
 def getnevents(self, das=True, refresh=False, treename='Events', verb=0):
     """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False).

     Arguments:
       das:      if True (default), or if no storage system is set, take the
                 number of events from the DAS summary of each dataset path
       refresh:  recount even if a positive number of events is already stored
       treename: name of the tree whose entries are counted per file
       verb:     verbosity level

     Returns:
       Total number of events, which is also stored in self.nevents after a
       (re)count.
     """
     nevents = self.nevents
     if nevents <= 0 or refresh:
         # Recount from scratch: continuing from the stored value would
         # double count on refresh, or carry over a negative placeholder.
         nevents = 0
         if self.storage and not das:  # get number of events from storage system
             files = self.getfiles(url=True, refresh=refresh, verb=verb)
             for fname in files:
                 tfile = ensureTFile(fname)
                 tree = tfile.Get(treename)
                 if not tree:
                     LOG.warning("getnevents: No %r tree in events in %r!" %
                                 (treename, fname))
                     tfile.Close()  # do not leave the file open
                     continue
                 nevts = tree.GetEntries()
                 tfile.Close()
                 nevents += nevts
                 LOG.verb(
                     "getnevents: Found %d events in %r." % (nevts, fname),
                     verb, 3)
         else:  # get number of events from DAS
             for daspath in self.paths:
                 cmdout = dasgoclient("summary dataset=%s instance=%s" %
                                      (daspath, self.instance),
                                      verb=verb - 1)
                 # look for the exact token that is split on below
                 if '"nevents":' in cmdout:
                     ndasevts = int(
                         cmdout.split('"nevents":')[1].split(',')[0])
                 else:
                     ndasevts = 0
                     LOG.warning(
                         "Could not get number of events from DAS for %r." %
                         (self.name))
                 nevents += ndasevts
         self.nevents = nevents
     return nevents
コード例 #5
0
ファイル: Sample.py プロジェクト: whahmad/TauFW
 def getnevents(self, refresh=False, verb=0):
     """Get number of events from DAS.

     The DAS summary of every path in self.paths is queried with dasgoclient
     and the "nevents" values are summed. The stored self.nevents is reused
     if it is positive, unless refresh is set.

     Returns:
       Total number of events, which is also stored in self.nevents after a
       (re)count.
     """
     nevents = self.nevents
     if nevents <= 0 or refresh:
         # Recount from scratch: continuing from the stored value would
         # double count on refresh, or carry over a negative placeholder.
         nevents = 0
         for path in self.paths:
             dascmd = 'dasgoclient --query="summary dataset=%s instance=%s"' % (
                 path, self.instance)
             LOG.verb(repr(dascmd), verb)
             cmdout = execute(dascmd, verb=verb - 1)
             # look for the exact token that is split on below
             if '"nevents":' in cmdout:
                 ndasevts = int(cmdout.split('"nevents":')[1].split(',')[0])
             else:
                 ndasevts = 0  # avoid unbound or stale value from previous path
                 LOG.warning(
                     "Could not get number of events from DAS for %r." %
                     (self.name))
             nevents += ndasevts
         self.nevents = nevents
     return nevents
コード例 #6
0
ファイル: Sample.py プロジェクト: cms-tau-pog/TauFW
 def getfiles(self,das=False,refresh=False,url=True,limit=-1,verb=0):
   """Return a copy of the file list of this sample.

   The list is (re)derived if the sample is refreshable and the cache is
   empty, or das or refresh is set: from the storage system by default, or
   from DAS if das=True or no storage system is set. Otherwise the cached
   list is reused and the URL prefix is added (url=True) or removed
   (url=False) where needed. The derived list is stored in self.files,
   except when it was taken from DAS while a storage system is set.
   """
   LOG.verb("getfiles: das=%r, refresh=%r, url=%r, limit=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
     das,refresh,url,limit,self.filelist,len(self.files),len(self.filenevts)),verb,1)
   if self.filelist and not self.files: # first time: load file list from text file
     self.loadfiles(self.filelist)
   result = self.files # start from cached list
   if das and self.storage:
     prefix = self.dasurl
   else:
     prefix = self.url
   rederive = self.refreshable and (not result or das or refresh)
   if rederive:
     if refresh and result and not das:
       LOG.verb("getfiles: Refreshing file list...",verb,2)
     else:
       LOG.verb("getfiles: Retrieving files...",verb,2)
     result = [ ]
     for daspath in self.paths: # loop over DAS dataset paths
       self.pathfiles[daspath] = [ ]
       perpath = self.pathfiles[daspath]
       if (self.storage and not das) or (not self.instance): # storage system
         ending     = self.postfix+'.root'
         sepath     = repkey(self.storepath,PATH=daspath,DAS=daspath).replace('//','/')
         candidates = self.storage.getfiles(sepath,url=url,verb=verb-1)
         if limit>0:
           candidates = candidates[:limit]
       else: # DAS
         ending     = '.root'
         candidates = getdasfiles(daspath,instance=self.instance,limit=limit,verb=verb-1)
       for entry in candidates: # keep only wanted ROOT files
         fname = entry.strip()
         if not fname.endswith(ending):
           continue
         if any(bad.endswith(fname) for bad in self.blacklist):
           continue
         if url and prefix not in fname and 'root://' not in fname:
           fname = prefix+fname
         result.append(fname)
         perpath.append(fname)
       perpath.sort()
       if not perpath:
         LOG.warning("getfiles: Did not find any files for %s"%(daspath))
     result.sort() # for consistent list order
     if not (das and self.storage):
       self.files = result # store cache for efficiency
   elif url:
     if any(prefix not in f for f in result): # add url if missing
       result = [(f if prefix in f else prefix+f) for f in result]
   else:
     if any(prefix in f for f in result): # remove url
       result = [f.replace(prefix,"") for f in result]
   return result[:] # hand out a copy to protect self.files
コード例 #7
0
ファイル: Sample.py プロジェクト: cms-tau-pog/TauFW
 def loadfiles(self,listname_,**kwargs):
   """Load filenames from text file for fast look up in future.

   The keys ERA, GROUP and SAMPLE in listname_ are replaced via repkey; if
   the name contains '$PATH', one list per DAS path is read, otherwise only
   one list for the first path. Per line only the first whitespace-separated
   part is used, and lines starting with '#' are skipped. A line
   'DASPATH=/...' switches the DAS path the following files belong to; files
   of paths that are not in self.paths are skipped. Other lines are matched
   with fevtsexp (file name, optionally with number of events).

   Side effects: fills self.pathfiles and self.filenevts, sets self.files,
   updates self.nevents to the sum of the per-file events, and sets
   self.refreshable = True if a list is missing or no files were found.

   Returns:
     The sorted list self.files.
   """
   verbosity = LOG.getverbosity(self,kwargs)
   listname  = repkey(listname_,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
   LOG.verb("loadfiles: listname=%r -> %r, len(files)=%d, len(filenevts)=%d"%(
     listname_,listname,len(self.files),len(self.filenevts)),verbosity,1)
   filenevts = self.filenevts
   nevents   = 0
   filelist  = [ ]
   paths = self.paths if '$PATH' in listname else [self.paths[0]]
   for path in paths:
     listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
     # NOTE(review): the prints use self.verbosity, not the local verbosity - confirm intended
     if self.verbosity>=1:
       print(">>> Loading sample files from %r..."%(listname_))
     self.pathfiles[path] = [ ]
     if os.path.isfile(listname_):
       skip = False
       subpaths = [ ] # for sanity check
       with open(listname_,'r') as listfile:
         for line in listfile:
           line = line.strip().split() # split at space to allow comments at end
           if not line: continue
           line = line[0].strip() # remove spaces, consider only first part of the line
           if line[0]=='#': continue # do not consider comments
           if line.startswith("DASPATH="): # to keep track of multiple DAS data set paths
             path = line.split('=')[-1] # DAS data set path
             LOG.insist(path.count('/')>=3 and path.startswith('/'),
               "DAS path %r in %s has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT..."%(path,listname_))
             if path in self.paths: # store file list for this path
               self.pathfiles[path] = [ ]
               subpaths.append(path)
               skip = False
             else: # do not store file list for this path
               skip = True
           else:
             if skip: continue # only load files for this sample's DAS dataset paths
             match = fevtsexp.match(line) # match $FILENAM(:NEVTS)
             if not match: continue
             infile = match.group(1)
             nevts  = None # unknown unless given in the list
             if match.group(2): # found nevents in filename
               nevts  = int(match.group(2))
               filenevts[infile] = nevts # store/cache in dictionary
               nevents += nevts
             filelist.append(infile)
             self.pathfiles[path].append(infile)
             if self.verbosity>=3:
               if nevts is None: # no count on this line; do not reuse a stale one
                 print(">>> %7s events for %s"%('?',infile))
               else:
                 print(">>> %7d events for %s"%(nevts,infile))
       if not filelist:
         LOG.warning("loadfiles: Did not find any files in %s!"%(listname_))
         self.refreshable = True
       else: # sanity check for empty list
         for subpath in subpaths:
           if not self.pathfiles[subpath]:
             LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(subpath,listname_))
     else:
       LOG.warning("loadfiles: file list %s does not exist!"%(listname_))
       self.refreshable = True
   for path in self.paths:
     if path not in self.pathfiles: # nonexistent list
       LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(path,listname))
   if self.nevents<=0:
     self.nevents = nevents
   elif self.nevents!=nevents:
     LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
     self.nevents = nevents # assignment; the comparison here had no effect
   self.files = filelist
   self.files.sort()
   return self.files
コード例 #8
0
ファイル: Sample.py プロジェクト: cms-tau-pog/TauFW
 def _getnevents(self,das=True,refresh=False,tree='Events',limit=-1,checkfiles=False,ncores=0,verb=0):
   """Get number of nanoAOD events from DAS (default), or from files on storage system (das=False).

   Arguments:
     das:        if True (default), ask DAS for the total per dataset path,
                 unless checkfiles is set
     refresh:    recount even if a positive number of events is already stored
     tree:       tree name passed to iterevts
     limit:      passed on to self.getfiles; the total is only stored in
                 self.nevents if limit<=0
     checkfiles: count per file, even if das=True
     ncores:     passed to iterevts
     verb:       verbosity level

   Returns:
     Tuple (nevents, filenevts) with the total number of events and the
     dictionary self.filenevts, which is filled per file name when counting
     per file.
   """
   LOG.verb("_getnevents: das=%r, refresh=%r, tree=%r, limit=%r, checkfiles=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
     das,refresh,tree,limit,checkfiles,self.filelist,len(self.files),len(self.filenevts)),verb,1)
   if self.filelist and not self.files: # get file list from text file for first time
     self.loadfiles(self.filelist)
   nevents   = self.nevents
   filenevts = self.filenevts
   bar       = None
   if nevents<=0 or refresh:
     # Recount from scratch: continuing from the stored value would double
     # count on refresh, and push the progress fraction below beyond 100%.
     nevents = 0
     if checkfiles or (self.storage and not das): # get number of events per file from storage system
       LOG.verb("_getnevents: Get events per file (storage=%r, das=%r)..."%(self.storage,das),verb,2)
       files = self.getfiles(url=True,das=das,refresh=refresh,limit=limit,verb=verb)
       if verb<=0 and len(files)>=5: # progress bar only for quiet, longer jobs
         bar = LoadingBar(len(files),width=20,pre=">>> Getting number of events: ",counter=True,remove=True)
       for nevts, fname in iterevts(files,tree,filenevts,refresh,ncores=ncores,verb=verb):
         filenevts[fname] = nevts # cache
         nevents += nevts
         LOG.verb("_getnevents: Found %d events in %r."%(nevts,fname),verb,3)
         if bar:
           if self.nevents>0: # show progress relative to previously stored total
             bar.count("files, %d/%d events (%d%%)"%(nevents,self.nevents,100.0*nevents/self.nevents))
           else:
             bar.count("files, %d events"%(nevents))
     else: # get total number of events from DAS
       LOG.verb("_getnevents: Get total number of events per path (storage=%r, das=%r)..."%(self.storage,das),verb,2)
       for daspath in self.paths:
         nevts = getdasnevents(daspath,instance=self.instance,verb=verb-1)
         LOG.verb("_getnevents: %10d events for %s..."%(nevts,daspath),verb,2)
         nevents += nevts
     if limit<=0: # do not store a partial count
       self.nevents = nevents
   else:
     LOG.verb("_getnevents: Reusing old number of events (nevents=%r, refresh=%r)..."%(nevents,refresh),verb,2)
   return nevents, filenevts