def getFileNames(event):
    """Return the de-duplicated file names DAS reports for one event.

    ``event`` is a mapping used to fill the DAS query template; it must
    provide the keys ``dataset``, ``run`` and ``lumi`` (``run`` and
    ``lumi`` are formatted with ``%i``).

    Returns a list of file names in the order DAS returned them, or an
    empty list when the DAS reply status is not ``'ok'``.
    """
    # NOTE(review): a second definition of getFileNames follows in this file
    # and overrides this one at import time -- consider removing one of them.
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        # Parenthesised single-argument form: prints the same text as the
        # Python 2 print statement and is valid syntax on Python 3.
        print("DAS query status: %s" % (status))
        return []

    grep_filters = jsondict['mongo_query']['filters']['grep']
    files = []
    for row in jsondict['data']:
        # Only the first value produced for a row is used; a row that
        # produces no value is skipped instead of raising IndexError.
        file_name = next(iter(das_client.get_value(row, grep_filters)), None)
        if file_name and file_name not in files:
            files.append(file_name)
    return files
def getFileNames(event):
    """Query DAS for the files holding one event and return their names.

    The query is built from ``event``, a mapping with the keys
    ``dataset``, ``run`` and ``lumi`` (``run`` and ``lumi`` are formatted
    with ``%i``).

    Returns the file names without duplicates, in DAS reply order.  An
    empty list is returned when the DAS reply status is not ``'ok'``.
    """
    # NOTE(review): this redefines an earlier getFileNames with the same
    # logic in this file; this later definition is the one in effect.
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        # A call with one parenthesised argument works on Python 2 and 3
        # alike, unlike the former print statement.
        print("DAS query status: %s" % (status))
        return []

    mongo_query = jsondict['mongo_query']
    filters = mongo_query['filters']
    files = []
    for row in jsondict['data']:
        values = das_client.get_value(row, filters['grep'])
        # Take the first value only; tolerate rows yielding nothing rather
        # than failing with IndexError.
        name = next(iter(values), None)
        if not name or name in files:
            continue
        files.append(name)
    return files
def getFileNames_das_client():
    """Return files for given DAS query via das_client.

    Two DAS queries are issued.  The first lists the datasets matching
    ``/ZeroBias/Run2*SiStripCalMinBias-*/ALCARECO`` at site ``T2_CH_CERN``;
    the second lists the files of the last dataset in that reply at the
    same site.

    Returns the file names without duplicates, in DAS reply order.  An
    empty list is returned when either reply status is not ``'ok'`` or
    when the dataset query returns no dataset.
    """
    query = "dataset dataset=/ZeroBias/Run2*SiStripCalMinBias-*/ALCARECO site=T2_CH_CERN"
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        # Parenthesised single-argument print works on Python 2 and 3.
        print("DAS query status: %s" % (status))
        return []

    viableDS = [element['dataset'][0]['name'] for element in jsondict['data']]
    if not viableDS:
        # Without this guard viableDS[-1] below raises IndexError.
        print("No dataset found for DAS query: %s" % (query))
        return []

    # The last dataset of the reply is the one that gets used.
    print("Using Dataset: %s" % (viableDS[-1]))
    query = "file dataset=%s site=T2_CH_CERN | grep file.name" % viableDS[-1]
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s" % (status))
        return []

    grep_filters = jsondict['mongo_query']['filters']['grep']
    files = []
    for row in jsondict['data']:
        # First value per row only; rows yielding nothing are skipped
        # instead of raising IndexError.
        the_file = next(iter(das_client.get_value(row, grep_filters)), None)
        if the_file and the_file not in files:
            files.append(the_file)
    return files
def apply( self ):
    """Pick up RelVal input files for the configured release via DAS.

    All settings are read from ``self._parameters`` (every entry exposes
    ``.value``): 'useDAS', 'cmsswVersion', 'formerVersion', 'relVal',
    'dataTier', 'globalTag', 'maxVersions', 'skipFiles', 'numberOfFiles'
    and 'debug'.

    Steps:
      1. Derive the release name used in the dataset name: patch-like
         suffixes are cut off; for IBs, or when 'formerVersion' is set,
         the output of ``scram`` is scanned for the last (or last to
         last) matching release.
      2. Return an empty list unless the host's domain is cern.ch or
         fnal.gov (and the OS is not Darwin).
      3. With 'useDAS', query DAS for dataset versions from 'maxVersions'
         down to 1 and take the files of the first version that has files
         and is listed at the site's SE.  Without 'useDAS' only a warning
         is printed.

    Returns:
        The list of selected file paths; empty if nothing was found or
        one of the early exits above was taken.

    Raises:
        SystemExit: if the DAS reply to a file query has a status other
            than 'ok'.
    """
    useDAS        = self._parameters[ 'useDAS'        ].value
    cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
    formerVersion = self._parameters[ 'formerVersion' ].value
    relVal        = self._parameters[ 'relVal'        ].value
    dataTier      = self._parameters[ 'dataTier'      ].value
    # 'condition' is not read here: it is only used for GT determination in
    # initialization, if GT not explicitly given.
    globalTag     = self._parameters[ 'globalTag'     ].value
    maxVersions   = self._parameters[ 'maxVersions'   ].value
    skipFiles     = self._parameters[ 'skipFiles'     ].value
    numberOfFiles = self._parameters[ 'numberOfFiles' ].value
    debug         = self._parameters[ 'debug'         ].value

    filePaths = []

    # Determine corresponding CMSSW version for RelVals
    preId      = '_pre'
    patchId    = '_patch'    # patch releases
    hltPatchId = '_hltpatch' # HLT patch releases
    dqmPatchId = '_dqmpatch' # DQM patch releases
    slhcId     = '_SLHC'     # SLHC releases
    rootId     = '_root'     # ROOT test releases
    ibId       = '_X_'       # IBs
    if patchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
    elif hltPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
    elif dqmPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
    elif rootId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
    elif slhcId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
    elif ibId in cmsswVersion or formerVersion:
        try:
            # Text mode: the output is compared with 'str' release names
            # below; with the default bytes output on Python 3 those 'in'
            # tests raise TypeError.
            outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE, universal_newlines = True ).communicate()
        except OSError as error:
            # 'scram' missing or not executable: report it through the
            # same path as an error written by scram itself.
            outputTuple = ( '', str( error ) or 'cannot execute scram' )
        if len( outputTuple[ 1 ] ) != 0:
            print('%s INFO : SCRAM error'%( self._label ))
            if debug:
                print(' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
                print()
                print(outputTuple[ 1 ])
                print()
                self.messageEmptyList()
            return filePaths
        versions = { 'last'      :''
                   , 'lastToLast':''
                   }
        for line in outputTuple[ 0 ].splitlines():
            fields = line.split()
            if len( fields ) < 2:
                continue # blank or malformed line: no release name in second column
            version = fields[ 1 ]
            if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                    versions[ 'lastToLast' ] = versions[ 'last' ]
                    versions[ 'last' ]       = version
                    if version == cmsswVersion:
                        break
        # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
        if formerVersion:
            # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
            if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' exists ;-)
            # Use pre-release as "former version" for CMSSW_X_Y_0
            elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                versions[ 'lastToLast' ] = ''
                for line in outputTuple[ 0 ].splitlines():
                    fields = line.split()
                    if len( fields ) < 2:
                        continue
                    version      = fields[ 1 ]
                    versionParts = version.partition( preId )
                    if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                        versions[ 'lastToLast' ] = version
                    elif versions[ 'lastToLast' ] != '':
                        break
            # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
            elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = '' # no alternative :-(
            cmsswVersion = versions[ 'lastToLast' ]
        else:
            cmsswVersion = versions[ 'last' ]

    # Debugging output
    if debug:
        print('%s DEBUG: Called with...'%( self._label ))
        for key in self._parameters.keys():
            print(' %s:\t'%( key ), end=' ')
            print(self._parameters[ key ].value, end=' ')
            if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value:
                print(' (default)')
            else:
                print()
            if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                if formerVersion:
                    print(' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
                else:
                    print(' ==> modified to last valid release %s'%( cmsswVersion ))

    # Check domain
    domain = socket.getfqdn().split( '.' )
    domainSE = ''
    if len( domain ) == 0:
        print('%s INFO : Cannot determine domain of this computer'%( self._label ))
        if debug:
            self.messageEmptyList()
        return filePaths
    elif os.uname()[0] == "Darwin":
        print('%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ))
        if debug:
            self.messageEmptyList()
        return filePaths
    elif len( domain ) == 1:
        print('%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ))
        if debug:
            self.messageEmptyList()
        return filePaths
    if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
        print('%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ))
        if debug:
            self.messageEmptyList()
        return filePaths
    if domain[ -2 ] == 'cern':
        domainSE = 'T2_CH_CERN'
    elif domain[ -2 ] == 'fnal':
        domainSE = 'T1_US_FNAL_MSS'
    if debug:
        print('%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ))
        print('%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ))

    # Find files
    validVersion = 0
    dataset    = ''
    datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
    if useDAS:
        if debug:
            print('%s DEBUG: Using DAS query'%( self._label ))
        # A limit of at least 1 is always passed to DAS, also when no
        # files are requested.
        dasLimit = numberOfFiles
        if dasLimit <= 0:
            dasLimit = 1
        # Try the highest dataset version first.
        for version in range( maxVersions, 0, -1 ):
            filePaths    = []
            filePathsTmp = []
            fileCount    = 0
            dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
            dasQuery = 'file dataset=%s | grep file.name'%( dataset )
            if debug:
                print('%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ))
                print(' \'%s\''%( dasQuery ))
            jsondict = das_client.get_data(dasQuery,dasLimit)
            if debug:
                print('%s DEBUG: Received DAS JSON dictionary:'%( self._label ))
                print(' \'%s\''%( jsondict ))
            if jsondict[ 'status' ] != 'ok':
                print('There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
                # Same effect as exit( 1 ), without relying on the 'exit'
                # helper that only the 'site' module installs.
                raise SystemExit( 1 )
            mongo_query = jsondict[ 'mongo_query' ]
            filters     = mongo_query[ 'filters' ]
            data        = jsondict[ 'data' ]
            if debug:
                print('%s DEBUG: Query in JSON dictionary:'%( self._label ))
                print(' \'%s\''%( mongo_query ))
                print('%s DEBUG: Filters in query:'%( self._label ))
                print(' \'%s\''%( filters ))
                print('%s DEBUG: Data in JSON dictionary:'%( self._label ))
                print(' \'%s\''%( data ))
            for row in data:
                # First value of the row only; a row without any value is
                # treated like an empty file name instead of raising
                # IndexError.
                filePath = next( iter( das_client.get_value( row, filters[ 'grep' ] ) ), '' )
                if debug:
                    print('%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ))
                if len( filePath ) > 0:
                    if validVersion != version:
                        # First file of this version: check once that the
                        # dataset is listed at the site's SE.
                        jsontestdict = das_client.get_data('site dataset=%s | grep site.name' % ( dataset ), 999)
                        mongo_testquery = jsontestdict[ 'mongo_query' ]
                        testfilters = mongo_testquery[ 'filters' ]
                        testdata    = jsontestdict[ 'data' ]
                        if debug:
                            print('%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( jsontestdict ))
                            print('%s DEBUG: Query in JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( mongo_testquery ))
                            print('%s DEBUG: Filters in query (site test):'%( self._label ))
                            print(' \'%s\''%( testfilters ))
                            print('%s DEBUG: Data in JSON dictionary (site test):'%( self._label ))
                            print(' \'%s\''%( testdata ))
                        foundSE = False
                        for testrow in testdata:
                            siteName = next( iter( das_client.get_value( testrow, testfilters[ 'grep' ] ) ), None )
                            if siteName == domainSE:
                                foundSE = True
                                break
                        if not foundSE:
                            if debug:
                                print('%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ))
                            break
                        validVersion = version
                        if debug:
                            print('%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ))
                    if numberOfFiles == 0:
                        break
                    # protect from double entries ( 'unique' flag in query does not work here)
                    if not filePath in filePathsTmp:
                        filePathsTmp.append( filePath )
                        if debug:
                            print('%s DEBUG: File \'%s\' found'%( self._label, filePath ))
                        fileCount += 1
                        # needed, since "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                        if fileCount > skipFiles:
                            filePaths.append( filePath )
                    elif debug:
                        print('%s DEBUG: File \'%s\' found again'%( self._label, filePath ))
            if validVersion > 0:
                if numberOfFiles == 0 and debug:
                    print('%s DEBUG: No files requested'%( self._label ))
                break
    else:
        if debug:
            print('%s DEBUG: Using DBS query'%( self._label ))
        # Nothing is queried on this path, so validVersion stays 0 and the
        # "no file found" warning below is printed as well.
        print('%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label ))

    # Check output and return
    if validVersion == 0:
        print('%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ))
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) == 0:
        print('%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ))
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) < numberOfFiles:
        print('%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ))

    if debug:
        print('%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ))
    return filePaths