def userStorageAccounting(self):
    """Generate and commit one UserStorage accounting record per (user, SE) pair.

    Queries the StorageUsage DB for a per-user catalog summary and a
    per-user/per-SE physical summary, builds UserStorage records stamped
    with the current time, and commits them all to the DataStore.

    :returns: S_OK / S_ERROR
    """
    self.log.notice("-------------------------------------------------------------------------------------\n")
    self.log.notice("Generate accounting records for user directories ")
    self.log.notice("-------------------------------------------------------------------------------------\n")
    result = self.__stDB.getUserSummary()
    if not result['OK']:
        return result
    userCatalogData = result['Value']
    # FIX: removed leftover Python-2 debug statement `print userCatalogData`
    # (a syntax error under Python 3, and it leaked the full summary to stdout)
    self.log.notice("Got summary for %s users" % (len(userCatalogData)))
    result = self.__stDB.getUserSummaryPerSE()
    if not result['OK']:
        return result
    userSEData = result['Value']
    self.log.notice("Got SE summary for %s users" % (len(userSEData)))

    now = Time.dateTime()
    numRows = 0
    for user in sorted(userSEData):
        if user not in userCatalogData:
            self.log.error("User has SE data but not Catalog data!", user)
            continue
        for se in sorted(userSEData[user]):
            seData = userSEData[user][se]
            usRecord = UserStorage()
            usRecord.setStartTime(now)
            usRecord.setEndTime(now)
            usRecord.setValueByKey("User", user)
            usRecord.setValueByKey("StorageElement", se)
            usRecord.setValueByKey("LogicalSize", userCatalogData[user]['Size'])
            usRecord.setValueByKey("LogicalFiles", userCatalogData[user]['Files'])
            usRecord.setValueByKey("PhysicalSize", seData['Size'])
            usRecord.setValueByKey("PhysicalFiles", seData['Files'])
            usRecord.setValueByKey("StorageSize", 0)
            usRecord.setValueByKey("StorageFiles", 0)
            gDataStoreClient.addRegister(usRecord)
            numRows += 1
        self.log.notice(" User %s is using %.2f GiB (%s files)" %
                        (user,
                         userCatalogData[user]['Size'] / (1024.0 ** 3),
                         userCatalogData[user]['Files']))
    self.log.notice("Sending %s records to accounting for user storage" % numRows)
    res = gDataStoreClient.commit()
    if not res['OK']:
        self.log.notice("ERROR: committing UserStorage records: %s " % res)
        # FIX: propagate the error result as-is instead of wrapping the whole
        # result dict in S_ERROR(), which produced an unreadable message
        return res
    self.log.notice("%s records for UserStorage type successfully committed" % numRows)
    # FIX: the success path previously fell off the end and returned None;
    # callers checking ['OK'] would crash
    return S_OK()
def _addPilotsAccountingReport(self, pilotsData):
    """ fill accounting data """
    for pilotRef in pilotsData:
        data = pilotsData[pilotRef]
        record = PilotAccounting()
        record.setEndTime(data['LastUpdateTime'])
        record.setStartTime(data['SubmissionTime'])
        lookup = Registry.getUsernameForDN(data['OwnerDN'])
        if lookup['OK']:
            userName = lookup['Value']
        else:
            userName = '******'
            self.log.error("Can't determine username for dn:", data['OwnerDN'])
        record.setValueByKey('User', userName)
        record.setValueByKey('UserGroup', data['OwnerGroup'])
        ce = data['DestinationSite']
        mapping = getCESiteMapping(ce)
        if mapping['OK'] and ce in mapping['Value']:
            record.setValueByKey('Site', mapping['Value'][ce].strip())
        else:
            record.setValueByKey('Site', 'Unknown')
        record.setValueByKey('GridCE', ce)
        record.setValueByKey('GridMiddleware', data['GridType'])
        record.setValueByKey('GridResourceBroker', data['Broker'])
        record.setValueByKey('GridStatus', data['Status'])
        # 'Jobs' may be absent: account zero jobs in that case
        record.setValueByKey('Jobs', len(data['Jobs']) if 'Jobs' in data else 0)
        self.log.verbose("Added accounting record for pilot %s" % data['PilotID'])
        addResult = gDataStoreClient.addRegister(record)
        if not addResult['OK']:
            return addResult
    return S_OK()
def test_addAndRemove():
    """Insert one record, run report queries against it, then remove it."""
    record = createAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    assert gDataStoreClient.addRegister(record)['OK']
    assert gDataStoreClient.commit()['OK']
    reports = ReportsClient()
    assert reports.listReports('DataOperation')['OK']
    assert reports.listUniqueKeyValues('DataOperation')['OK']
    assert reports.getReport('DataOperation', 'Successful transfers',
                             datetime.datetime.utcnow(), datetime.datetime.utcnow(),
                             {}, 'Destination')['OK']
    # now removing that record
    assert gDataStoreClient.remove(record)['OK']
def __addPilotsAccountingReport(self, pilotsData):
    """ fill accounting data """
    for pilotRef in pilotsData:
        data = pilotsData[pilotRef]
        record = PilotAccounting()
        record.setEndTime(data['LastUpdateTime'])
        record.setStartTime(data['SubmissionTime'])
        lookup = CS.getUsernameForDN(data['OwnerDN'])
        if lookup['OK']:
            userName = lookup['Value']
        else:
            userName = '******'
            self.log.error("Can't determine username for dn:", data['OwnerDN'])
        record.setValueByKey('User', userName)
        record.setValueByKey('UserGroup', data['OwnerGroup'])
        siteResult = getSiteForCE(data['DestinationSite'])
        if siteResult['OK'] and siteResult['Value'].strip():
            record.setValueByKey('Site', siteResult['Value'].strip())
        else:
            record.setValueByKey('Site', 'Unknown')
        record.setValueByKey('GridCE', data['DestinationSite'])
        record.setValueByKey('GridMiddleware', data['GridType'])
        record.setValueByKey('GridResourceBroker', data['Broker'])
        record.setValueByKey('GridStatus', data['Status'])
        # 'Jobs' may be absent: account zero jobs in that case
        record.setValueByKey('Jobs', len(data['Jobs']) if 'Jobs' in data else 0)
        self.log.verbose("Added accounting record for pilot %s" % data['PilotID'])
        addResult = gDataStoreClient.addRegister(record)
        if not addResult['OK']:
            return addResult
    return S_OK()
def __addPilotsAccountingReport(self, pilotsData):
    """Fill and register one PilotAccounting record per pilot.

    :param dict pilotsData: pilot reference -> dict with keys LastUpdateTime,
        SubmissionTime, OwnerDN, OwnerGroup, DestinationSite, GridType,
        Broker, Status, PilotID and optionally Jobs
    :returns: S_OK, or the first failing addRegister result
    """
    for pRef in pilotsData:
        pData = pilotsData[pRef]
        pA = PilotAccounting()
        pA.setEndTime(pData["LastUpdateTime"])
        pA.setStartTime(pData["SubmissionTime"])
        retVal = CS.getUsernameForDN(pData["OwnerDN"])
        if not retVal["OK"]:
            # fall back to a masked user so the record is not lost
            userName = "******"
            self.log.error("Can't determine username for dn:", pData["OwnerDN"])
        else:
            userName = retVal["Value"]
        pA.setValueByKey("User", userName)
        pA.setValueByKey("UserGroup", pData["OwnerGroup"])
        result = getSiteForCE(pData["DestinationSite"])
        if result["OK"] and result["Value"].strip():
            pA.setValueByKey("Site", result["Value"].strip())
        else:
            pA.setValueByKey("Site", "Unknown")
        pA.setValueByKey("GridCE", pData["DestinationSite"])
        pA.setValueByKey("GridMiddleware", pData["GridType"])
        pA.setValueByKey("GridResourceBroker", pData["Broker"])
        pA.setValueByKey("GridStatus", pData["Status"])
        # FIX: PEP 8 idiom — "x not in y" rather than "not x in y"
        if "Jobs" not in pData:
            pA.setValueByKey("Jobs", 0)
        else:
            pA.setValueByKey("Jobs", len(pData["Jobs"]))
        self.log.verbose("Added accounting record for pilot %s" % pData["PilotID"])
        retVal = gDataStoreClient.addRegister(pA)
        if not retVal["OK"]:
            return retVal
    return S_OK()
def test_addAndRemoveDataOperation():
    """Insert one DataOperation record, query reports for it, then remove it."""
    record = createDataOperationAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    assert gDataStoreClient.addRegister(record)['OK']
    assert gDataStoreClient.commit()['OK']
    reports = ReportsClient()
    assert reports.listReports('DataOperation')['OK']
    assert reports.listUniqueKeyValues('DataOperation')['OK']
    assert reports.getReport('DataOperation', 'Successful transfers',
                             datetime.datetime.utcnow(), datetime.datetime.utcnow(),
                             {}, 'Destination')['OK']
    # now removing that record
    assert gDataStoreClient.remove(record)['OK']
def test_addAndRemoveStorageOccupancy():
    """Insert one StorageOccupancy record, query reports for it, then remove it."""
    record = createStorageOccupancyAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    assert gDataStoreClient.addRegister(record)['OK']
    assert gDataStoreClient.commit()['OK']
    reports = ReportsClient()
    assert reports.listReports('StorageOccupancy')['OK']
    assert reports.listUniqueKeyValues('StorageOccupancy')['OK']
    assert reports.getReport('StorageOccupancy', 'Free and Used Space',
                             datetime.datetime.utcnow(), datetime.datetime.utcnow(),
                             {}, 'StorageElement')['OK']
    # now removing that record
    assert gDataStoreClient.remove(record)['OK']
def commit(self):
    """ Commit register to server """
    res = gDataStoreClient.addRegister(self)
    if res['OK']:
        return gDataStoreClient.commit()
    return res
def _storeCommand(self, results):
    """ _storeCommand

    Adding records to accounting, on top of what does the derived method.

    :param dict results: something like {'ElementName': 'CERN-HIST-EOS',
                                         'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                                         'Free': 3264963586.10073,
                                         'Total': 8000000000.0,
                                         'SpaceReservation': 'LHCb-Disk'}
    :returns: S_OK/S_ERROR dict
    """
    res = super(FreeDiskSpaceCommand, self)._storeCommand(results)
    if not res['OK']:
        return res
    siteRes = DMSHelpers().getLocalSiteForSE(results['ElementName'])
    if not siteRes['OK']:
        return siteRes
    if not siteRes['Value']:
        # SE not attached to a local site: nothing to account
        return S_OK()
    spaceReservation = results.get('SpaceReservation')
    accountingDict = {
        'SpaceToken': spaceReservation,
        'Endpoint': results['Endpoint'],
        'Site': siteRes['Value']
    }
    results['Used'] = results['Total'] - results['Free']
    for sType in ('Total', 'Free', 'Used'):
        spaceTokenAccounting = SpaceToken()
        spaceTokenAccounting.setNowAsStartAndEndTime()
        spaceTokenAccounting.setValuesFromDict(accountingDict)
        spaceTokenAccounting.setValueByKey('SpaceType', sType)
        # accounting stores bytes; 'results' values are in MB
        spaceTokenAccounting.setValueByKey(
            'Space', int(convertSizeUnits(results[sType], 'MB', 'B')))
        # FIX: the addRegister result was previously discarded
        res = gDataStoreClient.addRegister(spaceTokenAccounting)
        if not res['OK']:
            self.log.warn("Could not add register", res['Message'])
    # FIX: commit once after the loop and propagate its result instead of
    # discarding it and unconditionally returning S_OK()
    return gDataStoreClient.commit()
def commit( self ):
    """ Commit register to server """
    registerResult = gDataStoreClient.addRegister( self )
    if not registerResult[ 'OK' ]:
        return registerResult
    return gDataStoreClient.commit()
def sendPilotAccounting(self, pilotDict):
    """ Send pilot accounting record

    Builds one PilotAccounting record per pilot, registers it, commits all
    records, and sets the AccountingSent flag per pilot after a successful
    commit.

    :param dict pilotDict: pilot reference -> dict of pilot attributes
    :returns: S_OK, or the failed commit result
    """
    for pRef in pilotDict:
        pData = pilotDict[pRef]  # hoisted: avoid repeated pilotDict[pRef] lookups
        self.log.verbose('Preparing accounting record for pilot %s' % pRef)
        pA = PilotAccounting()
        pA.setEndTime(pData['LastUpdateTime'])
        pA.setStartTime(pData['SubmissionTime'])
        retVal = CS.getUsernameForDN(pData['OwnerDN'])
        if not retVal['OK']:
            userName = '******'
            self.log.error("Can't determine username for dn:", pData['OwnerDN'])
        else:
            userName = retVal['Value']
        pA.setValueByKey('User', userName)
        pA.setValueByKey('UserGroup', pData['OwnerGroup'])
        result = getSiteForCE(pData['DestinationSite'])
        if result['OK'] and result['Value'].strip():
            pA.setValueByKey('Site', result['Value'].strip())
        else:
            pA.setValueByKey('Site', 'Unknown')
        pA.setValueByKey('GridCE', pData['DestinationSite'])
        pA.setValueByKey('GridMiddleware', pData['GridType'])
        pA.setValueByKey('GridResourceBroker', pData['Broker'])
        pA.setValueByKey('GridStatus', pData['Status'])
        # FIX: PEP 8 idiom — "x not in y" rather than "not x in y"
        if 'Jobs' not in pData:
            pA.setValueByKey('Jobs', 0)
        else:
            pA.setValueByKey('Jobs', len(pData['Jobs']))
        self.log.info("Adding accounting record for pilot %s" % pData['PilotID'])
        retVal = gDataStoreClient.addRegister(pA)
        if not retVal['OK']:
            self.log.error('Failed to send accounting info for pilot ', pRef)
        else:
            # Set up AccountingSent flag
            result = pilotAgentsDB.setAccountingFlag(pRef)
            if not result['OK']:
                self.log.error('Failed to set accounting flag for pilot ', pRef)
    self.log.info('Committing accounting records for %d pilots' % len(pilotDict))
    result = gDataStoreClient.commit()
    if result['OK']:
        for pRef in pilotDict:
            self.log.verbose('Setting AccountingSent flag for pilot %s' % pRef)
            result = pilotAgentsDB.setAccountingFlag(pRef)
            if not result['OK']:
                self.log.error('Failed to set accounting flag for pilot ', pRef)
    else:
        return result
    return S_OK()
def delayedCommit(self):
    """ Commit register to the server.
        Delayed commit allows to speed up the operation as more registers
        will be sent at once.
    """
    res = gDataStoreClient.addRegister(self)
    if res['OK']:
        return gDataStoreClient.delayedCommit()
    return res
def delayedCommit(self):
    """ Commit register to the server.
        Delayed commit allows to speed up the operation as more registers
        will be sent at once.
    """
    registerResult = gDataStoreClient.addRegister(self)
    if not registerResult['OK']:
        return registerResult
    return gDataStoreClient.delayedCommit()
def _storeCommand(self, results):
    """Cache the occupancy numbers and push them to the StorageOccupancy accounting.

    :param dict results: something like {'ElementName': 'CERN-HIST-EOS',
                                         'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                                         'Free': 3264963586.10073,
                                         'Total': 8000000000.0}
    :returns: S_OK/S_ERROR dict
    """
    # Stores in cache
    cacheRes = self.rmClient.addOrModifySpaceTokenOccupancyCache(
        endpoint=results["Endpoint"],
        lastCheckTime=datetime.utcnow(),
        free=results["Free"],
        total=results["Total"],
        token=results["ElementName"],
    )
    if not cacheRes["OK"]:
        self.log.error("Error calling addOrModifySpaceTokenOccupancyCache", cacheRes["Message"])
        return cacheRes

    # Now proceed with the accounting
    siteRes = DMSHelpers().getLocalSiteForSE(results["ElementName"])
    if not siteRes["OK"]:
        return siteRes

    baseRecord = {
        "StorageElement": results["ElementName"],
        "Endpoint": results["Endpoint"],
        "Site": siteRes["Value"] if siteRes["Value"] else "unassigned",
    }
    results["Used"] = results["Total"] - results["Free"]
    for spaceType in ("Total", "Free", "Used"):
        occupancyRecord = StorageOccupancy()
        occupancyRecord.setNowAsStartAndEndTime()
        occupancyRecord.setValuesFromDict(baseRecord)
        occupancyRecord.setValueByKey("SpaceType", spaceType)
        occupancyRecord.setValueByKey(
            "Space", int(convertSizeUnits(results[spaceType], "MB", "B")))
        regRes = gDataStoreClient.addRegister(occupancyRecord)
        if not regRes["OK"]:
            self.log.warn("Could not commit register", regRes["Message"])
    return gDataStoreClient.commit()
def _storeCommand(self, results):
    """Store the results in the SpaceTokenOccupancyCache and add records to
    the StorageOccupancy accounting.

    :param dict results: something like {'ElementName': 'CERN-HIST-EOS',
                                         'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                                         'Free': 3264963586.10073,
                                         'Total': 8000000000.0,
                                         'SpaceReservation': 'LHCb-Disk'}
    :returns: S_OK/S_ERROR dict
    """
    # Stores in cache
    cached = self.rmClient.addOrModifySpaceTokenOccupancyCache(
        endpoint=results['Endpoint'],
        lastCheckTime=datetime.utcnow(),
        free=results['Free'],
        total=results['Total'],
        token=results['ElementName'])
    if not cached['OK']:
        self.log.error("Error calling addOrModifySpaceTokenOccupancyCache", cached['Message'])
        return cached

    # Now proceed with the accounting
    siteRes = DMSHelpers().getLocalSiteForSE(results['ElementName'])
    if not siteRes['OK']:
        return siteRes

    site = siteRes['Value'] if siteRes['Value'] else 'unassigned'
    commonFields = {
        'StorageElement': results['ElementName'],
        'Endpoint': results['Endpoint'],
        'Site': site}
    results['Used'] = results['Total'] - results['Free']
    for spaceType in ('Total', 'Free', 'Used'):
        record = StorageOccupancy()
        record.setNowAsStartAndEndTime()
        record.setValuesFromDict(commonFields)
        record.setValueByKey('SpaceType', spaceType)
        record.setValueByKey('Space', int(convertSizeUnits(results[spaceType], 'MB', 'B')))
        addRes = gDataStoreClient.addRegister(record)
        if not addRes['OK']:
            self.log.warn("Could not commit register", addRes['Message'])
    return gDataStoreClient.commit()
def test_addAndRemoveDataperation():
    # NOTE(review): function name has a typo ("Dataperation"); kept unchanged
    # so the public test identifier is stable.
    # just inserting one record
    record = createDataOperationAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    added = gDataStoreClient.addRegister(record)
    assert added["OK"]
    committed = gDataStoreClient.commit()
    assert committed["OK"]
    # now removing that record
    removed = gDataStoreClient.remove(record)
    assert removed["OK"]
def test_addAndRemoveStorageOccupancy():
    # just inserting one record
    record = createStorageOccupancyAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    added = gDataStoreClient.addRegister(record)
    assert added['OK']
    committed = gDataStoreClient.commit()
    assert committed['OK']
    # now removing that record
    removed = gDataStoreClient.remove(record)
    assert removed['OK']
def sendPilotAccounting(self, pilotDict):
    """ Send pilot accounting record

    Builds one PilotAccounting record per pilot, registers it, commits all
    records, and sets the AccountingSent flag per pilot after a successful
    commit.

    :param dict pilotDict: pilot reference -> dict of pilot attributes
    :returns: S_OK, or the failed commit result
    """
    for pRef in pilotDict:
        pData = pilotDict[pRef]  # hoisted: avoid repeated pilotDict[pRef] lookups
        self.log.verbose("Preparing accounting record for pilot %s" % pRef)
        pA = PilotAccounting()
        pA.setEndTime(pData["LastUpdateTime"])
        pA.setStartTime(pData["SubmissionTime"])
        retVal = CS.getUsernameForDN(pData["OwnerDN"])
        if not retVal["OK"]:
            userName = "******"
            self.log.error("Can't determine username for dn:", pData["OwnerDN"])
        else:
            userName = retVal["Value"]
        pA.setValueByKey("User", userName)
        pA.setValueByKey("UserGroup", pData["OwnerGroup"])
        result = getSiteForCE(pData["DestinationSite"])
        if result["OK"] and result["Value"].strip():
            pA.setValueByKey("Site", result["Value"].strip())
        else:
            pA.setValueByKey("Site", "Unknown")
        pA.setValueByKey("GridCE", pData["DestinationSite"])
        pA.setValueByKey("GridMiddleware", pData["GridType"])
        pA.setValueByKey("GridResourceBroker", pData["Broker"])
        pA.setValueByKey("GridStatus", pData["Status"])
        # FIX: PEP 8 idiom — "x not in y" rather than "not x in y"
        if "Jobs" not in pData:
            pA.setValueByKey("Jobs", 0)
        else:
            pA.setValueByKey("Jobs", len(pData["Jobs"]))
        self.log.info("Adding accounting record for pilot %s" % pData["PilotID"])
        retVal = gDataStoreClient.addRegister(pA)
        if not retVal["OK"]:
            self.log.error("Failed to send accounting info for pilot ", pRef)
        else:
            # Set up AccountingSent flag
            result = pilotAgentsDB.setAccountingFlag(pRef)
            if not result["OK"]:
                self.log.error("Failed to set accounting flag for pilot ", pRef)
    self.log.info("Committing accounting records for %d pilots" % len(pilotDict))
    result = gDataStoreClient.commit()
    if result["OK"]:
        for pRef in pilotDict:
            self.log.verbose("Setting AccountingSent flag for pilot %s" % pRef)
            result = pilotAgentsDB.setAccountingFlag(pRef)
            if not result["OK"]:
                self.log.error("Failed to set accounting flag for pilot ", pRef)
    else:
        return result
    return S_OK()
def sendPilotAccounting( self, pilotDict ):
    """ Send pilot accounting record

        Builds one PilotAccounting record per pilot, registers it, commits
        all records, and sets the AccountingSent flag per pilot after a
        successful commit.

        :param dict pilotDict: pilot reference -> dict of pilot attributes
        :returns: S_OK, or the failed commit result
    """
    for pRef in pilotDict:
        pData = pilotDict[pRef]  # hoisted: avoid repeated pilotDict[pRef] lookups
        self.log.verbose( 'Preparing accounting record for pilot %s' % pRef )
        pA = PilotAccounting()
        pA.setEndTime( pData[ 'LastUpdateTime' ] )
        pA.setStartTime( pData[ 'SubmissionTime' ] )
        retVal = CS.getUsernameForDN( pData[ 'OwnerDN' ] )
        if not retVal[ 'OK' ]:
            userName = '******'
            self.log.error( "Can't determine username for dn:", pData[ 'OwnerDN' ] )
        else:
            userName = retVal[ 'Value' ]
        pA.setValueByKey( 'User', userName )
        pA.setValueByKey( 'UserGroup', pData[ 'OwnerGroup' ] )
        result = getSiteForCE( pData[ 'DestinationSite' ] )
        if result['OK'] and result[ 'Value' ].strip():
            pA.setValueByKey( 'Site', result['Value'].strip() )
        else:
            pA.setValueByKey( 'Site', 'Unknown' )
        pA.setValueByKey( 'GridCE', pData[ 'DestinationSite' ] )
        pA.setValueByKey( 'GridMiddleware', pData[ 'GridType' ] )
        pA.setValueByKey( 'GridResourceBroker', pData[ 'Broker' ] )
        pA.setValueByKey( 'GridStatus', pData[ 'Status' ] )
        # FIX: PEP 8 idiom -- "x not in y" rather than "not x in y"
        if 'Jobs' not in pData:
            pA.setValueByKey( 'Jobs', 0 )
        else:
            pA.setValueByKey( 'Jobs', len( pData['Jobs'] ) )
        self.log.verbose( "Adding accounting record for pilot %s" % pData[ 'PilotID' ] )
        retVal = gDataStoreClient.addRegister( pA )
        if not retVal[ 'OK' ]:
            self.log.error( 'Failed to send accounting info for pilot ', pRef )
        else:
            # Set up AccountingSent flag
            result = pilotAgentsDB.setAccountingFlag( pRef )
            if not result['OK']:
                self.log.error( 'Failed to set accounting flag for pilot ', pRef )
    self.log.info( 'Committing accounting records for %d pilots' % len( pilotDict ) )
    result = gDataStoreClient.commit()
    if result['OK']:
        for pRef in pilotDict:
            self.log.verbose( 'Setting AccountingSent flag for pilot %s' % pRef )
            result = pilotAgentsDB.setAccountingFlag( pRef )
            if not result['OK']:
                self.log.error( 'Failed to set accounting flag for pilot ', pRef )
    else:
        return result
    return S_OK()
def test_addAndRemove():
    """Insert one record, commit it, then remove it again."""
    record = createAccountingRecord()
    record.setStartTime()
    record.setEndTime()
    assert gDataStoreClient.addRegister(record)['OK']
    assert gDataStoreClient.commit()['OK']
    # now removing that record
    assert gDataStoreClient.remove(record)['OK']
def sendAccounting(self):
    # NOTE(review): `baseDict`, `startTime`, `endTime`, `commitFlag` and
    # `delayedCommit` are not parameters and are not defined in this method —
    # presumably they are captured from an enclosing scope in the original
    # file; confirm before reusing this code.
    self.dataOp.setValuesFromDict(baseDict)
    # Use the explicit time window if one was given, else stamp with "now"
    if startTime:
        self.dataOp.setStartTime(startTime)
        self.dataOp.setEndTime(endTime)
    else:
        self.dataOp.setStartTime()
        self.dataOp.setEndTime()
    # Adding only to register
    if not commitFlag and not delayedCommit:
        return gDataStoreClient.addRegister(self.dataOp)
    # Adding to register and committing
    if commitFlag and not delayedCommit:
        gDataStoreClient.addRegister(self.dataOp)
        result = gDataStoreClient.commit()
        sLog.debug("Committing data operation to accounting")
        if not result["OK"]:
            sLog.error("Could not commit data operation to accounting", result["Message"])
            return result
        sLog.debug("Done committing to accounting")
    # Only late committing
    else:
        result = self.dataOp.delayedCommit()
        if not result["OK"]:
            sLog.error(
                "Could not delay-commit data operation to accounting")
            return result

    # Send data and commit prioritizing the first monitoring option in the list
    # NOTE(review): looking up `send{backend}` via locals() only works if such
    # callables exist in this function's local scope — none are visible here;
    # this dispatch looks broken or depends on code outside this view. Verify.
    for backend in self.monitoringOptions:
        func = locals()[f"send{backend}"]
        res = func()
        if not res["OK"]:
            return res
def fillAndSendAccountingRecord( self, lfnDir, metadataDict, now ):
    ''' Create, fill and send to accounting a record for the DataStorage type.

        One record is registered per storage element holding the directory;
        the logical (catalog) numbers are reported only on the first record.

        :param lfnDir: logical directory whose usage is reported
                       (key into self.lfnUsage / self.pfnUsage)
        :param dict metadataDict: directory metadata; missing keys default to 'na'
        :param now: timestamp used as both start and end time of the records
        :returns: S_OK / S_ERROR
    '''
    dataRecord = DataStorage()
    dataRecord.setStartTime( now )
    dataRecord.setEndTime( now )
    logicalSize = self.lfnUsage[ lfnDir ][ 'LfnSize' ]
    logicalFiles = self.lfnUsage[ lfnDir ][ 'LfnFiles' ]
    dataRecord.setValueByKey( "LogicalSize", logicalSize )
    dataRecord.setValueByKey( "LogicalFiles", logicalFiles )
    for key in ( 'DataType', 'Activity', 'FileType', 'Production', 'ProcessingPass',
                 'Conditions', 'EventType' ):
        dataRecord.setValueByKey( key, metadataDict.get( key, 'na' ) )
    self.log.verbose( ">>> Send DataStorage record to accounting:" )
    self.log.verbose( "\tlfnFiles: %d lfnSize: %d " % ( logicalFiles, logicalSize ) )
    # One accounting record per SE holding physical replicas of this directory
    for se in self.pfnUsage[ lfnDir ]:
        self.log.verbose( "Filling accounting record for se %s" % se )
        physicalSize = self.pfnUsage[ lfnDir ][ se ][ 'Size' ]
        physicalFiles = self.pfnUsage[ lfnDir ][ se ][ 'Files' ]
        dataRecord.setValueByKey( "StorageElement", se )
        dataRecord.setValueByKey( "PhysicalSize", physicalSize )
        dataRecord.setValueByKey( "PhysicalFiles", physicalFiles )
        self.log.verbose( "\t\tStorageElement: %s --> physFiles: %d physSize: %d " %
                          ( se, physicalFiles, physicalSize ) )
        # addRegister is NOT making a copy, therefore all records are otherwise overwritten
        res = gDataStoreClient.addRegister( copy.deepcopy( dataRecord ) )
        if not res[ 'OK']:
            self.log.error( "addRegister returned: %s" % res )
            return S_ERROR( "addRegister returned: %s" % res )
        # Reset logical information to zero in order to send it only once!
        dataRecord.setValueByKey( "LogicalSize", 0 )
        dataRecord.setValueByKey( "LogicalFiles", 0 )
        self.totalRecords += 1
        self.recordsToCommit += 1
        # Commit if necessary
        if self.recordsToCommit > self.limitForCommit:
            self.__commitRecords()
    return S_OK()
def __updateMigrationAccounting( self, se, migratingFiles, matchingFiles, mismatchingFiles,
                                 assumedEndTime, previousMonitorTime ):
    """ Create accounting messages for the overall throughput observed and the total
        migration time for the files

        :param str se: storage element name (used to label the monitoring marks)
        :param dict migratingFiles: fileID -> dict with 'Size', 'LFN', 'SubmitTime'
        :param list matchingFiles: fileIDs whose checksum matched after migration
        :param list mismatchingFiles: fileIDs whose checksum did NOT match
        :param assumedEndTime: datetime taken as the migration end time
        :param previousMonitorTime: start of the throughput window
        :returns: S_OK / S_ERROR (commit result when something was migrated)
    """
    allMigrated = matchingFiles + mismatchingFiles
    gMonitor.addMark( "MigratedFiles%s" % se, len( allMigrated ) )
    gMonitor.addMark( "TotalMigratedFiles%s" % se, len( allMigrated ) )
    # Collect the LFNs with unknown size so they can be fetched from the catalog in bulk
    lfnFileID = {}
    sizesToObtain = []
    for fileID in allMigrated:
        if not migratingFiles[fileID]['Size']:
            lfn = migratingFiles[fileID]['LFN']
            sizesToObtain.append( lfn )
            lfnFileID[lfn] = fileID
    if sizesToObtain:
        res = self.ReplicaManager.getCatalogFileSize( sizesToObtain )
        if not res['OK']:
            gLogger.error( "[%s] __updateMigrationAccounting: Failed to obtain file sizes" % se )
            return res
        for lfn, error in res['Value']['Failed'].items():
            gLogger.error( "[%s] __updateAccounting: Failed to get file size" % se, "%s %s" % ( lfn, error ) )
            migratingFiles[lfnFileID[lfn]]['Size'] = 0
        for lfn, size in res['Value']['Successful'].items():
            migratingFiles[lfnFileID[lfn]]['Size'] = size
    totalSize = 0
    for fileID in allMigrated:
        size = migratingFiles[fileID]['Size']
        totalSize += size
        submitTime = migratingFiles[fileID]['SubmitTime']
        # FIX: migration time is end - submit; the original computed
        # submitTime - assumedEndTime, which yields a NEGATIVE duration.
        timeDiff = assumedEndTime - submitTime
        migrationTime = ( timeDiff.days * 86400 ) + ( timeDiff.seconds ) + ( timeDiff.microseconds / 1000000.0 )
        gMonitor.addMark( "MigrationTime%s" % se, migrationTime )
        gDataStoreClient.addRegister( self.__initialiseAccountingObject( 'MigrationTime', se,
                                                                         submitTime, assumedEndTime, size ) )
        gDataStoreClient.addRegister( self.__initialiseAccountingObject( 'MigrationThroughput', se,
                                                                         previousMonitorTime, assumedEndTime, size ) )
        oDataOperation = self.__initialiseAccountingObject( 'MigrationSuccess', se,
                                                            submitTime, assumedEndTime, size )
        if fileID in mismatchingFiles:
            # checksum mismatch: account the transfer as failed
            oDataOperation.setValueByKey( 'TransferOK', 0 )
            oDataOperation.setValueByKey( 'FinalStatus', 'Failed' )
        gDataStoreClient.addRegister( oDataOperation )
    gMonitor.addMark( "TotalMigratedSize%s" % se, totalSize )
    gMonitor.addMark( "ChecksumMismatches%s" % se, len( mismatchingFiles ) )
    gMonitor.addMark( "TotalChecksumMismatches%s" % se, len( mismatchingFiles ) )
    gMonitor.addMark( "ChecksumMatches%s" % se, len( matchingFiles ) )
    gMonitor.addMark( "TotalChecksumMatches%s" % se, len( matchingFiles ) )
    if allMigrated:
        gLogger.info( '[%s] __updateMigrationAccounting: Attempting to send accounting message...' % se )
        return gDataStoreClient.commit()
    return S_OK()
def _addPilotsAccountingReport(self, pilotsData):
    """fill accounting data"""
    for pilotRef in pilotsData:
        data = pilotsData[pilotRef]
        record = PilotAccounting()
        record.setEndTime(data["LastUpdateTime"])
        record.setStartTime(data["SubmissionTime"])
        lookup = Registry.getUsernameForDN(data["OwnerDN"])
        if lookup["OK"]:
            userName = lookup["Value"]
        else:
            userName = "******"
            self.log.error(
                "Can't determine username for dn",
                ": %s : %s" % (data["OwnerDN"], lookup["Message"]),
            )
        record.setValueByKey("User", userName)
        record.setValueByKey("UserGroup", data["OwnerGroup"])
        ce = data["DestinationSite"]
        mapping = getCESiteMapping(ce)
        if mapping["OK"] and ce in mapping["Value"]:
            record.setValueByKey("Site", mapping["Value"][ce].strip())
        else:
            record.setValueByKey("Site", "Unknown")
        record.setValueByKey("GridCE", ce)
        record.setValueByKey("GridMiddleware", data["GridType"])
        record.setValueByKey("GridResourceBroker", data["Broker"])
        record.setValueByKey("GridStatus", data["Status"])
        # 'Jobs' may be absent: account zero jobs in that case
        record.setValueByKey("Jobs", len(data["Jobs"]) if "Jobs" in data else 0)
        self.log.verbose("Added accounting record for pilot %s" % data["PilotID"])
        addResult = gDataStoreClient.addRegister(record)
        if not addResult["OK"]:
            return addResult
    return S_OK()
def execute(self):
    """ Main loop of Popularity agent

    Reads 'New' traces from the Popularity table for the configured time
    window, aggregates usage per (day, directory, site), resolves directory
    metadata (from the DirMetadata cache or Bookkeeping), sends Popularity
    accounting records, and finally marks the processed traces as 'Used'.
    """
    now = datetime.now()
    # window ends at today's midnight and spans self.timeInterval days back
    endTime = datetime(now.year, now.month, now.day, 0, 0, 0)
    startTime = endTime - timedelta(days=self.timeInterval)
    endTimeQuery = endTime.isoformat()
    startTimeQuery = startTime.isoformat()
    # query all traces in popularity in the time rage startTime,endTime and status =new
    # the condition to get th etraces is the AND of the time range and the status new
    self.log.info(
        "Querying Pop db to retrieve entries in time range %s - %s " %
        (startTimeQuery, endTimeQuery))
    status = 'New'
    res = self.__dataUsageClient.getDataUsageSummary(
        startTimeQuery, endTimeQuery, status, timeout=self.queryTimeout)
    if not res['OK']:
        self.log.error("Error querying Popularity table.. %s" % res['Message'])
        return S_ERROR(res['Message'])
    val = res['Value']
    self.log.info("Retrieved %d entries from Popularity table" % len(val))
    # Build popularity report, and store the Ids in a list:
    idList = set()
    traceDict = {}
    for row in val:
        self.log.debug("row: %s" % str(row))
        rowId, dirLfn, site, count, insertTime = row
        if rowId not in idList:
            idList.add(rowId)
        else:
            # duplicate trace id: skip it
            self.log.error("Same Id found twice! %d " % rowId)
            continue
        if dirLfn.startswith('/lhcb/user/'):
            self.log.verbose("Private user directory. No metadata stored in Bkk %s " % dirLfn)
            continue
        # get the day (to do )
        dayBin = (insertTime - startTime).days
        # accumulate counts per (dayBin, dirLfn, site); the setdefault chain on
        # the RHS creates the nested dicts before the LHS assignment runs
        traceDict[dayBin][dirLfn][site] = \
            traceDict.setdefault(dayBin, {}).setdefault(dirLfn, {}).setdefault(site, 0) + count
    # print a summary
    dayList = sorted(traceDict)
    for day in dayList:
        self.log.info(" ###### day %s (starting from %s ) " % (day, startTimeQuery))
        self.log.info("---- %d directories touched:" % len(traceDict[day]))
        for lfn in traceDict[day]:
            self.log.verbose(" ---- lfn %s " % lfn)
            for site in traceDict[day][lfn]:
                self.log.verbose(" -------- site %s count: %d " % (site, traceDict[day][lfn][site]))
    self.log.info("Retrieve meta-data information for each directory ")
    now = Time.dateTime()
    # keep a counter of the records to send to accounting data-store
    self.numPopRows = 0
    for day in traceDict:
        timeForAccounting = self.computeTimeForAccounting(startTime, day)
        self.log.info("Processing day %s - time for accounting %s " % (day, timeForAccounting))
        for dirLfn in traceDict[day]:
            # did = configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
            # retrieve the directory meta-data from the DirMetadata table
            self.log.info("Processing dir %s " % dirLfn)
            metaForDir = self.cacheMetadata.get(dirLfn)
            if not metaForDir:
                # this could be done in a bulk query for a list of directories... TBD
                dirList = [dirLfn]
                res = self.__dataUsageClient.getDirMetadata(dirList)
                if not res['OK']:
                    self.log.error("Error retrieving directory meta-data %s " % res['Message'])
                    continue
                dirMetadata = res['Value'].get(dirLfn)
                if not res['Value'] or not dirMetadata:
                    # DirMetadata cache miss: fall back to Bookkeeping
                    self.log.info("Cache missed: query BK to retrieve '%s' metadata and store cache" % dirList)
                    res = self.__bkClient.getDirectoryMetadata(dirList)
                    if not res['OK']:
                        self.log.error("Failed to query Bookkeeping %s" % res['Message'])
                        metadata = None
                    else:
                        self.log.verbose("Successfully queried Bookkeeping, result: %s " % res)
                        metadata = res['Value'].get('Successful', {}).get(dirLfn, [{}])[0]
                    if not metadata:
                        self.log.warn("Directory is not registered in Bookkeeping! %s " % dirLfn)
                        configName = configVersion = conditions = processingPass = eventType = fileType = production = "na"
                    else:
                        # normalise the visibility key name before caching
                        metadata['Visibility'] = metadata.pop(
                            'VisibilityFlag', metadata.get('Visibility', 'na'))
                        configName = metadata['ConfigName']
                        configVersion = metadata['ConfigVersion']
                        conditions = metadata['ConditionDescription']
                        processingPass = metadata['ProcessingPass']
                        eventType = metadata['EventType']
                        fileType = metadata['FileType']
                        production = metadata['Production']
                        self.log.info("Cache this entry in DirMetadata table..")
                        res = self.__dataUsageClient.insertToDirMetadata({dirLfn: metadata})
                        if not res['OK']:
                            self.log.error("Failed to insert metadata in DirMetadata table! %s " % res['Message'])
                        else:
                            self.log.info(
                                "Successfully inserted metadata for directory %s in DirMetadata table " % dirLfn)
                            self.log.verbose("result: %s " % res)
                else:
                    self.log.info("Directory %s was cached in DirMetadata table" % dirLfn)
                    try:
                        __did, configName, configVersion, conditions, \
                            processingPass, eventType, fileType, production = dirMetadata[0:8]
                    except BaseException:
                        self.log.error("Error decoding directory cached information", dirMetadata)
                        continue
                # remember the resolved metadata for subsequent days/directories
                self.cacheMetadata[dirLfn] = (configName, configVersion, conditions,
                                              processingPass, eventType, fileType, production)
            else:
                configName, configVersion, conditions, processingPass, eventType, fileType, production = metaForDir
            for site in traceDict[day][dirLfn]:
                usage = traceDict[day][dirLfn][site]
                # compute the normalized usage, dividing by the number of files in the directory:
                normUsage = usage  # to be done! after we have decided how to normalize
                # Build record for the accounting
                popRecord = Popularity()
                popRecord.setStartTime(timeForAccounting)
                popRecord.setEndTime(timeForAccounting)
                popRecord.setValueByKey("DataType", configName)
                popRecord.setValueByKey("Activity", configVersion)
                popRecord.setValueByKey("FileType", fileType)
                popRecord.setValueByKey("Production", production)
                popRecord.setValueByKey("ProcessingPass", processingPass)
                popRecord.setValueByKey("Conditions", conditions)
                popRecord.setValueByKey("EventType", eventType)
                popRecord.setValueByKey("StorageElement", site)
                popRecord.setValueByKey("Usage", usage)
                popRecord.setValueByKey("NormalizedUsage", normUsage)
                res = gDataStoreClient.addRegister(popRecord)
                if not res['OK']:
                    self.log.error("ERROR: addRegister returned: %s" % res['Message'])
                    continue
                self.numPopRows += 1
                self.log.info(
                    ">>> Sending record to accounting for: %s %s %s %s %s %s %s %s %s %d %d " %
                    (timeForAccounting, configName, configVersion, fileType, production,
                     processingPass, conditions, eventType, site, usage, normUsage))
                # flush to the DataStore in chunks
                if self.numPopRows > self.limitForCommit:
                    res = self.__commitAccounting()
                    if not res['OK']:
                        return res
    # commit what is left, then set the status to Used
    res = self.__commitAccounting()
    if not res['OK']:
        return res
    self.log.info("Set the status to Used for %d entries" % len(idList))
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    for idChunk in breakListIntoChunks(list(idList), 1000):
        res = self.__dataUsageClient.updatePopEntryStatus(
            list(idChunk), 'Used', timeout=self.queryTimeout)
        if not res['OK']:
            self.log.error("Error to update status in Popularity table.. %s" % res['Message'])
            return res
    self.log.info("Status updated to Used correctly for %s entries " % len(idList))
    return S_OK()
def __monitorStorageElementStageRequests(self, storageElement, seReplicaIDs, replicaIDs):
    """Check the status of pending stage requests on one StorageElement.

    :param storageElement: name of the SE whose stage requests are monitored
    :param seReplicaIDs: replica IDs with stage requests on this SE
    :param replicaIDs: dict replicaID -> metadata; each entry must hold 'PFN'
                       and may hold 'RequestID'

    Side effects: registers one DataOperation accounting record, marks
    terminally failed replicas, marks staged replicas, and retries old
    requests through self.stagerClient / self.__wakeupOldRequests.
    Returns None on all paths.
    """
    terminalReplicaIDs = {}
    oldRequests = []
    stagedReplicas = []
    # Map PFN -> replicaID for all replicas, and PFN -> requestID for the
    # subset that actually has a stage request attached.
    pfnRepIDs = {}
    pfnReqIDs = {}
    for replicaID in seReplicaIDs:
        pfn = replicaIDs[replicaID]["PFN"]
        pfnRepIDs[pfn] = replicaID
        requestID = replicaIDs[replicaID].get("RequestID", None)
        if requestID:
            pfnReqIDs[pfn] = replicaIDs[replicaID]["RequestID"]
    gLogger.info(
        "StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s."
        % (len(pfnRepIDs), storageElement)
    )
    # Accounting record spans the metadata query below.
    oAccounting = DataOperation()
    oAccounting.setStartTime()
    # The dict is passed directly; presumably getFileMetadata iterates its
    # PFN keys -- TODO confirm against StorageElement API.
    res = StorageElement(storageElement).getFileMetadata(pfnReqIDs)
    if not res["OK"]:
        gLogger.error(
            "StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.",
            res["Message"],
        )
        return
    prestageStatus = res["Value"]
    accountingDict = self.__newAccountingDict(storageElement)
    # Failed lookups: a "File does not exist" reason is terminal for the replica.
    for pfn, reason in prestageStatus["Failed"].items():
        accountingDict["TransferTotal"] += 1
        if re.search("File does not exist", reason):
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: PFN did not exist in the StorageElement", pfn
            )
            terminalReplicaIDs[pfnRepIDs[pfn]] = "PFN did not exist in the StorageElement"
    # Successful lookups: 'Cached' truthy -> staged; 'Cached' falsy -> the
    # stage request is considered stale and will be retried.
    for pfn, staged in prestageStatus["Successful"].items():
        if staged and "Cached" in staged and staged["Cached"]:
            accountingDict["TransferTotal"] += 1
            accountingDict["TransferOK"] += 1
            accountingDict["TransferSize"] += staged["Size"]
            stagedReplicas.append(pfnRepIDs[pfn])
        if staged and "Cached" in staged and not staged["Cached"]:
            oldRequests.append(pfnRepIDs[pfn])  # only ReplicaIDs
    oAccounting.setValuesFromDict(accountingDict)
    oAccounting.setEndTime()
    gDataStoreClient.addRegister(oAccounting)
    # Update the states of the replicas in the database.
    # Each failure below is logged but does not abort the remaining updates.
    if terminalReplicaIDs:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed."
            % len(terminalReplicaIDs)
        )
        res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
        if not res["OK"]:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.",
                res["Message"],
            )
    if stagedReplicas:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated."
            % len(stagedReplicas)
        )
        res = self.stagerClient.setStageComplete(stagedReplicas)
        if not res["OK"]:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.",
                res["Message"],
            )
        res = self.stagerClient.updateReplicaStatus(stagedReplicas, "Staged")
        if not res["OK"]:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.",
                res["Message"],
            )
    if oldRequests:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried."
            % len(oldRequests)
        )
        res = self.__wakeupOldRequests(oldRequests)
        if not res["OK"]:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.", res["Message"]
            )
    return
def __monitorStorageElementStageRequests( self, storageElement, seReplicaIDs, replicaIDs ):
  """Check the status of pending stage requests on one StorageElement (LFN-keyed variant).

  :param storageElement: name of the SE whose stage requests are monitored
  :param seReplicaIDs: replica IDs with stage requests on this SE
  :param replicaIDs: dict replicaID -> metadata; each entry must hold 'LFN'

  Side effects: registers one DataOperation accounting record, marks
  terminally failed replicas, marks staged replicas, and retries old
  requests through self.stagerClient / self.__wakeupOldRequests.
  Returns None on all paths.
  """
  terminalReplicaIDs = {}
  oldRequests = []
  stagedReplicas = []
  # Since we are in a given SE, the LFN is a unique key
  lfnRepIDs = {}
  for replicaID in seReplicaIDs:
    lfn = replicaIDs[replicaID]['LFN']
    lfnRepIDs[lfn] = replicaID
  if lfnRepIDs:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s." % ( len( lfnRepIDs ), storageElement ) )
  else:
    # Nothing to monitor on this SE: bail out early.
    gLogger.warn( "StageMonitor.__monitorStorageElementStageRequests: No requests to monitor for %s." % storageElement )
    return
  # Accounting record spans the metadata query below.
  oAccounting = DataOperation()
  oAccounting.setStartTime()
  res = StorageElement( storageElement ).getFileMetadata( lfnRepIDs )
  if not res['OK']:
    gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.", res['Message'] )
    return
  prestageStatus = res['Value']
  accountingDict = self.__newAccountingDict( storageElement )
  # Failed lookups: a "File does not exist" reason is terminal for the replica.
  for lfn, reason in prestageStatus['Failed'].iteritems():
    accountingDict['TransferTotal'] += 1
    if re.search( 'File does not exist', reason ):
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: LFN did not exist in the StorageElement", lfn )
      terminalReplicaIDs[lfnRepIDs[lfn]] = 'LFN did not exist in the StorageElement'
  # Successful lookups: 'Cached' (falling back to 'Accessible') decides
  # between staged (truthy) and stale-request-to-retry (falsy but not None).
  for lfn, metadata in prestageStatus['Successful'].iteritems():
    if not metadata:
      continue
    staged = metadata.get( 'Cached', metadata['Accessible'] )
    if staged:
      accountingDict['TransferTotal'] += 1
      accountingDict['TransferOK'] += 1
      accountingDict['TransferSize'] += metadata['Size']
      stagedReplicas.append( lfnRepIDs[lfn] )
    elif staged is not None:
      oldRequests.append( lfnRepIDs[lfn] )  # only ReplicaIDs
  oAccounting.setValuesFromDict( accountingDict )
  oAccounting.setEndTime()
  gDataStoreClient.addRegister( oAccounting )
  # Update the states of the replicas in the database.
  # Each failure below is logged but does not abort the remaining updates.
  if terminalReplicaIDs:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed." % len( terminalReplicaIDs ) )
    res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.", res['Message'] )
  if stagedReplicas:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated." % len( stagedReplicas ) )
    res = self.stagerClient.setStageComplete( stagedReplicas )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.", res['Message'] )
    res = self.stagerClient.updateReplicaStatus( stagedReplicas, 'Staged' )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.", res['Message'] )
  if oldRequests:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried." % len( oldRequests ) )
    res = self.__wakeupOldRequests( oldRequests )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.", res['Message'] )
  return
def addAccountingOperation(self, lfns, startDate, elapsedTime, storageParameters, callRes):
    """
    Generates a DataOperation accounting if needs to be, and adds it to the DataStore client cache

    :param lfns : list of lfns on which we attempted the operation
    :param startDate : datetime, start of the operation
    :param elapsedTime : time (seconds) the operation took
    :param storageParameters : the parameters of the plugins used to perform the operation
    :param callRes : the return of the method call, S_OK or S_ERROR

    The operation is generated with the OperationType "se.methodName"
    The TransferSize and TransferTotal for directory methods actually take into account
    the files inside the directory, and not the amount of directory given as parameter
    """
    # Only account the data-access methods; everything else is a no-op.
    if self.methodName not in (self.readMethods + self.writeMethods + self.removeMethods):
        return

    baseAccountingDict = {}
    baseAccountingDict['OperationType'] = 'se.%s' % self.methodName
    baseAccountingDict['User'] = getProxyInfo().get('Value', {}).get('username', 'unknown')
    baseAccountingDict['RegistrationTime'] = 0.0
    baseAccountingDict['RegistrationOK'] = 0
    baseAccountingDict['RegistrationTotal'] = 0

    # if it is a get method, then source and destination of the transfer should be inverted
    if self.methodName == 'getFile':
        baseAccountingDict['Destination'] = siteName()
        baseAccountingDict['Source'] = self.name
    else:
        baseAccountingDict['Destination'] = self.name
        baseAccountingDict['Source'] = siteName()

    baseAccountingDict['TransferTotal'] = 0
    baseAccountingDict['TransferOK'] = 0
    baseAccountingDict['TransferSize'] = 0
    baseAccountingDict['TransferTime'] = 0.0
    baseAccountingDict['FinalStatus'] = 'Successful'

    oDataOperation = DataOperation()
    oDataOperation.setValuesFromDict(baseAccountingDict)
    oDataOperation.setStartTime(startDate)
    oDataOperation.setEndTime(startDate + datetime.timedelta(seconds=elapsedTime))
    oDataOperation.setValueByKey('TransferTime', elapsedTime)
    oDataOperation.setValueByKey('Protocol', storageParameters.get('Protocol', 'unknown'))

    if not callRes['OK']:
        # Everything failed
        oDataOperation.setValueByKey('TransferTotal', len(lfns))
        oDataOperation.setValueByKey('FinalStatus', 'Failed')
    else:
        succ = callRes.get('Value', {}).get('Successful', {})
        failed = callRes.get('Value', {}).get('Failed', {})

        totalSize = 0
        # We don't take len(lfns) in order to make two
        # separate entries in case of few failures
        totalSucc = len(succ)

        if self.methodName in ('putFile', 'getFile'):
            # putFile and getFile return for each entry
            # in the successful dir the size of the corresponding file
            totalSize = sum(succ.values())
        elif self.methodName in ('putDirectory', 'getDirectory'):
            # putDirectory and getDirectory return for each dir name
            # a dictionnary with the keys 'Files' and 'Size'
            totalSize = sum(val.get('Size', 0) for val in succ.values() if isinstance(val, dict))
            totalSucc = sum(val.get('Files', 0) for val in succ.values() if isinstance(val, dict))

        # NOTE: the original also set TransferOK to len(succ) here, which was
        # immediately overwritten by totalSucc below -- the dead store is removed.
        oDataOperation.setValueByKey('TransferSize', totalSize)
        oDataOperation.setValueByKey('TransferTotal', totalSucc)
        oDataOperation.setValueByKey('TransferOK', totalSucc)

        # BUGFIX: test the defensively-extracted 'failed' dict instead of
        # callRes['Value']['Failed'], which raised KeyError whenever the
        # return structure lacked 'Value' or 'Failed' (succ/failed above
        # are already fetched with .get for exactly that reason).
        if failed:
            # Emit a second, failure-flavoured record for the failed entries.
            oDataOperationFailed = copy.deepcopy(oDataOperation)
            oDataOperationFailed.setValueByKey('TransferTotal', len(failed))
            oDataOperationFailed.setValueByKey('TransferOK', 0)
            oDataOperationFailed.setValueByKey('TransferSize', 0)
            oDataOperationFailed.setValueByKey('FinalStatus', 'Failed')
            accRes = gDataStoreClient.addRegister(oDataOperationFailed)
            if not accRes['OK']:
                self.log.error("Could not send failed accounting report", accRes['Message'])

    accRes = gDataStoreClient.addRegister(oDataOperation)
    if not accRes['OK']:
        self.log.error("Could not send accounting report", accRes['Message'])
def __monitorStorageElementStageRequests(self, storageElement, seReplicaIDs, replicaIDs):
    """Check the status of pending stage requests on one StorageElement.

    :param storageElement: name of the SE whose stage requests are monitored
    :param seReplicaIDs: replica IDs with stage requests on this SE
    :param replicaIDs: dict replicaID -> metadata; each entry must hold 'LFN'
                       and may hold 'RequestID'

    Side effects: registers one DataOperation accounting record, marks
    terminally failed replicas, marks staged replicas, and retries old
    requests through self.stagerClient / self.__wakeupOldRequests.
    Returns None on all paths.
    """
    terminalReplicaIDs = {}
    oldRequests = []
    stagedReplicas = []
    # Since we are in a given SE, the LFN is a unique key
    lfnRepIDs = {}
    lfnReqIDs = {}
    for replicaID in seReplicaIDs:
        lfn = replicaIDs[replicaID]['LFN']
        lfnRepIDs[lfn] = replicaID
        requestID = replicaIDs[replicaID].get('RequestID', None)
        if requestID:
            lfnReqIDs[lfn] = replicaIDs[replicaID]['RequestID']
    gLogger.info(
        "StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s." %
        (len(lfnRepIDs), storageElement))
    # Accounting record spans the metadata query below.
    oAccounting = DataOperation()
    oAccounting.setStartTime()
    # The dict is passed directly; presumably getFileMetadata iterates its
    # LFN keys -- TODO confirm against StorageElement API.
    res = StorageElement(storageElement).getFileMetadata(lfnReqIDs)
    if not res['OK']:
        gLogger.error(
            "StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.",
            res['Message'])
        return
    prestageStatus = res['Value']
    accountingDict = self.__newAccountingDict(storageElement)
    # Failed lookups: a "File does not exist" reason is terminal for the replica.
    for lfn, reason in prestageStatus['Failed'].items():
        accountingDict['TransferTotal'] += 1
        if re.search('File does not exist', reason):
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: LFN did not exist in the StorageElement", lfn)
            terminalReplicaIDs[lfnRepIDs[lfn]] = 'LFN did not exist in the StorageElement'
    # Successful lookups: 'Cached' truthy -> staged; 'Cached' falsy -> the
    # stage request is considered stale and will be retried.
    for lfn, staged in prestageStatus['Successful'].items():
        if staged and 'Cached' in staged and staged['Cached']:
            accountingDict['TransferTotal'] += 1
            accountingDict['TransferOK'] += 1
            accountingDict['TransferSize'] += staged['Size']
            stagedReplicas.append(lfnRepIDs[lfn])
        if staged and 'Cached' in staged and not staged['Cached']:
            oldRequests.append(lfnRepIDs[lfn])  # only ReplicaIDs
    oAccounting.setValuesFromDict(accountingDict)
    oAccounting.setEndTime()
    gDataStoreClient.addRegister(oAccounting)
    # Update the states of the replicas in the database.
    # Each failure below is logged but does not abort the remaining updates.
    if terminalReplicaIDs:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed."
            % len(terminalReplicaIDs))
        res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.",
                res['Message'])
    if stagedReplicas:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated."
            % len(stagedReplicas))
        res = self.stagerClient.setStageComplete(stagedReplicas)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.",
                res['Message'])
        res = self.stagerClient.updateReplicaStatus(stagedReplicas, 'Staged')
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.",
                res['Message'])
    if oldRequests:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried."
            % len(oldRequests))
        res = self.__wakeupOldRequests(oldRequests)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.",
                res['Message'])
    return
def __updateMigrationAccounting(self, se, migratingFiles, matchingFiles, mismatchingFiles, assumedEndTime, previousMonitorTime):
    """ Create accounting messages for the overall throughput observed and the total migration time for the files

    :param se: storage element name, used as suffix for the gMonitor marks
    :param migratingFiles: dict fileID -> dict with at least 'Size', 'LFN', 'SubmitTime'
    :param matchingFiles: fileIDs whose checksum matched
    :param mismatchingFiles: fileIDs whose checksum mismatched
    :param assumedEndTime: datetime taken as the migration end time
    :param previousMonitorTime: datetime, start of the throughput window

    Returns the result of gDataStoreClient.commit() when there were migrated
    files, otherwise S_OK().
    """
    allMigrated = matchingFiles + mismatchingFiles
    gMonitor.addMark("MigratedFiles%s" % se, len(allMigrated))
    gMonitor.addMark("TotalMigratedFiles%s" % se, len(allMigrated))
    # Collect the LFNs whose size is missing (falsy) so it can be fetched
    # from the catalog in one bulk call.
    lfnFileID = {}
    sizesToObtain = []
    for fileID in allMigrated:
        if not migratingFiles[fileID]['Size']:
            lfn = migratingFiles[fileID]['LFN']
            sizesToObtain.append(lfn)
            lfnFileID[lfn] = fileID
    if sizesToObtain:
        res = self.ReplicaManager.getCatalogFileSize(sizesToObtain)
        if not res['OK']:
            gLogger.error(
                "[%s] __updateMigrationAccounting: Failed to obtain file sizes" % se)
            return res
        # Files whose size lookup failed are accounted with size 0.
        # NOTE(review): log prefix says "__updateAccounting" here while the
        # method is __updateMigrationAccounting -- looks like a stale copy.
        for lfn, error in res['Value']['Failed'].items():
            gLogger.error(
                "[%s] __updateAccounting: Failed to get file size" % se, "%s %s" % (lfn, error))
            migratingFiles[lfnFileID[lfn]]['Size'] = 0
        for lfn, size in res['Value']['Successful'].items():
            migratingFiles[lfnFileID[lfn]]['Size'] = size
    totalSize = 0
    for fileID in allMigrated:
        size = migratingFiles[fileID]['Size']
        totalSize += size
        submitTime = migratingFiles[fileID]['SubmitTime']
        # NOTE(review): submitTime - assumedEndTime is non-positive whenever
        # the migration ended after submission, so the MigrationTime mark is
        # negative -- confirm whether assumedEndTime - submitTime was meant.
        timeDiff = submitTime - assumedEndTime
        # Manual total_seconds(): days + seconds + fractional microseconds.
        migrationTime = (timeDiff.days * 86400) + (timeDiff.seconds) + (
            timeDiff.microseconds / 1000000.0)
        gMonitor.addMark("MigrationTime%s" % se, migrationTime)
        # One record per file for migration time and for throughput.
        gDataStoreClient.addRegister(
            self.__initialiseAccountingObject('MigrationTime', se, submitTime, assumedEndTime, size))
        gDataStoreClient.addRegister(
            self.__initialiseAccountingObject('MigrationThroughput', se, previousMonitorTime, assumedEndTime, size))
        oDataOperation = self.__initialiseAccountingObject(
            'MigrationSuccess', se, submitTime, assumedEndTime, size)
        # Checksum mismatches are accounted as failed "MigrationSuccess" records.
        if fileID in mismatchingFiles:
            oDataOperation.setValueByKey('TransferOK', 0)
            oDataOperation.setValueByKey('FinalStatus', 'Failed')
        gDataStoreClient.addRegister(oDataOperation)
    gMonitor.addMark("TotalMigratedSize%s" % se, totalSize)
    gMonitor.addMark("ChecksumMismatches%s" % se, len(mismatchingFiles))
    gMonitor.addMark("TotalChecksumMismatches%s" % se, len(mismatchingFiles))
    gMonitor.addMark("ChecksumMatches%s" % se, len(matchingFiles))
    gMonitor.addMark("TotalChecksumMatches%s" % se, len(matchingFiles))
    if allMigrated:
        gLogger.info(
            '[%s] __updateMigrationAccounting: Attempting to send accounting message...' % se)
        return gDataStoreClient.commit()
    return S_OK()
def execute(self, production_id=None, prod_job_id=None, wms_job_id=None,
            workflowStatus=None, stepStatus=None,
            wf_commons=None, step_commons=None,
            step_number=None, step_id=None):
    """ Main execution method.

    Here we analyse what is written in the XML summary and the pool XML,
    and send accounting.

    Cross-references the POOL XML catalog (self.poolXMLCatName_o) with the
    input-file status list from the XML summary (self.XMLSummary_o), and
    registers one accounting record per (SE, success/failure) data access
    via gDataStoreClient. All parameters are forwarded to the superclass
    execute(). Always returns S_OK(); any exception is only logged as a
    warning, and finalize() runs in all cases.
    """
    try:
        super(AnalyseFileAccess, self).execute(self.version, production_id,
                                               prod_job_id, wms_job_id,
                                               workflowStatus, stepStatus,
                                               wf_commons, step_commons,
                                               step_number, step_id)
        self._resolveInputVariables()
        self.log.info("Analyzing root access from %s and %s" % (self.XMLSummary,
                                                                self.poolXMLCatName))
        # pfn -> lfn, lfn -> guid, lfn -> list of failed pfns, lfns fully read
        pfn_lfn = {}
        lfn_guid = {}
        lfn_pfn_fail = {}
        successful_lfn = set()
        # Build the lookup tables from the POOL XML catalog.
        for guid in self.poolXMLCatName_o.files:
            pFile = self.poolXMLCatName_o.files[guid]
            lfn = pFile.lfns[0]  # there can be only one
            lfn_guid[lfn] = guid
            self.lfn_pfn[lfn] = []
            for pfn, _ftype, se in pFile.pfns:
                pfn_lfn[pfn] = lfn
                self.pfn_se[pfn] = se
                self.lfn_pfn[lfn].append(pfn)
        # Walk the input status reported by the XML summary.
        for inputFile, status in self.XMLSummary_o.inputStatus:
            # The inputFile starts with 'LFN:' or 'PFN:'
            cleanedName = inputFile[4:]
            if status == 'full':
                # it is an LFN
                successful_lfn.add(cleanedName)
            elif status == 'fail':
                # it is a PFN
                lfn = pfn_lfn.get(cleanedName)
                if not lfn:
                    self.log.error(
                        "Failed pfn %s is not listed in the catalog" % cleanedName)
                    continue
                lfn_pfn_fail.setdefault(lfn, []).append(cleanedName)
            else:
                # intermediate status, think of it...
                pass
        # The lfn in successful and not in lfn_pfn_failed succeeded immediately
        immediately_successful = successful_lfn - set(lfn_pfn_fail)
        for lfn in immediately_successful:
            # We take the first replica in the catalog
            pfn = self.__getNthPfnForLfn(lfn, 0)
            remoteSE = self.pfn_se.get(pfn)
            if not remoteSE:
                continue
            oDataOperation = self.__initialiseAccountingObject(remoteSE, True)
            gDataStoreClient.addRegister(oDataOperation)
        # For each file that had failure
        for lfn in lfn_pfn_fail:
            failedPfns = lfn_pfn_fail[lfn]
            # We add the accounting for the failure
            for pfn in failedPfns:
                remoteSE = self.pfn_se.get(pfn)
                if not remoteSE:
                    continue
                oDataOperation = self.__initialiseAccountingObject(remoteSE, False)
                gDataStoreClient.addRegister(oDataOperation)
            # If there were more options to try, the next one is successful
            if len(failedPfns) < len(self.lfn_pfn[lfn]):
                pfn = self.__getNthPfnForLfn(lfn, len(failedPfns))
                remoteSE = self.pfn_se.get(pfn)
                if not remoteSE:
                    continue
                oDataOperation = self.__initialiseAccountingObject(remoteSE, True)
                gDataStoreClient.addRegister(oDataOperation)
        gDataStoreClient.commit()
    except Exception as e:  # pylint:disable=broad-except
        # Best-effort accounting: never fail the step because of it.
        self.log.warn(str(e))
    finally:
        super(AnalyseFileAccess, self).finalize(self.version)
    return S_OK()
def topDirectoryAccounting( self ):
  """Generate and commit Storage accounting records for the /lhcb first-level directories.

  Aggregates the logical usage (files/size from the su_Directory table) per
  first-level directory, fetches the physical per-SE usage for each of them,
  and sends one Storage record per (directory, SE) pair to the accounting
  DataStore.

  Returns an error result dict on any DB or commit failure, S_OK() on success.
  """
  self.log.notice( "-------------------------------------------------------------------------------------\n" )
  self.log.notice( "Generate accounting records for top directories " )
  self.log.notice( "-------------------------------------------------------------------------------------\n" )
  ftb = 1.0e12  # bytes per TB, for log reporting only
  # get info from the DB about the LOGICAL STORAGE USAGE (from the su_Directory table):
  result = self.__stDB.getSummary( '/lhcb/' )
  if not result[ 'OK' ]:
    return result
  logicalUsage = result['Value']
  topDirLogicalUsage = {}  # build the list of first level directories
  for row in logicalUsage:
    # Aggregate every deeper directory into its "/lhcb/<x>/" ancestor.
    splitDir = row.split( "/" )
    if len( splitDir ) > 3:  # skip the root directory "/lhcb/"
      firstLevelDir = '/' + splitDir[1] + '/' + splitDir[2] + '/'
      topDirLogicalUsage.setdefault( firstLevelDir, {'Files':0, 'Size':0} )
      topDirLogicalUsage[ firstLevelDir ][ 'Files' ] += logicalUsage[ row ][ 'Files' ]
      topDirLogicalUsage[ firstLevelDir ][ 'Size' ] += logicalUsage[ row ][ 'Size' ]
  self.log.notice( "Summary on logical usage of top directories: " )
  for row in topDirLogicalUsage:
    self.log.notice( "dir: %s size: %.4f TB files: %d" % ( row, topDirLogicalUsage[row]['Size'] / ftb, topDirLogicalUsage[row]['Files'] ) )
  # loop on top level directories (/lhcb/data, /lhcb/user/, /lhcb/MC/, etc..)
  # to get the summary in terms of PHYSICAL usage grouped by SE:
  seData = {}
  for directory in topDirLogicalUsage:
    result = self.__stDB.getDirectorySummaryPerSE( directory )  # retrieve the PHYSICAL usage
    if not result[ 'OK' ]:
      return result
    seData[ directory ] = result[ 'Value' ]
  self.log.notice( "Got SE summary for %s directories " % ( len( seData ) ) )
  self.log.debug( "SEData: %s" % seData )
  # loop on top level directories to send the accounting records
  numRows = 0
  now = Time.dateTime()
  for directory in seData:
    self.log.debug( "dir: %s SEData: %s " % ( directory, seData[ directory ] ) )
    if directory not in topDirLogicalUsage:
      self.log.error( "Dir %s is in the summary per SE, but it is not in the logical files summary!" % directory )
      continue
    for se in sorted( seData[ directory ] ):
      # One Storage record per (directory, SE), timestamped "now".
      storageRecord = Storage()
      storageRecord.setStartTime( now )
      storageRecord.setEndTime( now )
      storageRecord.setValueByKey( "Directory", directory )
      storageRecord.setValueByKey( "StorageElement", se )
      storageRecord.setValueByKey( "LogicalFiles", topDirLogicalUsage[ directory ][ 'Files' ] )
      storageRecord.setValueByKey( "LogicalSize", topDirLogicalUsage[ directory ][ 'Size' ] )
      # BUGFIX: the bare "except:" clauses swallowed every exception
      # (including KeyboardInterrupt); only a missing key can occur here.
      try:
        physicalFiles = seData[ directory ][ se ][ 'Files' ]
      except KeyError:
        self.log.error( "WARNING! no files replicas for directory %s on SE %s" % ( directory, se ) )
        physicalFiles = 0
      try:
        physicalSize = seData[ directory ][ se ][ 'Size' ]
      except KeyError:
        self.log.error( "WARNING! no size for replicas for directory %s on SE %s" % ( directory, se ) )
        physicalSize = 0
      storageRecord.setValueByKey( "PhysicalFiles", physicalFiles )
      storageRecord.setValueByKey( "PhysicalSize", physicalSize )
      gDataStoreClient.addRegister( storageRecord )
      numRows += 1
      self.log.debug( "Directory: %s SE: %s physical size: %.4f TB (%d files)" % ( directory, se, physicalSize / ftb, physicalFiles ) )
  self.log.notice( "Sending %s records to accounting for top level directories storage" % numRows )
  res = gDataStoreClient.commit()
  if not res[ 'OK' ]:
    self.log.notice( "ERROR: committing Storage records: %s " % res )
    return S_ERROR( res )
  self.log.notice( "%s records for Storage type successfully committed" % numRows )
  # BUGFIX: previously fell off the end returning None on success; error
  # paths return result dicts, so success now does too.
  return S_OK()
def __monitorStorageElementStageRequests( self, storageElement, seReplicaIDs, replicaIDs ):
  """Check the status of pending stage requests on one StorageElement (PFN / replicaManager variant).

  :param storageElement: name of the SE whose stage requests are monitored
  :param seReplicaIDs: replica IDs with stage requests on this SE
  :param replicaIDs: dict replicaID -> metadata; each entry must hold 'PFN'
                     and may hold 'RequestID'

  Side effects: registers one DataOperation accounting record, marks
  terminally failed replicas, marks staged replicas, and retries old
  requests through self.stagerClient / self.__wakeupOldRequests.
  Returns None on all paths.
  """
  terminalReplicaIDs = {}
  oldRequests = []
  stagedReplicas = []
  # Map PFN -> replicaID for all replicas, and PFN -> requestID for the
  # subset that actually has a stage request attached.
  pfnRepIDs = {}
  pfnReqIDs = {}
  for replicaID in seReplicaIDs:
    pfn = replicaIDs[replicaID]['PFN']
    pfnRepIDs[pfn] = replicaID
    requestID = replicaIDs[replicaID].get( 'RequestID', None )
    if requestID:
      pfnReqIDs[pfn] = replicaIDs[replicaID]['RequestID']
  gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s." % ( len( pfnRepIDs ), storageElement ) )
  # Accounting record spans the metadata query below.
  oAccounting = DataOperation()
  oAccounting.setStartTime()
  # Only the PFNs that have a stage request are queried.
  res = self.replicaManager.getStorageFileMetadata( pfnReqIDs.keys(), storageElement )
  if not res['OK']:
    gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.", res['Message'] )
    return
  prestageStatus = res['Value']
  accountingDict = self.__newAccountingDict( storageElement )
  # Failed lookups: a "File does not exist" reason is terminal for the replica.
  for pfn, reason in prestageStatus['Failed'].items():
    accountingDict['TransferTotal'] += 1
    if re.search( 'File does not exist', reason ):
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: PFN did not exist in the StorageElement", pfn )
      terminalReplicaIDs[pfnRepIDs[pfn]] = 'PFN did not exist in the StorageElement'
  # Successful lookups: 'Cached' truthy -> staged; 'Cached' falsy -> the
  # stage request is considered stale and will be retried.
  for pfn, staged in prestageStatus['Successful'].items():
    if staged and 'Cached' in staged and staged['Cached']:
      accountingDict['TransferTotal'] += 1
      accountingDict['TransferOK'] += 1
      accountingDict['TransferSize'] += staged['Size']
      stagedReplicas.append( pfnRepIDs[pfn] )
    if staged and 'Cached' in staged and not staged['Cached']:
      oldRequests.append( pfnRepIDs[pfn] );  # only ReplicaIDs
  oAccounting.setValuesFromDict( accountingDict )
  oAccounting.setEndTime()
  gDataStoreClient.addRegister( oAccounting )
  # Update the states of the replicas in the database.
  # Each failure below is logged but does not abort the remaining updates.
  if terminalReplicaIDs:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed." % len( terminalReplicaIDs ) )
    res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.", res['Message'] )
  if stagedReplicas:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated." % len( stagedReplicas ) )
    res = self.stagerClient.setStageComplete( stagedReplicas )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.", res['Message'] )
    res = self.stagerClient.updateReplicaStatus( stagedReplicas, 'Staged' )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.", res['Message'] )
  if oldRequests:
    gLogger.info( "StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried." % len( oldRequests ) )
    res = self.__wakeupOldRequests( oldRequests )
    if not res['OK']:
      gLogger.error( "StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.", res['Message'] )
  return
def addAccountingOperation(self, lfns, startDate, elapsedTime, storageParameters, callRes):
    """
    Generates a DataOperation accounting if needs to be, and adds it to the DataStore client cache

    :param lfns : list of lfns on which we attempted the operation
    :param startDate : datetime, start of the operation
    :param elapsedTime : time (seconds) the operation took
    :param storageParameters : the parameters of the plugins used to perform the operation
    :param callRes : the return of the method call, S_OK or S_ERROR

    The operation is generated with the OperationType "se.methodName"
    The TransferSize and TransferTotal for directory methods actually take into account
    the files inside the directory, and not the amount of directory given as parameter
    """
    # Only account the data-access methods; everything else is a no-op.
    if self.methodName not in (self.readMethods + self.writeMethods + self.removeMethods):
        return

    baseAccountingDict = {}
    baseAccountingDict['OperationType'] = 'se.%s' % self.methodName
    baseAccountingDict['User'] = getProxyInfo().get('Value', {}).get(
        'username', 'unknown')
    baseAccountingDict['RegistrationTime'] = 0.0
    baseAccountingDict['RegistrationOK'] = 0
    baseAccountingDict['RegistrationTotal'] = 0

    # if it is a get method, then source and destination of the transfer should be inverted
    if self.methodName == 'getFile':
        baseAccountingDict['Destination'] = siteName()
        baseAccountingDict['Source'] = self.name
    else:
        baseAccountingDict['Destination'] = self.name
        baseAccountingDict['Source'] = siteName()

    baseAccountingDict['TransferTotal'] = 0
    baseAccountingDict['TransferOK'] = 0
    baseAccountingDict['TransferSize'] = 0
    baseAccountingDict['TransferTime'] = 0.0
    baseAccountingDict['FinalStatus'] = 'Successful'

    oDataOperation = DataOperation()
    oDataOperation.setValuesFromDict(baseAccountingDict)
    oDataOperation.setStartTime(startDate)
    oDataOperation.setEndTime(startDate + datetime.timedelta(seconds=elapsedTime))
    oDataOperation.setValueByKey('TransferTime', elapsedTime)
    oDataOperation.setValueByKey(
        'Protocol', storageParameters.get('Protocol', 'unknown'))

    if not callRes['OK']:
        # Everything failed
        oDataOperation.setValueByKey('TransferTotal', len(lfns))
        oDataOperation.setValueByKey('FinalStatus', 'Failed')
    else:
        succ = callRes.get('Value', {}).get('Successful', {})
        failed = callRes.get('Value', {}).get('Failed', {})

        totalSize = 0
        # We don't take len(lfns) in order to make two
        # separate entries in case of few failures
        totalSucc = len(succ)

        if self.methodName in ('putFile', 'getFile'):
            # putFile and getFile return for each entry
            # in the successful dir the size of the corresponding file
            totalSize = sum(succ.values())
        elif self.methodName in ('putDirectory', 'getDirectory'):
            # putDirectory and getDirectory return for each dir name
            # a dictionnary with the keys 'Files' and 'Size'
            totalSize = sum(
                val.get('Size', 0) for val in succ.values() if isinstance(val, dict))
            totalSucc = sum(
                val.get('Files', 0) for val in succ.values() if isinstance(val, dict))

        # NOTE: the original also set TransferOK to len(succ) here, which was
        # immediately overwritten by totalSucc below -- the dead store is removed.
        oDataOperation.setValueByKey('TransferSize', totalSize)
        oDataOperation.setValueByKey('TransferTotal', totalSucc)
        oDataOperation.setValueByKey('TransferOK', totalSucc)

        # BUGFIX: test the defensively-extracted 'failed' dict instead of
        # callRes['Value']['Failed'], which raised KeyError whenever the
        # return structure lacked 'Value' or 'Failed' (succ/failed above
        # are already fetched with .get for exactly that reason).
        if failed:
            # Emit a second, failure-flavoured record for the failed entries.
            oDataOperationFailed = copy.deepcopy(oDataOperation)
            oDataOperationFailed.setValueByKey('TransferTotal', len(failed))
            oDataOperationFailed.setValueByKey('TransferOK', 0)
            oDataOperationFailed.setValueByKey('TransferSize', 0)
            oDataOperationFailed.setValueByKey('FinalStatus', 'Failed')
            accRes = gDataStoreClient.addRegister(oDataOperationFailed)
            if not accRes['OK']:
                self.log.error("Could not send failed accounting report", accRes['Message'])

    accRes = gDataStoreClient.addRegister(oDataOperation)
    if not accRes['OK']:
        self.log.error("Could not send accounting report", accRes['Message'])