class PropagationCommand(Command):
    """Command that derives a site status from the statuses of its elements.

    The site is taken from ``self.args['site']``. Its elements are looked up
    with ``CSHelpers.getSiteElements`` and the status of each one is read from
    the Resource/Status table through ``ResourceStatusClient``.
    """

    def __init__(self, args=None, clients=None):
        """Create the ResourceStatusClient and initialise the base Command."""
        self.rssClient = ResourceStatusClient()
        super(PropagationCommand, self).__init__(args, clients)

    def doNew(self, masterParams=None):
        """Do nothing and return S_OK()."""
        return S_OK()

    def doCache(self):
        """Compute the propagated status of the site.

        :return: S_ERROR if no site is given in the args; the failed result of
                 ``selectStatusElement`` if a status query fails; otherwise
                 S_OK with a dict holding 'Status' ('Active', 'Degraded' or
                 'Banned') and 'Reason'.
        """
        # .get() so that a missing key produces the S_ERROR below rather than
        # an unhandled KeyError.
        site = self.args.get('site')
        if not site:
            return S_ERROR('site was not found in args')

        elements = CSHelpers.getSiteElements(site)

        statusList = []

        # NOTE(review): when getSiteElements fails, statusList stays empty and
        # the site is reported as 'Banned' -- confirm this is intended.
        if elements['OK']:
            for element in elements['Value']:
                status = self.rssClient.selectStatusElement(
                    "Resource", "Status", element, meta={'columns': ['Status']})
                if not status['OK']:
                    return status

                if status['Value']:
                    statusList.append(status['Value'][0][0])
                else:
                    # forcing in the case the resource has no status (yet)
                    statusList.append('Active')

        if 'Active' in statusList:
            return S_OK({
                'Status': 'Active',
                'Reason': 'An element that belongs to the site is Active'
            })

        if 'Degraded' in statusList:
            return S_OK({
                'Status': 'Degraded',
                'Reason': 'An element that belongs to the site is Degraded'
            })

        return S_OK({
            'Status': 'Banned',
            'Reason': 'There is no Active element in the site'
        })

    def doMaster(self):
        """Do nothing and return S_OK()."""
        return S_OK()
class PropagationCommand(Command):
    """Command that derives a site status from the statuses of its elements.

    The site is taken from ``self.args["name"]``. Its elements are looked up
    with ``CSHelpers.getSiteElements`` and the status of each one is read from
    the Resource/Status table through ``ResourceStatusClient``.
    """

    def __init__(self, args=None, clients=None):
        """Create the ResourceStatusClient and initialise the base Command."""
        self.rssClient = ResourceStatusClient()
        super(PropagationCommand, self).__init__(args, clients)

    def doNew(self, masterParams=None):
        """Do nothing and return S_OK()."""
        return S_OK()

    def doCache(self):
        """Compute the propagated status of the site.

        :return: S_ERROR if no site name is given in the args; the failed
                 result of ``selectStatusElement`` if a status query fails;
                 otherwise S_OK with a dict holding "Status" ("Active",
                 "Degraded" or "Banned") and "Reason".
        """
        # .get() so that a missing key produces the S_ERROR below rather than
        # an unhandled KeyError.
        site = self.args.get("name")
        if not site:
            return S_ERROR("site was not found in args")

        elements = CSHelpers.getSiteElements(site)

        statusList = []

        # NOTE(review): when getSiteElements fails, statusList stays empty and
        # the site is reported as "Banned" -- confirm this is intended.
        if elements["OK"]:
            for element in elements["Value"]:
                status = self.rssClient.selectStatusElement(
                    "Resource", "Status", element, meta={"columns": ["Status"]})
                if not status["OK"]:
                    return status

                if status["Value"]:
                    statusList.append(status["Value"][0][0])
                else:
                    # forcing in the case the resource has no status (yet)
                    statusList.append("Active")

        if "Active" in statusList:
            return S_OK({
                "Status": "Active",
                "Reason": "An element that belongs to the site is Active"
            })

        if "Degraded" in statusList:
            return S_OK({
                "Status": "Degraded",
                "Reason": "An element that belongs to the site is Degraded"
            })

        return S_OK({
            "Status": "Banned",
            "Reason": "There is no Active element in the site"
        })

    def doMaster(self):
        """Do nothing and return S_OK()."""
        return S_OK()
def getSiteMaskLogging(self, site=None, printOutput=False):
    """Retrieves site mask logging information.

    Reads the 'ComputingAccess' rows of the Site History table and groups
    them by full site name.

    Example usage:

      >>> diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
      {'OK': True, 'Value': {...}}

    :param site: site name to restrict the query to, or None for all sites
    :param printOutput: if True, also print the collected entries
    :returns: S_OK( { siteName: [ (status, reason, dateEffective, author,
              dateTokenExpiration), ... ] } ) or S_ERROR
    """
    result = self.__checkSiteIsValid(site)
    if not result['OK']:
        return result

    rssClient = ResourceStatusClient()
    result = rssClient.selectStatusElement('Site', 'History', name=site,
                                           statusType='ComputingAccess')
    if not result['OK']:
        return result

    siteDict = {}
    for logTuple in result['Value']:
        # Every row is expected to carry exactly these ten columns; the ones
        # prefixed with "_" are not used below.
        (status, reason, siteName, dateEffective, dateTokenExpiration,
         _eType, _sType, _eID, _lastCheckTime, author) = logTuple
        result = getSiteFullNames(siteName)
        if not result['OK']:
            continue

        for sName in result['Value']:
            if site is None or (site and site == sName):
                siteDict.setdefault(sName, []).append(
                    (status, reason, dateEffective, author, dateTokenExpiration))

    if printOutput:
        if site:
            print('\nSite Mask Logging Info for %s\n' % site)
        else:
            print('\nAll Site Mask Logging Info\n')

        # The loop variable must not be called "site": that would shadow the
        # parameter and the per-site header below would never be printed.
        for loggedSite, tupleList in siteDict.items():
            if not site:
                print('\n===> %s\n' % loggedSite)
            for tup in tupleList:
                print(str(tup[0]).ljust(8) + str(tup[1]).ljust(20) +
                      '( ' + str(tup[2]) + ' ) "' + str(tup[3]) + '"')
            print(' ')

    return S_OK(siteDict)
def getSiteMaskLogging(self, site=None, printOutput=False):
    """Retrieves site mask logging information.

    Queries the Site History table for statusType 'ComputingAccess' and
    collects the returned rows per full site name.

    Example usage:

      >>> diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
      {'OK': True, 'Value': {...}}

    :param site: site name to restrict the query to, or None for all sites
    :param printOutput: if True, the entries are also printed
    :returns: S_OK( { siteName: [ (status, reason, dateEffective, author,
              dateTokenExpiration), ... ] } ) or S_ERROR
    """
    result = self.__checkSiteIsValid(site)
    if not result['OK']:
        return result

    rssClient = ResourceStatusClient()
    result = rssClient.selectStatusElement('Site', 'History', name=site,
                                           statusType='ComputingAccess')
    if not result['OK']:
        return result

    siteDict = {}
    for logTuple in result['Value']:
        # Rows are unpacked as ten columns; only some of them are kept.
        (status, reason, siteName, dateEffective, dateTokenExpiration,
         _eType, _sType, _eID, _lastCheckTime, author) = logTuple

        fullNames = getSiteFullNames(siteName)
        if not fullNames['OK']:
            continue

        entry = (status, reason, dateEffective, author, dateTokenExpiration)
        for sName in fullNames['Value']:
            if site is None or (site and site == sName):
                siteDict.setdefault(sName, []).append(entry)

    if printOutput:
        if site:
            print('\nSite Mask Logging Info for %s\n' % site)
        else:
            print('\nAll Site Mask Logging Info\n')

        # Use a dedicated loop variable: reusing "site" here hid the parameter
        # and made the "===>" header unreachable.
        for printedSite, tupleList in siteDict.items():
            if not site:
                print('\n===> %s\n' % printedSite)
            for tup in tupleList:
                print(str(tup[0]).ljust(8) + str(tup[1]).ljust(20) +
                      '( ' + str(tup[2]) + ' ) "' + str(tup[3]) + '"')
            print(' ')

    return S_OK(siteDict)
class PropagationCommand(Command):
    """Propagate the status of a site's elements up to the site itself.

    The site comes from ``self.args['site']``; the element list comes from
    ``CSHelpers.getSiteElements`` and each element status is read from the
    Resource/Status table.
    """

    def __init__(self, args=None, clients=None):
        """Create the ResourceStatusClient and initialise the base Command."""
        self.rssClient = ResourceStatusClient()
        super(PropagationCommand, self).__init__(args, clients)

    def doNew(self, masterParams=None):
        """Do nothing and return S_OK()."""
        return S_OK()

    def doCache(self):
        """Compute the propagated status of the site.

        :return: S_ERROR if no site is given in the args; the failed result of
                 ``selectStatusElement`` if a status query fails; otherwise
                 S_OK with a dict holding 'Status' ('Active', 'Degraded' or
                 'Banned') and 'Reason'.
        """
        # A missing 'site' key must end in the S_ERROR below, not a KeyError.
        site = self.args.get('site')
        if not site:
            return S_ERROR('site was not found in args')

        elements = CSHelpers.getSiteElements(site)

        statusList = []

        # NOTE(review): a failed getSiteElements leaves statusList empty, so
        # the site is reported 'Banned' -- confirm this is intended.
        if elements['OK']:
            for element in elements['Value']:
                status = self.rssClient.selectStatusElement("Resource", "Status", element,
                                                            meta={'columns': ['Status']})
                if not status['OK']:
                    return status

                if status['Value']:
                    statusList.append(status['Value'][0][0])
                else:
                    # forcing in the case the resource has no status (yet)
                    statusList.append('Active')

        if 'Active' in statusList:
            return S_OK({'Status': 'Active',
                         'Reason': 'An element that belongs to the site is Active'})

        if 'Degraded' in statusList:
            return S_OK({'Status': 'Degraded',
                         'Reason': 'An element that belongs to the site is Degraded'})

        return S_OK({'Status': 'Banned',
                     'Reason': 'There is no Active element in the site'})

    def doMaster(self):
        """Do nothing and return S_OK()."""
        return S_OK()
class TokenAgent(AgentModule):
    '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    '''

    # Hours to notify a user
    __notifyHours = 12

    # Rss token
    __rssToken = 'rs_svc'

    # Admin mail
    __adminMail = None

    def __init__(self, *args, **kwargs):
        ''' c'tor: sets the defaults; the clients are created in initialize.
        '''
        AgentModule.__init__(self, *args, **kwargs)

        self.notifyHours = self.__notifyHours
        self.adminMail = self.__adminMail

        self.rsClient = None
        self.rmClient = None
        self.noClient = None

        self.tokenDict = None
        self.diracAdmin = None

    def initialize(self):
        ''' TokenAgent initialization

        Uses the ProductionManager shifterProxy to modify the ResourceStatus DB

        :return: S_OK
        '''
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)

        self.rsClient = ResourceStatusClient()
        self.rmClient = ResourceManagementClient()
        self.noClient = NotificationClient()

        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        '''
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Errors while processing one element family are logged and the next
        family is processed; the method always returns S_OK.
        '''
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        for element in elements:
            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        # NOTE(review): _notifyOfTokens is not defined in the visible part of
        # this class; presumably it lives further down the file -- confirm.
        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    ## Protected methods #########################################################

    def _getInterestingTokens(self, element):
        '''
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param element: name of the table family queried ( <element>Status )
        :return: S_OK( list of row dictionaries ) / S_ERROR
        '''
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element, 'Status', meta={'older': ('TokenExpiration', tokenExpLimit)})
        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']

        # Turn every row into a { column : value } dictionary and keep only
        # the rows whose token is not owned by the RSS itself.
        rows = (dict(zip(tokenColumns, row)) for row in tokenElements['Value'])
        interestingTokens = [row for row in rows if row['TokenOwner'] != self.__rssToken]

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        '''
        Given an element and a list of interesting token elements, updates the
        database if the token is expired (giving the token back to the rssToken
        owner with no expiration), logs a message and adds the entry to
        self.tokenDict, keyed by the token owner.

        :return: S_OK / S_ERROR
        '''
        never = datetime.max

        for tokenElement in tokenElements:
            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(element, 'Status', name=name,
                                                                statusType=statusType,
                                                                tokenOwner=self.__rssToken,
                                                                tokenExpiration=never)
                if not result['OK']:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both recorded for the owner.
            self.tokenDict.setdefault(tokenOwner, []).append(
                [tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()
class SummarizeLogsAgent(AgentModule):
    """ SummarizeLogsAgent as extension of AgentModule.

    Moves the entries of the <element>Log tables into the <element>History
    tables, keeping only the entries where the Status or the TokenOwner changes.
    """

    def __init__(self, *args, **kwargs):
        """ Constructor.
        """
        AgentModule.__init__(self, *args, **kwargs)

        self.rsClient = None

    def initialize(self):
        """ Standard initialize.

        :return: S_OK
        """
        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        """ execute ( main method )

        The execute method runs over the three families of tables ( Site, Resource and
        Node ) performing identical operations. First, selects all logs for a given
        family ( and keeps track of which one is the last row ID ). It summarizes the
        logs and finally, deletes the logs from the database.

        :return: S_OK
        """
        # loop over the tables
        for element in ('Site', 'Resource', 'Node'):
            self.log.info('Summarizing %s' % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue

            lastID, logElements = selectLogElements['Value']

            # logElements is a dictionary of key-value pairs as follows:
            # ( name, statusType ) : list( logs )
            for key, logs in logElements.items():
                sumResult = self._registerLogs(element, key, logs)
                if not sumResult['OK']:
                    self.log.error(sumResult['Message'])

            # NOTE(review): the logs are deleted even when registering some of
            # them failed above -- confirm this is intended.
            if lastID is not None:
                self.log.info('Deleting %sLog till ID %s' % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(element, 'Log',
                                                                 meta={'older': ('ID', lastID)})
                if not deleteResult['OK']:
                    self.log.error(deleteResult['Message'])

        return S_OK()

    def _summarizeLogs(self, element):
        """ given an element, selects all logs in table <element>Log.

        Consecutive logs of the same ( name, statusType ) that change neither
        the Status nor the TokenOwner are discarded.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource or Node )

        :return: S_OK( ( lastID, { ( name, statusType ) : listOfLogs } ) ) / S_ERROR
        """
        selectResults = self.rsClient.selectStatusElement(element, 'Log')
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        latestID = None

        if not selectResults['Value']:
            return S_OK((latestID, selectedItems))

        selectColumns = selectResults['Columns']
        selectResults = selectResults['Value']

        # ID of the last row returned by the query
        latestID = dict(zip(selectColumns, selectResults[-1]))['ID']

        for selectResult in selectResults:
            elementDict = dict(zip(selectColumns, selectResult))

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
                continue

            previous = selectedItems[key][-1]

            # If there are no changes on the Status or the TokenOwner with respect
            # the previous one, discards the log.
            if previous['Status'] != elementDict['Status'] or \
               previous['TokenOwner'] != elementDict['TokenOwner']:
                selectedItems[key].append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """ Given an element, a key - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them on the <element>History
        table. Before inserting them, checks whether the first one is or is not on
        the <element>History table. If it is, it is not inserted. If the History
        table has no entry at all for the key, every log is inserted.

        Note: the first entry may be removed from the `logs` list passed in.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **key** - `tuple`
            tuple with the name of the element and the statusType
          **logs** - `list`
            list of dictionaries containing the logs

        :return: S_OK / S_ERROR
        """
        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(element, 'History', name, statusType,
                                                        meta={'columns': ['Status', 'TokenOwner'],
                                                              'limit': 1,
                                                              'order': ('DateEffective', 'desc')})
        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        # We want from the <element>History table the last Status, and TokenOwner.
        # An empty History must NOT end the method here: the logs would never be
        # registered, while execute() still deletes them from the Log table.
        lastStatus, lastToken = None, None
        if selectedRes:
            try:
                lastStatus = selectedRes[0][0]
                lastToken = selectedRes[0][1]
            except IndexError:
                pass

        # If the first of the selected items has a different status than the latest
        # on the history, we keep it, otherwise we remove it.
        if logs[0]['Status'] == lastStatus and logs[0]['TokenOwner'] == lastToken:
            logs.pop(0)

        if logs:
            self.log.info('%s ( %s ):' % (name, statusType))
            self.log.debug(logs)

        for selectedItemDict in logs:
            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """ Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **elementDict** - `dict`
            dictionary returned from the DB to be inserted on the History table

        :return: S_OK / S_ERROR
        """
        name = elementDict.get('Name')
        statusType = elementDict.get('StatusType')
        status = elementDict.get('Status')
        elementType = elementDict.get('ElementType')
        reason = elementDict.get('Reason')
        dateEffective = elementDict.get('DateEffective')
        lastCheckTime = elementDict.get('LastCheckTime')
        tokenOwner = elementDict.get('TokenOwner')
        tokenExpiration = elementDict.get('TokenExpiration')

        self.log.info(' %s %s %s %s' % (status, dateEffective, tokenOwner, reason))

        return self.rsClient.insertStatusElement(element, 'History', name, statusType,
                                                 status, elementType, reason,
                                                 dateEffective, lastCheckTime,
                                                 tokenOwner, tokenExpiration)
class TokenAgent(AgentModule):
    """
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    """

    # Hours to notify a user
    __notifyHours = 12

    # Rss token
    __rssToken = "rs_svc"

    # Admin mail
    __adminMail = None

    def __init__(self, *args, **kwargs):
        """c'tor: sets the defaults; the clients are created in initialize."""
        AgentModule.__init__(self, *args, **kwargs)

        self.notifyHours = self.__notifyHours
        self.adminMail = self.__adminMail

        self.rsClient = None
        self.tokenDict = None
        self.diracAdmin = None

    def initialize(self):
        """TokenAgent initialization

        Reads the `notifyHours` and `adminMail` options and creates the clients.

        :return: S_OK
        """
        self.notifyHours = self.am_getOption("notifyHours", self.notifyHours)
        self.adminMail = self.am_getOption("adminMail", self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        """
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Errors while processing one element family are logged and the next
        family is processed; the method always returns S_OK.
        """
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ("Site", "Resource", "Node")

        for element in elements:
            self.log.info("Processing %s" % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens["OK"]:
                self.log.error(interestingTokens["Message"])
                continue
            interestingTokens = interestingTokens["Value"]

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens["OK"]:
                self.log.error(processTokens["Message"])
                continue

        # NOTE(review): _notifyOfTokens is not defined in the visible part of
        # this class; presumably it lives further down the file -- confirm.
        notificationResult = self._notifyOfTokens()
        if not notificationResult["OK"]:
            self.log.error(notificationResult["Message"])

        return S_OK()

    ## Protected methods #########################################################

    def _getInterestingTokens(self, element):
        """
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param element: name of the table family queried ( <element>Status )
        :return: S_OK( list of row dictionaries ) / S_ERROR
        """
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element, "Status", meta={"older": ("TokenExpiration", tokenExpLimit)}
        )
        if not tokenElements["OK"]:
            return tokenElements

        tokenColumns = tokenElements["Columns"]

        # Map every row to { column : value } and drop the rows whose token
        # belongs to the RSS itself.
        rows = (dict(zip(tokenColumns, row)) for row in tokenElements["Value"])
        interestingTokens = [row for row in rows if row["TokenOwner"] != self.__rssToken]

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        """
        Given an element and a list of interesting token elements, updates the
        database if the token is expired (giving the token back to the rssToken
        owner with no expiration), logs a message and adds the entry to
        self.tokenDict, keyed by the token owner.

        :return: S_OK / S_ERROR
        """
        never = datetime.max

        for tokenElement in tokenElements:
            try:
                name = tokenElement["Name"]
                statusType = tokenElement["StatusType"]
                status = tokenElement["Status"]
                tokenOwner = tokenElement["TokenOwner"]
                tokenExpiration = tokenElement["TokenExpiration"]
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(
                    element,
                    "Status",
                    name=name,
                    statusType=statusType,
                    tokenOwner=self.__rssToken,
                    tokenExpiration=never,
                )
                if not result["OK"]:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both recorded for the owner.
            self.tokenDict.setdefault(tokenOwner, []).append(
                [tokenOwner, element, name, statusType, status, tokenExpiration]
            )

        return S_OK()
class SummarizeLogsAgent(AgentModule):
    """ SummarizeLogsAgent as extension of AgentModule.

    Copies the entries of the <element>Log tables to the <element>History
    tables, keeping only the entries where the Status or the TokenOwner changes.
    """

    def __init__(self, *args, **kwargs):
        """ Constructor.
        """
        AgentModule.__init__(self, *args, **kwargs)

        self.rsClient = None

    def initialize(self):
        """ Standard initialize.

        :return: S_OK
        """
        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        """ execute ( main method )

        The execute method runs over the three families of tables ( Site, Resource and
        Node ) performing identical operations. First, selects all logs for a given
        family ( and keeps track of which one is the last row ID ). It summarizes the
        logs and finally, deletes the logs from the database.

        :return: S_OK
        """
        # loop over the tables
        for element in ('Site', 'Resource', 'Node'):
            self.log.info('Summarizing %s' % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue

            lastID, logElements = selectLogElements['Value']

            # logElements is a dictionary of key-value pairs as follows:
            # ( name, statusType ) : list( logs )
            for key, logs in logElements.items():
                sumResult = self._registerLogs(element, key, logs)
                if not sumResult['OK']:
                    self.log.error(sumResult['Message'])

            # NOTE(review): the logs are deleted even when registering some of
            # them failed above -- confirm this is intended.
            if lastID is not None:
                self.log.info('Deleting %sLog till ID %s' % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(
                    element, 'Log', meta={'older': ['ID', lastID]})
                if not deleteResult['OK']:
                    self.log.error(deleteResult['Message'])

        return S_OK()

    def _summarizeLogs(self, element):
        """ given an element, selects all logs in table <element>Log.

        Consecutive logs of the same ( name, statusType ) that change neither
        the Status nor the TokenOwner are discarded.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource or Node )

        :return: S_OK( ( lastID, { ( name, statusType ) : listOfLogs } ) ) / S_ERROR
        """
        selectResults = self.rsClient.selectStatusElement(element, 'Log')
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        latestID = None

        if not selectResults['Value']:
            return S_OK((latestID, selectedItems))

        selectColumns = selectResults['Columns']
        selectResults = selectResults['Value']

        # ID of the last row returned by the query
        latestID = dict(zip(selectColumns, selectResults[-1]))['ID']

        for selectResult in selectResults:
            elementDict = dict(zip(selectColumns, selectResult))

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
                continue

            previous = selectedItems[key][-1]

            # If there are no changes on the Status or the TokenOwner with respect
            # the previous one, discards the log.
            if previous['Status'] != elementDict['Status'] or \
               previous['TokenOwner'] != elementDict['TokenOwner']:
                selectedItems[key].append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """ Given an element, a key - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them on the <element>History
        table. Before inserting them, checks whether the first one is or is not on
        the <element>History table. If it is, it is not inserted. If the History
        table has no entry for the key, every log is inserted.

        Note: the first entry may be removed from the `logs` list passed in.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **key** - `tuple`
            tuple with the name of the element and the statusType
          **logs** - `list`
            list of dictionaries containing the logs

        :return: S_OK / S_ERROR
        """
        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(
            element, 'History', name, statusType,
            meta={
                'columns': ['Status', 'TokenOwner'],
                'limit': 1,
                'order': ['DateEffective', 'desc']
            })
        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        # With an empty History every log is inserted as it is; otherwise the
        # first log is compared against the latest History entry.
        if selectedRes:
            # We want from the <element>History table the last Status, and TokenOwner
            lastStatus, lastToken = None, None
            try:
                lastStatus = selectedRes[0][0]
                lastToken = selectedRes[0][1]
            except IndexError:
                pass

            # If the first of the selected items has a different status than the latest
            # on the history, we keep it, otherwise we remove it.
            if logs[0]['Status'] == lastStatus and logs[0]['TokenOwner'] == lastToken:
                logs.pop(0)

            if logs:
                self.log.info('%s ( %s ):' % (name, statusType))
                self.log.debug(logs)

        for selectedItemDict in logs:
            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """ Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **elementDict** - `dict`
            dictionary returned from the DB to be inserted on the History table

        :return: S_OK / S_ERROR
        """
        name = elementDict.get('Name')
        statusType = elementDict.get('StatusType')
        # vo = elementDict.get('VO')  # FIXME: not sure about it
        status = elementDict.get('Status')
        elementType = elementDict.get('ElementType')
        reason = elementDict.get('Reason')
        dateEffective = elementDict.get('DateEffective')
        lastCheckTime = elementDict.get('LastCheckTime')
        tokenOwner = elementDict.get('TokenOwner')
        tokenExpiration = elementDict.get('TokenExpiration')

        self.log.info(' %s %s %s %s' % (status, dateEffective, tokenOwner, reason))

        return self.rsClient.insertStatusElement(
            element=element,
            tableType='History',
            name=name,
            statusType=statusType,
            status=status,
            elementType=elementType,
            reason=reason,
            dateEffective=dateEffective,
            lastCheckTime=lastCheckTime,
            tokenOwner=tokenOwner,
            tokenExpiration=tokenExpiration)
class ResourceStatus(object):
    """
    ResourceStatus helper that connects to CS if RSS flag is not Active. It keeps
    the connection to the db / server as an object member, to avoid creating a new
    one massively.
    """

    __metaclass__ = DIRACSingleton

    def __init__(self, rssFlag=None):
        """
        Constructor, initializes the rssClient.

        :param rssFlag: True to use the RSS, False to use the CS; when None the
                        mode is read from the RSS configuration
        """
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssClient = ResourceStatusClient()
        self.rssFlag = rssFlag
        if rssFlag is None:
            self.rssFlag = self.__getMode()

        # We can set CacheLifetime and CacheHistory from CS, so that we can tune them.
        cacheLifeTime = int(self.rssConfig.getConfigCache())

        # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
        self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

    def getElementStatus(self, elementName, elementType, statusType=None, default=None):
        """
        Helper function, tries to get information from the RSS for the given
        Element, otherwise, it gets it from the CS.

        :param elementName: name of the element
        :type elementName: str
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: None, str, list
        :param default: default value (meaningful only when rss is InActive)
        :type default: str
        :return: S_OK/S_ERROR
        :rtype: dict

        :Example:
        >>> getElementStatus('CE42', 'ComputingElement')
            S_OK( { 'CE42': { 'all': 'Active' } } )
        >>> getElementStatus('SE1', 'StorageElement', 'ReadAccess')
            S_OK( { 'SE1': { 'ReadAccess': 'Banned' } } )
        >>> getElementStatus('SE1', 'ThisIsAWrongElementType', 'ReadAccess')
            S_ERROR( xyz.. )
        >>> getElementStatus('ThisIsAWrongName', 'StorageElement', 'WriteAccess')
            S_ERROR( xyz.. )
        >>> getElementStatus('A_file_catalog', 'Catalog')
            S_OK( { 'A_file_catalog': { 'all': 'Active' } } )
        >>> getElementStatus('SE1', 'StorageElement', ['ReadAccess', 'WriteAccess'])
            S_OK( { 'SE1': { 'ReadAccess': 'Banned' , 'WriteAccess': 'Active'} } )
        >>> getElementStatus('SE1', 'StorageElement')
            S_OK( { 'SE1': { 'ReadAccess': 'Probing' ,
                             'WriteAccess': 'Active',
                             'CheckAccess': 'Degraded',
                             'RemoveAccess': 'Banned'} } )
        """
        allowedParameters = ["StorageElement", "ComputingElement", "FTS", "Catalog"]

        if elementType not in allowedParameters:
            return S_ERROR("%s in not in the list of the allowed parameters: %s" % (elementType, allowedParameters))

        # Apply defaults: only StorageElements have several status types, every
        # other allowed element type uses the single 'all' status type.
        if not statusType:
            if elementType == "StorageElement":
                statusType = ['ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']
            else:
                statusType = ['all']

        if self.rssFlag:
            return self.__getRSSElementStatus(elementName, elementType, statusType)
        return self.__getCSElementStatus(elementName, elementType, statusType, default)

    def setElementStatus(self, elementName, elementType, statusType, status, reason=None, tokenOwner=None):
        """ Tries set information in RSS and in CS.

        :param elementName: name of the element
        :type elementName: str
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: str
        :param status: status to be set
        :param reason: reason for setting the status
        :type reason: str
        :param tokenOwner: owner of the token (meaningful only when rss is Active)
        :type tokenOwner: str
        :return: S_OK/S_ERROR
        :rtype: dict

        :Example:
        >>> setElementStatus('CE42', 'ComputingElement', 'all', 'Active')
            S_OK( xyz.. )
        >>> setElementStatus('SE1', 'StorageElement', 'ReadAccess', 'Banned')
            S_OK( xyz.. )
        """
        if self.rssFlag:
            return self.__setRSSElementStatus(elementName, elementType, statusType, status, reason, tokenOwner)
        return self.__setCSElementStatus(elementName, elementType, statusType, status)

    ################################################################################

    def __updateRssCache(self):
        """ Method used to update the rssCache.

            It will try 5 times to contact the RSS before giving up. After each
            failed attempt a new ResourceStatusClient is created.

            :return: S_OK( cache dictionary ) / the last failed result
        """
        meta = {'columns': ['Name', 'ElementType', 'StatusType', 'Status']}
        maxTrials = 5

        for ti in range(maxTrials):
            rawCache = self.rssClient.selectStatusElement('Resource', 'Status', meta=meta)
            if rawCache['OK']:
                break
            self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
            # Back off before the next attempt only: sleeping after the last
            # failure would just delay the error by 16 seconds.
            if ti < maxTrials - 1:
                sleep(math.pow(ti, 2))
            self.rssClient = ResourceStatusClient()

        if not rawCache['OK']:
            return rawCache
        return S_OK(getCacheDictFromRawData(rawCache['Value']))

    ################################################################################

    def __getRSSElementStatus(self, elementName, elementType, statusType):
        """ Gets from the cache or the RSS the Elements status. The cache is a
            copy of the DB table. If it is not on the cache, most likely is not going
            to be on the DB.

            There is one exception: item just added to the CS, e.g. new Element.
            The period between it is added to the DB and the changes are propagated
            to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
            minutes.

        :param elementName: name of the element
        :type elementName: str
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement,
                           otherwise it is 'all' or ['all'])
        :type statusType: str, list
        :return: the result of the cache match
        """
        cacheMatch = self.rssCache.match(elementName, elementType, statusType)

        self.log.debug('__getRSSElementStatus')
        self.log.debug(cacheMatch)

        return cacheMatch

    def __getCSElementStatus(self, elementName, elementType, statusType, default):
        """ Gets from the CS the Element status

        :param elementName: name of the element
        :type elementName: str
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: str, list
        :param default: default value
        :type default: None, str
        :return: S_OK( { element : { statusType : status } } ) / S_ERROR
        """
        # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just return 'Active'
        if elementType in ('ComputingElement', 'FTS'):
            return S_OK({elementName: {'all': 'Active'}})

        # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
        if elementType == 'StorageElement':
            cs_path = "/Resources/StorageElements"
        elif elementType == 'Catalog':
            cs_path = "/Resources/FileCatalogs"
            statusType = ['Status']

        if not isinstance(elementName, list):
            elementName = [elementName]

        if not isinstance(statusType, list):
            statusType = [statusType]

        result = {}
        for element in elementName:
            for sType in statusType:
                # Look in standard location, 'Active' by default
                res = gConfig.getValue("%s/%s/%s" % (cs_path, element, sType), 'Active')
                result.setdefault(element, {})[sType] = res

        if result:
            return S_OK(result)

        # Only reached when nothing was looked up at all (empty result).
        if default is not None:
            defList = [[el, statusType, default] for el in elementName]
            return S_OK(getDictFromList(defList))

        _msg = "Element '%s', with statusType '%s' is unknown for CS."
        return S_ERROR(DErrno.ERESUNK, _msg % (elementName, statusType))

    def __setRSSElementStatus(self, elementName, elementType, statusType, status, reason, tokenOwner):
        """
        Sets on the RSS the Elements status. The token set expires one day
        after the call. On success the cache is refreshed.

        :return: the result of addOrModifyStatusElement
        """
        expiration = datetime.utcnow() + timedelta(days=1)

        self.rssCache.acquireLock()
        try:
            res = self.rssClient.addOrModifyStatusElement('Resource', 'Status', name=elementName,
                                                          elementType=elementType, status=status,
                                                          statusType=statusType, reason=reason,
                                                          tokenOwner=tokenOwner, tokenExpiration=expiration)
            if res['OK']:
                self.rssCache.refreshCache()
            else:
                _msg = 'Error updating Element (%s,%s,%s)' % (elementName, statusType, status)
                gLogger.warn('RSS: %s' % _msg)

            return res
        finally:
            # Release lock, no matter what.
            self.rssCache.releaseLock()

    def __setCSElementStatus(self, elementName, elementType, statusType, status):
        """
        Sets on the CS the Elements status

        :return: S_OK / S_ERROR ( for StorageElement and Catalog, the result of
                 committing the change to the CS )
        """
        # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just do nothing
        if elementType in ('ComputingElement', 'FTS'):
            return S_OK()

        # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
        statuses = self.rssConfig.getConfigStatusType(elementType)
        if statusType not in statuses:
            gLogger.error("%s is not a valid statusType" % statusType)
            return S_ERROR("%s is not a valid statusType: %s" % (statusType, statuses))

        if elementType == 'StorageElement':
            cs_path = "/Resources/StorageElements"
        elif elementType == 'Catalog':
            cs_path = "/Resources/FileCatalogs"
            # FIXME: This a probably outdated location (new one is in /Operations/[]/Services/Catalogs)
            # but needs to be VO-aware
            statusType = 'Status'

        # NOTE(review): this path contains elementType, while the path read in
        # __getCSElementStatus ( <cs_path>/<element>/<statusType> ) does not --
        # confirm which of the two is the intended location.
        csAPI = CSAPI()
        csAPI.setOption("%s/%s/%s/%s" % (cs_path, elementName, elementType, statusType), status)

        res = csAPI.commitChanges()
        if not res['OK']:
            gLogger.warn('CS: %s' % res['Message'])

        return res

    def __getMode(self):
        """
        Gets the flag defined ( or not ) on the RSSConfiguration.
        If the configured state is 'Active', we use RSS, if not, we use CS
        ( and the rssClient is dropped ).

        :return: True if the RSS is to be used, False otherwise
        """
        res = self.rssConfig.getConfigState()

        if res == 'Active':
            if self.rssClient is None:
                self.rssClient = ResourceStatusClient()
            return True

        self.rssClient = None
        return False

    def isStorageElementAlwaysBanned(self, seName, statusType):
        """ Checks if the AlwaysBanned policy is applied to the SE given as parameter

        :param seName: string, name of the SE
        :param statusType: ReadAccess, WriteAccess, RemoveAccess, CheckAccess

        :returns: S_OK(True/False), or the failed result of getPoliciesThatApply
        """
        res = getPoliciesThatApply({'name': seName, 'statusType': statusType})
        if not res['OK']:
            self.log.error("isStorageElementAlwaysBanned: unable to get the information", res['Message'])
            return res

        isAlwaysBanned = any(policy['type'] == 'AlwaysBanned' for policy in res['Value'])

        return S_OK(isAlwaysBanned)
class PEP:
    """
    PEP ( Policy Enforcement Point ).

    Hands the decision parameters of an element to the PDP and runs the
    actions listed in the combined policy result.
    """

    def __init__(self, clients=None):
        """
        Constructor

        examples:
          >>> pep = PEP()
          >>> pep1 = PEP( { 'ResourceStatusClient' : ResourceStatusClient() } )
          >>> pep2 = PEP( { 'ResourceStatusClient' : ResourceStatusClient(), 'ClientY' : None } )

        :Parameters:
          **clients** - [ None, `dict` ]
            clients to be used in the commands issued by the policies. The same
            dictionary is handed over to the PDP and to every action, which
            avoids opening a new connection each time a policy is evaluated.
        """

        if clients is None:
            clients = {}

        # The PEP itself needs these two clients: reuse the ones provided,
        # create them only when they are missing from the dictionary.
        self.rsClient = (clients['ResourceStatusClient']
                         if 'ResourceStatusClient' in clients
                         else ResourceStatusClient())
        self.rmClient = (clients['ResourceManagementClient']
                         if 'ResourceManagementClient' in clients
                         else ResourceManagementClient())

        self.clients = clients
        # The PDP gets the clients that are going to be used on the Commands
        self.pdp = PDP(clients)

    def enforce(self, decisionParams):
        """
        Passes decisionParams to the PDP, checks that the element has not been
        modified in the meantime and runs every action of the combined result.

        The dictionary returned by the PDP holds three entries: the decision
        parameters it used ( `decissionParams` ), every policy evaluated
        ( `singlePolicyResults` ) and the final result ( `policyCombinedResult` ).

        examples:
          >>> pep.enforce( { 'element' : 'Site', 'name' : 'MySite' } )
          >>> pep.enforce( { 'element' : 'Resource', 'name' : 'myce.domain.ch' } )

        :Parameters:
          **decisionParams** - `dict`
            parameters that will be used to match policies.

        :return: S_OK( PDP decision dictionary ) / S_ERROR
        """

        # Feed the PDP with the new parameters and let it evaluate the policies
        self.pdp.setup(decisionParams)
        pdpResult = self.pdp.takeDecision()
        if not pdpResult['OK']:
            gLogger.error(
                'PEP: Something went wrong, not enforcing policies for %s' %
                decisionParams)
            return pdpResult
        pdpResult = pdpResult['Value']

        # From here on we work with the parameters as the PDP used them
        decisionParams = pdpResult['decissionParams']
        combinedResult = pdpResult['policyCombinedResult']
        singleResults = pdpResult['singlePolicyResults']

        # Last check before the actions: the element must be unchanged
        freshness = self.__isNotUpdated(decisionParams)
        if not freshness['OK']:
            return freshness

        for actionName, actionType in combinedResult['PolicyAction']:

            modulePath = 'DIRAC.ResourceStatusSystem.PolicySystem.Actions.%s' % actionType
            try:
                actionModule = Utils.voimport(modulePath)
            except ImportError:
                gLogger.error('Error importing %s action' % actionType)
                continue

            # The action class is named after its module
            try:
                actionClass = getattr(actionModule, actionType)
            except AttributeError:
                gLogger.error('Error importing %s action class' % actionType)
                continue

            actionInstance = actionClass(actionName, decisionParams,
                                         combinedResult, singleResults,
                                         self.clients)

            gLogger.debug((actionName, actionType))

            # A failing action is reported, the remaining ones still run
            outcome = actionInstance.run()
            if not outcome['OK']:
                gLogger.error(outcome['Message'])

        return S_OK(pdpResult)

    def __isNotUpdated(self, decisionParams):
        """
        Checks that the element still exists exactly as it was passed to the
        PEP. Someone may have updated its status while the policies were being
        evaluated, and running the actions would then overwrite that change.
        The check narrows the window, it does not close it: an update can
        still happen while the actions run.

        :Parameters:
          **decisionParams** - `dict`
            parameters that will be used to match policies

        :return: S_OK / S_ERROR
        """

        # 'element' and 'active' cannot be passed as keyword arguments
        selectParams = decisionParams.copy()
        for unwantedKey in ('element', 'active'):
            del selectParams[unwantedKey]

        # Anything but an exact match means the element has changed
        matchingRow = self.rsClient.selectStatusElement(
            decisionParams['element'], 'Status', **selectParams)
        if not matchingRow['OK']:
            return matchingRow

        if matchingRow['Value']:
            return S_OK()

        msg = '%(name)s ( %(status)s / %(statusType)s ) has been updated after PEP started running'
        return S_ERROR(msg % selectParams)

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class ElementInspectorAgent(AgentModule):
    '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    On every cycle it queues the elements that are due for a check and starts
    worker threads that evaluate them with the PEP.
    '''

    # Max number of worker threads by default
    __maxNumberOfThreads = 5
    # ElementType, to be defined among Site, Resource or Node
    __elementType = None
    # Inspection freqs ( minutes ), defaults: the lower, the higher priority to
    # be checked. Error state usually means there is a glitch somewhere, so it
    # has the highest priority.
    __checkingFreqs = {'Default': {'Active': 60,
                                   'Degraded': 30,
                                   'Probing': 30,
                                   'Banned': 30,
                                   'Unknown': 15,
                                   'Error': 15}}
    # queue size limit to stop feeding
    __limitQueueFeeder = 15

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''

        AgentModule.__init__(self, *args, **kwargs)

        # members initialization
        self.maxNumberOfThreads = self.__maxNumberOfThreads
        self.elementType = self.__elementType
        self.checkingFreqs = self.__checkingFreqs
        self.limitQueueFeeder = self.__limitQueueFeeder

        self.elementsToBeChecked = None
        self.threadPool = None
        self.rsClient = None
        self.clients = {}

    def initialize(self):
        ''' Standard initialize.

        Reads the agent options ( falling back to the class defaults ) and
        creates the queue, the thread pool and the clients shared by the
        worker threads.

        :return: S_OK
        '''

        self.maxNumberOfThreads = self.am_getOption('maxNumberOfThreads', self.maxNumberOfThreads)
        self.elementType = self.am_getOption('elementType', self.elementType)
        self.checkingFreqs = self.am_getOption('checkingFreqs', self.checkingFreqs)
        self.limitQueueFeeder = self.am_getOption('limitQueueFeeder', self.limitQueueFeeder)

        self.elementsToBeChecked = Queue.Queue()
        self.threadPool = ThreadPool(self.maxNumberOfThreads,
                                     self.maxNumberOfThreads)

        self.rsClient = ResourceStatusClient()

        self.clients['ResourceStatusClient'] = self.rsClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()

        return S_OK()

    def execute(self):
        ''' Feeds the queue with the elements due for a check and starts the
        worker threads needed to process them.

        :return: S_OK, or the failed result of the status query
        '''

        # If there are still more than limitQueueFeeder elements waiting on the
        # queue we do not add anything on this cycle; the threads keep running
        # and processing items from the queue on background.
        qsize = self.elementsToBeChecked.qsize()
        if qsize > self.limitQueueFeeder:
            self.log.warn('Queue not empty ( %s > %s ), skipping feeding loop' % (qsize, self.limitQueueFeeder))
            return S_OK()

        # We get all the elements, then we filter.
        elements = self.rsClient.selectStatusElement(self.elementType, 'Status')
        if not elements['OK']:
            self.log.error(elements['Message'])
            return elements

        utcnow = datetime.datetime.utcnow().replace(microsecond=0)

        # filter elements by Type
        for element in elements['Value']:

            # Maybe an overkill, but this way I have NEVER again to worry about order
            # of elements returned by mySQL on tuples
            elemDict = dict(zip(elements['Columns'], element))

            # We skip the elements with token different than "rs_svc"
            if elemDict['TokenOwner'] != 'rs_svc':
                self.log.info('Skipping %s ( %s ) with token %s' % (elemDict['Name'],
                                                                      elemDict['StatusType'],
                                                                      elemDict['TokenOwner']))
                continue

            # Element types without their own frequencies use the defaults
            if elemDict['ElementType'] not in self.checkingFreqs:
                timeToNextCheck = self.checkingFreqs['Default'][elemDict['Status']]
            else:
                timeToNextCheck = self.checkingFreqs[elemDict['ElementType']][elemDict['Status']]

            if utcnow - datetime.timedelta(minutes=timeToNextCheck) > elemDict['LastCheckTime']:

                # We are not checking if the item is already on the queue or not. It may
                # be there, but in any case, it is not a big problem.

                # The PEP expects the column names with a lower-case first letter
                lowerElementDict = {'element': self.elementType}
                for key, value in elemDict.items():
                    lowerElementDict[key[0].lower() + key[1:]] = value

                # We add lowerElementDict to the queue
                self.elementsToBeChecked.put(lowerElementDict)
                self.log.verbose('%s # "%s" # "%s" # %s # %s' % (elemDict['Name'],
                                                                   elemDict['ElementType'],
                                                                   elemDict['StatusType'],
                                                                   elemDict['Status'],
                                                                   elemDict['LastCheckTime']))

        # Measure size of the queue, more or less, to know how many threads should
        # we start !
        queueSize = self.elementsToBeChecked.qsize()
        # 30, could have been other number.. but it works reasonably well. ( +1 to get ceil )
        # Explicit floor division: the result must stay an integer thread count.
        threadsToStart = max(min(self.maxNumberOfThreads, (queueSize // 30) + 1), 1)
        threadsRunning = self.threadPool.numWorkingThreads()

        self.log.info('Needed %d threads to process %d elements' % (threadsToStart, queueSize))
        if threadsRunning:
            self.log.info('Already %d threads running' % threadsRunning)
            threadsToStart = max(0, threadsToStart - threadsRunning)
            self.log.info('Starting %d threads to process %d elements' % (threadsToStart, queueSize))

        # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0 keeps
        # running. In next loop we will start a new thread, and will be called 0
        # again. To have a mechanism to see which thread is where, we append the
        # cycle number before the threadId.
        cycle = self._AgentModule__moduleProperties['cyclesDone']

        for _x in xrange(threadsToStart):
            threadId = '%s_%s' % (cycle, _x)
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute, args=(threadId, ))
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        return S_OK()

    def finalize(self):
        ''' Blocks until every queued element has been marked as done.

        :return: S_OK
        '''

        self.log.info('draining queue... blocking until empty')
        # block until all tasks are done
        self.elementsToBeChecked.join()

        return S_OK()

    ## Private methods #############################################################

    def _execute(self, threadNumber):
        '''
        Method run by the thread pool. It takes elements from the queue until
        the queue is empty; for each one it evaluates the policies and enforces
        the necessary actions.

        :param threadNumber: identifier used on the log messages of this thread
        :return: S_OK
        '''

        tHeader = '%sJob%s' % ('* ' * 30, threadNumber)

        self.log.info('%s UP' % tHeader)

        pep = PEP(clients=self.clients)

        while True:

            try:
                element = self.elementsToBeChecked.get_nowait()
            except Queue.Empty:
                self.log.info('%s DOWN' % tHeader)
                return S_OK()

            try:
                self._inspectElement(pep, element)
            finally:
                # Used together with join ! Every element taken from the queue
                # must be marked as done, even if its inspection raises,
                # otherwise finalize() would block forever.
                self.elementsToBeChecked.task_done()

    def _inspectElement(self, pep, element):
        ''' Enforces the policies on one element and logs a status change. '''

        self.log.info('%s ( %s / %s ) being processed' % (element['name'],
                                                          element['status'],
                                                          element['statusType']))

        resEnforce = pep.enforce(element)
        if not resEnforce['OK']:
            self.log.error(resEnforce['Message'])
            return

        resEnforce = resEnforce['Value']

        oldStatus = resEnforce['decissionParams']['status']
        statusType = resEnforce['decissionParams']['statusType']
        newStatus = resEnforce['policyCombinedResult']['Status']
        reason = resEnforce['policyCombinedResult']['Reason']

        if oldStatus != newStatus:
            self.log.info('%s (%s) is now %s ( %s ), before %s' % (element['name'],
                                                                     statusType,
                                                                     newStatus,
                                                                     reason,
                                                                     oldStatus))
class Synchronizer(object):
    '''
    Every time there is a successful write on the CS, Synchronizer().sync() is
    executed. It updates the database with the values on the CS.
    '''

    def __init__(self, rStatus=None, rManagement=None, defaultStatus="Unknown"):
        '''
        :param rStatus: ResourceStatusClient to be used; a new one is created
                        when it is None
        :param rManagement: ResourceManagementClient to be used; a new one is
                            created when it is None
        :param str defaultStatus: status given to the elements that are added
        '''

        # Warm up local CS
        CSHelpers.warmUp()

        # Clients passed by the caller must be kept as well, not only the
        # ones created here.
        if rStatus is None:
            rStatus = ResourceStatusClient()
        self.rStatus = rStatus
        if rManagement is None:
            rManagement = ResourceManagementClient()
        self.rManagement = rManagement

        self.defaultStatus = defaultStatus

        self.rssConfig = RssConfiguration()

        # Entries are owned by the user of the proxy, if there is one
        self.tokenOwner = "rs_svc"
        result = getProxyInfo()
        if result['OK']:
            self.tokenOwner = result['Value']['username']

    def sync(self, _eventName, _params):
        '''
        Main synchronizer method. It synchronizes the three types of elements:
        Sites, Resources and Nodes. A failure on one of them is logged and does
        not prevent the others from being synchronized.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            this parameter is ignored, but needed by caller function.
          **_params** - any
            this parameter is ignored, but needed by caller function.

        :return: S_OK
        '''

        syncSites = self._syncSites()
        if not syncSites['OK']:
            gLogger.error(syncSites['Message'])

        syncResources = self._syncResources()
        if not syncResources['OK']:
            gLogger.error(syncResources['Message'])

        syncNodes = self._syncNodes()
        if not syncNodes['OK']:
            gLogger.error(syncNodes['Message'])

        return S_OK()

    ## Protected methods #########################################################

    def _syncSites(self):
        '''
        Sync sites: compares CS with DB and does the necessary modifications.

        :return: S_OK, or the first failed result
        '''

        gLogger.info('-- Synchronizing sites --')

        # sites in CS
        res = CSHelpers.getSites()
        if not res['OK']:
            return res
        sitesCS = res['Value']

        gLogger.verbose('%s sites found in CS' % len(sitesCS))

        # sites in RSS
        result = self.rStatus.selectStatusElement('Site', 'Status',
                                                  meta={'columns': ['Name']})
        if not result['OK']:
            return result
        sitesDB = [siteDB[0] for siteDB in result['Value']]

        # Sites that are in DB but not (anymore) in CS
        toBeDeleted = list(set(sitesDB).difference(set(sitesCS)))
        gLogger.verbose('%s sites to be deleted' % len(toBeDeleted))

        # Delete sites
        for siteName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Site', siteName)
            gLogger.verbose('Deleting site %s' % siteName)
            if not deleteQuery['OK']:
                return deleteQuery

        # Sites that are in CS but not (anymore) in DB
        toBeAdded = list(set(sitesCS).difference(set(sitesDB)))
        gLogger.verbose('%s site entries to be added' % len(toBeAdded))

        for site in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement('Site', 'Status',
                                                            name=site,
                                                            statusType='all',
                                                            status=self.defaultStatus,
                                                            elementType='Site',
                                                            tokenOwner=self.tokenOwner,
                                                            reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def _syncResources(self):
        '''
        Sync resources: compares CS with DB and does the necessary modifications.
        ( StorageElements, FTS, FileCatalogs and ComputingElements )

        Each step logs its own failure; the following steps still run.

        :return: S_OK
        '''

        gLogger.info('-- Synchronizing Resources --')

        gLogger.verbose('-> StorageElements')
        ses = self.__syncStorageElements()
        if not ses['OK']:
            gLogger.error(ses['Message'])

        gLogger.verbose('-> FTS')
        fts = self.__syncFTS()
        if not fts['OK']:
            gLogger.error(fts['Message'])

        gLogger.verbose('-> FileCatalogs')
        fileCatalogs = self.__syncFileCatalogs()
        if not fileCatalogs['OK']:
            gLogger.error(fileCatalogs['Message'])

        gLogger.verbose('-> ComputingElements')
        computingElements = self.__syncComputingElements()
        if not computingElements['OK']:
            gLogger.error(computingElements['Message'])

        gLogger.verbose('-> removing resources that no longer exist in the CS')
        removingResources = self.__removeNonExistingResourcesFromRM()
        if not removingResources['OK']:
            gLogger.error(removingResources['Message'])

        # FIXME: VOMS

        return S_OK()

    def _syncNodes(self):
        '''
        Sync nodes: compares CS with DB and does the necessary modifications.
        ( Queues )

        :return: S_OK
        '''

        gLogger.info('-- Synchronizing Nodes --')

        gLogger.verbose('-> Queues')
        queues = self.__syncQueues()
        if not queues['OK']:
            gLogger.error(queues['Message'])

        return S_OK()

    ## Private methods ###########################################################

    def __removeNonExistingResourcesFromRM(self):
        '''
        Remove resources from DowntimeCache table that no longer exist in the CS.

        :return: S_OK, or the first failed result
        '''

        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                'ResourceManagement is not installed, skipping removal of non existing resources...')
            return S_OK()

        sesHosts = CSHelpers.getStorageElementsHosts()
        if not sesHosts['OK']:
            return sesHosts

        # Work on a copy: extending the returned list in place would modify an
        # object this method does not own.
        resources = list(sesHosts['Value'])

        # FTS servers and CEs are added on a best-effort basis
        ftsServer = getFTS3Servers()
        if ftsServer['OK']:
            resources.extend(ftsServer['Value'])

        ce = CSHelpers.getComputingElements()
        if ce['OK']:
            resources.extend(ce['Value'])

        downtimes = self.rManagement.selectDowntimeCache()
        if not downtimes['OK']:
            return downtimes

        # Remove hosts that no longer exist in the CS
        for host in downtimes['Value']:
            gLogger.verbose('Checking if %s is still in the CS' % host[0])
            if host[0] not in resources:
                gLogger.verbose(
                    '%s is no longer in CS, removing entry...' % host[0])
                result = self.rManagement.deleteDowntimeCache(name=host[0])
                if not result['OK']:
                    return result

        return S_OK()

    def __syncElementType(self, family, elementType, resCS, label, entriesLabel, logAdded):
        '''
        Compares the elements of one type on the CS with the ones on the DB:
        elements that are only on the DB are deleted, and every
        ( name, statusType ) pair missing on the DB is added.

        :param str family: table family on the DB ( 'Resource' or 'Node' )
        :param str elementType: element type to be synchronized
        :param dict resCS: S_OK / S_ERROR structure with the names on the CS
        :param str label: description of the elements used on the log messages
        :param str entriesLabel: description used on the "to be added" message
        :param logAdded: logging method used for the "to be added" message
        :return: S_OK, or the first failed result
        '''

        if not resCS['OK']:
            return resCS
        namesCS = resCS['Value']

        gLogger.verbose('%s %s found in CS' % (len(namesCS), label))

        resDB = self.rStatus.selectStatusElement(family, 'Status',
                                                 elementType=elementType,
                                                 meta={'columns': ['Name']})
        if not resDB['OK']:
            return resDB
        namesDB = [row[0] for row in resDB['Value']]

        # Elements that are in DB but not in CS
        toBeDeleted = list(set(namesDB).difference(set(namesCS)))
        gLogger.verbose('%s %s to be deleted' % (len(toBeDeleted), label))

        for name in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement(family, name)
            gLogger.verbose('... %s' % name)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType(elementType)

        result = self.rStatus.selectStatusElement(family, 'Status',
                                                  elementType=elementType,
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        tuplesDB = [(row[0], row[1]) for row in result['Value']]

        # For each ( name, statusType ) tuple not present in the DB, add it.
        tuplesCS = [(name, statusType) for name in namesCS for statusType in statusTypes]
        toBeAdded = list(set(tuplesCS).difference(set(tuplesDB)))

        logAdded('%s %s to be added' % (len(toBeAdded), entriesLabel))

        for name, statusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(family, 'Status',
                                                            name=name,
                                                            statusType=statusType,
                                                            status=self.defaultStatus,
                                                            elementType=elementType,
                                                            tokenOwner=self.tokenOwner,
                                                            reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def __syncComputingElements(self):
        '''
        Sync ComputingElements: compares CS with DB and does the necessary
        modifications.
        '''

        return self.__syncElementType('Resource', 'ComputingElement',
                                      CSHelpers.getComputingElements(),
                                      'Computing elements',
                                      'Computing elements entries',
                                      gLogger.debug)

    def __syncFileCatalogs(self):
        '''
        Sync FileCatalogs: compares CS with DB and does the necessary
        modifications.
        '''

        return self.__syncElementType('Resource', 'Catalog',
                                      CSHelpers.getFileCatalogs(),
                                      'File catalogs',
                                      'File catalogs entries',
                                      gLogger.verbose)

    def __syncFTS(self):
        '''
        Sync FTS: compares CS with DB and does the necessary modifications.
        '''

        return self.__syncElementType('Resource', 'FTS',
                                      CSHelpers.getFTS(),
                                      'FTS endpoints',
                                      'FTS endpoints entries',
                                      gLogger.verbose)

    def __syncStorageElements(self):
        '''
        Sync StorageElements: compares CS with DB and does the necessary
        modifications.
        '''

        return self.__syncElementType('Resource', 'StorageElement',
                                      CSHelpers.getStorageElements(),
                                      'storage elements',
                                      'storage element entries',
                                      gLogger.verbose)

    def __syncQueues(self):
        '''
        Sync Queues: compares CS with DB and does the necessary modifications.
        '''

        return self.__syncElementType('Node', 'Queue',
                                      CSHelpers.getQueues(),
                                      'Queues',
                                      'Queue entries',
                                      gLogger.verbose)
class SiteStatus(object):
    """
    RSS helper to interact with the 'Site' family on the DB.

    Depending on rssFlag, the statuses are read from the ResourceStatus
    service or from the WMSAdministrator site mask. The read methods are:

    * getSiteStatuses
    * isUsableSite
    * getUsableSites
    * getSites

    and setSiteStatus writes a new status.
    """

    __metaclass__ = DIRACSingleton

    def __init__(self):
        """
        Constructor, initializes the rssClient.
        """

        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssFlag = ResourceStatus().rssFlag
        self.rsClient = ResourceStatusClient()

    def __querySite(self, siteName):
        """ Asks the active backend for the status of a single site. """

        if self.rssFlag:
            return self.rsClient.selectStatusElement(
                'Site', 'Status', name=siteName, meta={'columns': ['Status']})
        return RPCClient(
            'WorkloadManagement/WMSAdministrator').getSiteMaskStatus(siteName)

    def __statusFrom(self, value):
        """ Extracts the status from the 'Value' of a single site query. """

        # RSS answers with rows of columns, the site mask with the bare value
        return value[0][0] if self.rssFlag else value

    def getSiteStatuses(self, siteNamesList=None):
        """
        Queries the status of the sites in the given list; an empty input is
        interpreted as * ( all ). Sites for which the query returns nothing
        are left out of the answer.

        examples
          >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' }  )
          >>> siteStatus.getSiteStatuses( None )
              S_OK( { 'test1.test1.org': 'Active',
                      'test2.test2.net': 'Banned', },
                      ...
                    }
                  )

        :Parameters:
          **siteNamesList** - `list`
            name(s) of the sites to be matched

        :return: S_OK( { siteName : status } ) || S_ERROR()
        """

        if siteNamesList:
            statuses = {}
            for siteName in siteNamesList:
                answer = self.__querySite(siteName)
                if not answer['OK']:
                    return answer
                # sites that do not exist are silently skipped
                if answer['Value']:
                    statuses[siteName] = self.__statusFrom(answer['Value'])
            return S_OK(statuses)

        # No names given: ask for every site in one go
        if self.rssFlag:
            everything = self.rsClient.selectStatusElement(
                'Site', 'Status', meta={'columns': ['Name', 'Status']})
        else:
            everything = RPCClient(
                'WorkloadManagement/WMSAdministrator').getSiteMaskStatus()

        if not everything['OK']:
            return everything
        return S_OK(dict(everything['Value']))

    def isUsableSite(self, siteName):
        """
        Tells whether a site is usable, that is, whether its status is Active
        or Degraded. A site that is not found is not usable.

        examples
          >>> siteStatus.isUsableSite( 'test1.test1.org' )
              S_OK( True )
          >>> siteStatus.isUsableSite( 'NotExists' )
              S_OK( False )

        :Parameters:
          **siteName** - `string`
            name of the site to be matched

        :return: S_OK( bool ) || S_ERROR()
        """

        answer = self.__querySite(siteName)
        if not answer['OK']:
            return answer

        found = answer['Value']
        if not found:
            # Site does not exist, so it is not usable
            return S_OK(False)

        return S_OK(self.__statusFrom(found) in ('Active', 'Degraded'))

    def getUsableSites(self, siteNamesList=None):
        """
        Returns, in a list, the sites whose status is either Active or
        Degraded. Without a list of names every site is considered.

        examples
          >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getUsableSites( None )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )

        :Parameters:
          **siteNamesList** - `List`
            name(s) of the sites to be matched

        :return: S_OK( list of site names ) || S_ERROR()
        """

        if siteNamesList:
            usableSites = []
            for siteName in siteNamesList:
                answer = self.__querySite(siteName)
                if not answer['OK']:
                    return answer
                # sites that do not exist are silently skipped
                if not answer['Value']:
                    continue
                if self.__statusFrom(answer['Value']) in ('Active', 'Degraded'):
                    usableSites.append(siteName)
            return S_OK(usableSites)

        if not self.rssFlag:
            siteMask = RPCClient(
                'WorkloadManagement/WMSAdministrator').getSiteMask()
            if not siteMask['OK']:
                return siteMask
            return S_OK(siteMask['Value'])

        # One query per usable status, Active sites first
        usableSites = []
        for usableStatus in ('Active', 'Degraded'):
            answer = self.rsClient.selectStatusElement(
                'Site', 'Status', status=usableStatus,
                meta={'columns': ['Name']})
            if not answer['OK']:
                return answer
            usableSites += [row[0] for row in answer['Value']]
        return S_OK(usableSites)

    def getSites(self, siteState='Active'):
        """
        By default, it gets the currently active site list

        examples
          >>> siteStatus.getSites()
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Banned' )
              S_OK( ['test0.test0.uk', ... ] )
          >>> siteStatus.getSites( 'All' )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
          >>> siteStatus.getSites( None )
              S_ERROR( ... )

        :Parameters:
          **siteState** - `String`
            state of the sites to be matched ( case insensitive ), or 'All'

        :return: S_OK( list of site names ) || S_ERROR()
        """

        if not siteState:
            return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

        # fix case sensitive string
        siteState = siteState.capitalize()

        if siteState == 'All':
            # no filter on the status, return everything
            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site', 'Status', meta={'columns': ['Name']})
            else:
                answer = RPCClient(
                    'WorkloadManagement/WMSAdministrator').getSiteMask('All')
        else:
            if siteState not in ('Active', 'Banned', 'Degraded', 'Probing',
                                 'Error', 'Unknown'):
                return S_ERROR(errno.EINVAL,
                               'Not a valid status, parameter rejected')

            if self.rssFlag:
                answer = self.rsClient.selectStatusElement(
                    'Site', 'Status', status=siteState,
                    meta={'columns': ['Name']})
            else:
                answer = RPCClient(
                    'WorkloadManagement/WMSAdministrator').getSiteMask(siteState)

        if not answer['OK']:
            return answer

        # The site mask value is returned as it comes, RSS rows are unpacked
        if not self.rssFlag:
            return S_OK(answer['Value'])
        return S_OK([row[0] for row in answer['Value']])

    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site through the ResourceStatus client. The entry
        is written with the user of the current proxy as token owner and a
        token expiring one day from now ( UTC ).

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be set
          **status** - `String`
            the new status ( case insensitive )
          **comment** - `String`
            reason for the change

        :return: S_OK() || S_ERROR()
        """

        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()
        if status not in ('Active', 'Banned', 'Degraded', 'Probing', 'Error',
                          'Unknown'):
            return S_ERROR(errno.EINVAL,
                           'Not a valid status, parameter rejected')

        proxyInfo = getProxyInfo()
        if not proxyInfo['OK']:
            return S_ERROR("Unable to get user proxy info %s " %
                           proxyInfo['Message'])
        tokenOwner = proxyInfo['Value']['username']

        tokenExpiration = datetime.utcnow() + timedelta(days=1)

        modification = self.rsClient.modifyStatusElement(
            'Site', 'Status',
            status=status,
            name=site,
            tokenExpiration=tokenExpiration,
            reason=comment,
            tokenOwner=tokenOwner)

        return S_OK() if modification['OK'] else modification
class SummarizeLogsAgent(AgentModule):
    """SummarizeLogsAgent as extension of AgentModule.

    For every table family (Site, Resource and Node) the agent condenses the
    rows of the <element>Log table into the <element>History table, deletes
    the processed Log rows and finally removes old History rows.
    """

    def __init__(self, *args, **kwargs):
        """Constructor."""
        AgentModule.__init__(self, *args, **kwargs)

        # Set in initialize()
        self.rsClient = None
        # Retention of the History tables, in months (overridden by the
        # "Months" agent option). A falsy value disables the clean-up.
        self.months = 36

    def initialize(self):
        """Standard initialize.

        :return: S_OK
        """
        self.rsClient = ResourceStatusClient()
        self.months = self.am_getOption("Months", self.months)
        return S_OK()

    def execute(self):
        """execute (main method)

        The execute method runs over the three families of tables (Site,
        Resource and Node) performing identical operations. First, selects all
        logs for a given family (and keeps track of which one is the last row
        ID). It summarizes the logs and finally, deletes the logs from the
        database.

        At last, this agent removes older entries from history tables

        :return: S_OK
        """
        # loop over the tables
        for element in ("Site", "Resource", "Node"):
            self.log.info("Summarizing %s" % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements["OK"]:
                self.log.error(selectLogElements["Message"])
                continue

            lastID, logElements = selectLogElements["Value"]

            # logElements is a dictionary of key-value pairs as follows:
            # (name, statusType) : list(logs)
            for key, logs in logElements.items():
                sumResult = self._registerLogs(element, key, logs)
                if not sumResult["OK"]:
                    # A failure for one key must not stop the other keys
                    self.log.error(sumResult["Message"])

            if lastID is not None:
                self.log.info("Deleting %sLog till ID %s" % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(
                    element, "Log", meta={"older": ["ID", lastID]})
                if not deleteResult["OK"]:
                    self.log.error(deleteResult["Message"])
                    # The history clean-up below is skipped for this family
                    continue

            if self.months:
                # Failures are logged inside the helper
                self._removeOldHistoryEntries(element, self.months)

        return S_OK()

    def _summarizeLogs(self, element):
        """Given an element, selects all logs in table <element>Log.

        Consecutive rows of the same (name, statusType) that change neither
        the Status nor the TokenOwner are discarded.

        :param str element: name of the table family (either Site, Resource or Node)
        :return: S_OK((lastID, dictOfLogs)) / S_ERROR, where dictOfLogs maps
                 (name, statusType) to the list of kept rows (as dictionaries)
                 and lastID is the ID of the last selected row (None if the
                 table is empty)
        """
        selectResults = self.rsClient.selectStatusElement(element, "Log")
        if not selectResults["OK"]:
            return selectResults

        selectedItems = {}
        latestID = None

        if not selectResults["Value"]:
            return S_OK((latestID, selectedItems))

        selectColumns = selectResults["Columns"]
        selectResults = selectResults["Value"]

        # The list is known to be non-empty at this point
        latestID = dict(zip(selectColumns, selectResults[-1]))["ID"]

        for selectResult in selectResults:
            elementDict = dict(zip(selectColumns, selectResult))
            key = (elementDict["Name"], elementDict["StatusType"])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
                continue

            previous = selectedItems[key][-1]
            # If there are no changes on the Status or the TokenOwner with
            # respect the previous one, discards the log.
            if (previous["Status"] != elementDict["Status"]
                    or previous["TokenOwner"] != elementDict["TokenOwner"]):
                selectedItems[key].append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """Given an element, a key - which is a tuple (<name>, <statusType>)
        and a list of dictionaries, this method inserts them on the
        <element>History table. Before inserting them, checks whether the
        first one is or is not on the <element>History table. If it is, it is
        not inserted.

        Note that ``logs`` is modified in place when its first entry is dropped.

        :param str element: name of the table family (either Site, Resource or Node)
        :param tuple key: tuple with the name of the element and the statusType
        :param list logs: list of dictionaries containing the logs
        :return: S_OK / S_ERROR
        """
        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(
            element,
            "History",
            name,
            statusType,
            meta={
                "columns": ["Status", "TokenOwner"],
                "limit": 1,
                "order": ["DateEffective", "desc"],
            },
        )
        if not selectedRes["OK"]:
            return selectedRes
        history = selectedRes["Value"]

        if history:
            # We want from the <element>History table the last Status and TokenOwner
            lastStatus, lastToken = None, None
            try:
                lastStatus = history[0][0]
                lastToken = history[0][1]
            except IndexError:
                # Shorter row than requested: treat the missing values as unknown
                pass

            # If the first of the selected items has a different status than
            # the latest on the history, we keep it, otherwise we remove it.
            if logs[0]["Status"] == lastStatus and logs[0]["TokenOwner"] == lastToken:
                logs.pop(0)

            if logs:
                self.log.info("%s (%s):" % (name, statusType))
                self.log.debug(logs)

        # With no history at all, every log is inserted as is
        for selectedItemDict in logs:
            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res["OK"]:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """Given an element and a dictionary with all the arguments, this
        method inserts a new entry on the <element>History table

        :param str element: name of the table family (either Site, Resource or Node)
        :param dict elementDict: dictionary returned from the DB to be inserted on the History table
        :return: S_OK / S_ERROR
        """
        name = elementDict.get("Name")
        statusType = elementDict.get("StatusType")
        # vo = elementDict.get('VO')  # FIXME: not sure about it
        status = elementDict.get("Status")
        elementType = elementDict.get("ElementType")
        reason = elementDict.get("Reason")
        dateEffective = elementDict.get("DateEffective")
        lastCheckTime = elementDict.get("LastCheckTime")
        tokenOwner = elementDict.get("TokenOwner")
        tokenExpiration = elementDict.get("TokenExpiration")

        self.log.info(" %s %s %s %s" % (status, dateEffective, tokenOwner, reason))

        return self.rsClient.insertStatusElement(
            element=element,
            tableType="History",
            name=name,
            statusType=statusType,
            status=status,
            elementType=elementType,
            reason=reason,
            dateEffective=dateEffective,
            lastCheckTime=lastCheckTime,
            tokenOwner=tokenOwner,
            tokenExpiration=tokenExpiration,
        )

    def _removeOldHistoryEntries(self, element, months):
        """Delete entries older than period

        A month is approximated as 30 days.

        :param str element: name of the table family (either Site, Resource or Node)
        :param int months: number of months
        :return: S_OK / S_ERROR (the result of the delete call)
        """
        toRemove = datetime.utcnow().replace(microsecond=0) - timedelta(days=30 * months)
        self.log.info("Removing history entries", "older than %s" % toRemove)

        deleteResult = self.rsClient.deleteStatusElement(
            element, "History", meta={"older": ["DateEffective", toRemove]})
        if not deleteResult["OK"]:
            self.log.error(deleteResult["Message"])
        # Return the result so the documented S_OK / S_ERROR contract holds
        return deleteResult
class TokenAgent(AgentModule):
    """
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    """

    # Rss token
    __rssToken = "rs_svc"

    def __init__(self, *args, **kwargs):
        """c'tor"""
        AgentModule.__init__(self, *args, **kwargs)

        # Tokens expiring within this many hours are reported
        self.notifyHours = 12
        # Optional recipient of the summary of all tokens; empty disables it
        self.adminMail = ""

        self.rsClient = None
        # tokenOwner -> list of [tokenOwner, element, name, statusType, status, tokenExpiration]
        self.tokenDict = {}
        self.diracAdmin = None

    def initialize(self):
        """TokenAgent initialization"""
        self.notifyHours = self.am_getOption("notifyHours", self.notifyHours)
        self.adminMail = self.am_getOption("adminMail", self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        """
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        :return: S_OK (errors are logged, never propagated)
        """
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        elements = ("Site", "Resource", "Node")

        for element in elements:
            self.log.info("Processing %s" % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens["OK"]:
                self.log.error(interestingTokens["Message"])
                continue
            interestingTokens = interestingTokens["Value"]

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens["OK"]:
                self.log.error(processTokens["Message"])
                continue

        notificationResult = self._notifyOfTokens()
        if not notificationResult["OK"]:
            self.log.error(notificationResult["Message"])

        return S_OK()

    def _getInterestingTokens(self, element):
        """
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param str element: name of the table family (Site, Resource or Node)
        :return: S_OK(list of row dictionaries) / S_ERROR
        """
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(
            element, "Status", meta={"older": ["TokenExpiration", tokenExpLimit]})
        if not tokenElements["OK"]:
            return tokenElements

        tokenColumns = tokenElements["Columns"]
        tokenElements = tokenElements["Value"]

        interestingTokens = []
        for tokenElement in tokenElements:
            tokenElement = dict(zip(tokenColumns, tokenElement))
            if tokenElement["TokenOwner"] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        """
        Given an element and a list of interesting token elements, updates the
        database if the token is expired (handing it back to the RSS token with
        no expiration), logs a message and records the token in self.tokenDict
        so that its owner gets notified.

        :param str element: name of the table family (Site, Resource or Node)
        :param list tokenElements: row dictionaries as returned by _getInterestingTokens
        :return: S_OK / S_ERROR
        """
        never = datetime.max

        for tokenElement in tokenElements:
            try:
                name = tokenElement["Name"]
                statusType = tokenElement["StatusType"]
                status = tokenElement["Status"]
                tokenOwner = tokenElement["TokenOwner"]
                tokenExpiration = tokenElement["TokenExpiration"]
            except KeyError as e:
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(
                    element,
                    "Status",
                    name=name,
                    statusType=statusType,
                    tokenOwner=self.__rssToken,
                    tokenExpiration=never,
                )
                if not result["OK"]:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Both expired and expiring tokens are reported to their owner
            if tokenOwner not in self.tokenDict:
                self.tokenDict[tokenOwner] = []

            self.tokenDict[tokenOwner].append(
                [tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()

    def _notifyOfTokens(self):
        """
        Splits interesting tokens between expired and expiring. Also splits them
        among users. It ends sending notifications to the users.

        :return: S_OK, or the result of the admin notification when
                 self.adminMail is set and there is something to report
        """
        now = datetime.utcnow()

        adminExpired = []
        adminExpiring = []

        for tokenOwner, tokenLists in self.tokenDict.items():
            expired = []
            expiring = []

            for tokenList in tokenLists:
                # Index 5 is the tokenExpiration
                if tokenList[5] < now:
                    expired.append(tokenList)
                    adminExpired.append(tokenList)
                else:
                    expiring.append(tokenList)
                    adminExpiring.append(tokenList)

            resNotify = self._notify(tokenOwner, expired, expiring)
            if not resNotify["OK"]:
                # One failing recipient must not block the others
                self.log.error("Failed to notify token owner", resNotify["Message"])

        if (adminExpired or adminExpiring) and self.adminMail:
            return self._notify(self.adminMail, adminExpired, adminExpiring)

        return S_OK()

    def _notify(self, tokenOwner, expired, expiring):
        """
        Given a token owner and a list of expired and expiring tokens, sends an
        email to the user.

        :param str tokenOwner: recipient passed to DiracAdmin.sendMail
        :param list expired: token lists already expired
        :param list expiring: token lists about to expire
        :return: S_OK / S_ERROR
        """
        subject = "RSS token summary for tokenOwner %s" % tokenOwner

        parts = ["\nEXPIRED tokens ( RSS has taken control of them )\n"]
        for tokenList in expired:
            parts.append(" ".join([str(x) for x in tokenList]))
            parts.append("\n")

        # Appended to the message: this section used to overwrite it, which
        # silently dropped the EXPIRED section from the mail.
        parts.append("\nEXPIRING tokens ( RSS will take control of them )\n")
        for tokenList in expiring:
            parts.append(" ".join([str(x) for x in tokenList]))
            parts.append("\n")

        parts.append("\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n")
        parts.append(" Or you can use the dirac-rss-set-token script\n\n")
        parts.append("Through the same interfaces you can release the token any time\n")

        mail = "".join(parts)

        resEmail = self.diracAdmin.sendMail(tokenOwner, subject, mail)
        if not resEmail["OK"]:
            return S_ERROR('Cannot send email to user "%s"' % tokenOwner)

        return resEmail
def setToken(user):
    """
    Acquire, extend or release the token of the element(s) selected through
    the command line switches.

    The StatusTypes matching the given element / name / statusType are read
    from the DB and each of them is then modified one by one, setting the
    reason, the new token owner and the new token expiration.

    :param str user: name of the user issuing the command
    :return: S_OK / S_ERROR
    """
    client = ResourceStatusClient()

    # The modify method does not discover the StatusTypes (an optional
    # parameter of this script) by itself, so they are fetched from the DB
    # first and the elements are modified one at a time.
    selection = client.selectStatusElement(
        switchDict["element"],
        "Status",
        name=switchDict["name"],
        statusType=switchDict["statusType"],
        meta={"columns": ["StatusType", "TokenOwner"]},
    )
    if not selection["OK"]:
        return selection

    rows = selection["Value"]
    if not rows:
        # An empty list means the element does not exist on the DB
        subLogger.warn(
            "Nothing found for %s, %s, %s" % (switchDict["element"], switchDict["name"], switchDict["statusType"])
        )
        return S_OK()

    if switchDict["releaseToken"] != False:
        # Releasing: hand the token back to RSS, with no expiration
        newTokenOwner = "rs_svc"
        tokenExpiration = datetime.max
    else:
        # Acquiring / extending: the user owns it for one more day
        newTokenOwner = user
        tokenExpiration = datetime.utcnow().replace(microsecond=0) + timedelta(days=1)

    subLogger.info("New token : %s until %s" % (newTokenOwner, tokenExpiration))

    for statusType, tokenOwner in rows:
        # Token currently held by somebody else than the caller or RSS
        if tokenOwner not in (user, "rs_svc"):
            subLogger.info("%s(%s) belongs to the user: %s" % (switchDict["name"], statusType, tokenOwner))

        outcome = client.modifyStatusElement(
            switchDict["element"],
            "Status",
            name=switchDict["name"],
            statusType=statusType,
            reason=switchDict["reason"],
            tokenOwner=newTokenOwner,
            tokenExpiration=tokenExpiration,
        )
        if not outcome["OK"]:
            return outcome

        if tokenOwner == newTokenOwner:
            action = "(extended)"
        elif newTokenOwner == "rs_svc":
            action = "(released)"
        else:
            action = "(aquired from %s)" % tokenOwner

        subLogger.info("%s:%s %s" % (switchDict["name"], statusType, action))

    return S_OK()
class ResourceStatus(object):
    """
    ResourceStatus helper that connects to CS if RSS flag is not Active. It keeps
    the connection to the db / server as an object member, to avoid creating a new
    one massively.
    """

    __metaclass__ = DIRACSingleton

    def __init__(self, rssFlag=None):
        """
        Constructor, initializes the rssClient.

        :param rssFlag: True to use the RSS, False to use the CS; when None the
                        mode is read from the RSS configuration
        :type rssFlag: None, bool
        """
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssClient = ResourceStatusClient()
        self.rssFlag = rssFlag
        if rssFlag is None:
            self.rssFlag = self.__getMode()

        cacheLifeTime = int(self.rssConfig.getConfigCache())

        # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
        self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

    def getElementStatus(self, elementName, elementType, statusType=None, default=None):
        """
        Helper function, tries to get information from the RSS for the given
        Element, otherwise, it gets it from the CS.

        :param elementName: name of the element or list of element names
        :type elementName: str, list
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: None, str, list
        :param default: default value (meaningful only when rss is InActive)
        :type default: str
        :return: S_OK/S_ERROR
        :rtype: dict

        :Example:

        >>> getElementStatus('CE42', 'ComputingElement')
            S_OK( { 'CE42': { 'all': 'Active' } } )
        >>> getElementStatus('SE1', 'StorageElement', 'ReadAccess')
            S_OK( { 'SE1': { 'ReadAccess': 'Banned' } } )
        >>> getElementStatus('SE1', 'ThisIsAWrongElementType', 'ReadAccess')
            S_ERROR( xyz.. )
        >>> getElementStatus('ThisIsAWrongName', 'StorageElement', 'WriteAccess')
            S_ERROR( xyz.. )
        >>> getElementStatus('A_file_catalog', 'FileCatalog')
            S_OK( { 'A_file_catalog': { 'all': 'Active' } } )
        >>> getElementStatus('SE1', 'StorageElement', ['ReadAccess', 'WriteAccess'])
            S_OK( { 'SE1': { 'ReadAccess': 'Banned' , 'WriteAccess': 'Active' } } )
        >>> getElementStatus('SE1', 'StorageElement')
            S_OK( { 'SE1': { 'ReadAccess': 'Probing' ,
                             'WriteAccess': 'Active',
                             'CheckAccess': 'Degraded',
                             'RemoveAccess': 'Banned' } } )
        >>> getElementStatus(['CE1', 'CE2'], 'ComputingElement')
            S_OK( { 'CE1': { 'all': 'Active' },
                    'CE2': { 'all': 'Probing' } } )
        """
        allowedParameters = ["StorageElement", "ComputingElement", "FTS", "Catalog"]
        if elementType not in allowedParameters:
            return S_ERROR("%s in not in the list of the allowed parameters: %s" % (elementType, allowedParameters))

        # Apply defaults
        if not statusType:
            if elementType == "StorageElement":
                statusType = ['ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']
            elif elementType == "ComputingElement":
                statusType = ['all']
            elif elementType == "FTS":
                statusType = ['all']
            elif elementType == "Catalog":
                statusType = ['all']

        if self.rssFlag:
            return self.__getRSSElementStatus(elementName, elementType, statusType)
        else:
            return self.__getCSElementStatus(elementName, elementType, statusType, default)

    def setElementStatus(self, elementName, elementType, statusType, status, reason=None, tokenOwner=None):
        """
        Tries set information in RSS and in CS.

        :param elementName: name of the element
        :type elementName: str
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: str
        :param status: status to be set
        :type status: str
        :param reason: reason for setting the status
        :type reason: str
        :param tokenOwner: owner of the token (meaningful only when rss is Active)
        :type tokenOwner: str
        :return: S_OK/S_ERROR
        :rtype: dict

        :Example:

        >>> setElementStatus('CE42', 'ComputingElement', 'all', 'Active')
            S_OK( xyz.. )
        >>> setElementStatus('SE1', 'StorageElement', 'ReadAccess', 'Banned')
            S_OK( xyz.. )
        """
        if self.rssFlag:
            return self.__setRSSElementStatus(elementName, elementType, statusType, status, reason, tokenOwner)
        else:
            return self.__setCSElementStatus(elementName, elementType, statusType, status)

    ################################################################################

    def __updateRssCache(self):
        """
        Method used to update the rssCache.

        It will try 5 times to contact the RSS before giving up. Between
        attempts it sleeps ti**2 seconds (0, 1, 4, 9, 16) and creates a fresh
        client.

        :return: S_OK(cache dictionary) / S_ERROR of the last attempt
        """
        meta = {'columns': ['Name', 'ElementType', 'StatusType', 'Status']}

        for ti in range(5):
            rawCache = self.rssClient.selectStatusElement('Resource', 'Status', meta=meta)
            if rawCache['OK']:
                break
            self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
            sleep(math.pow(ti, 2))
            self.rssClient = ResourceStatusClient()

        if not rawCache['OK']:
            return rawCache
        return S_OK(getCacheDictFromRawData(rawCache['Value']))

    ################################################################################

    def __getRSSElementStatus(self, elementName, elementType, statusType):
        """
        Gets from the cache or the RSS the Elements status. The cache is a copy of
        the DB table. If it is not on the cache, most likely is not going to be on
        the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
        minutes.

        :param elementName: name of the element or list of element names
        :type elementName: str, list
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement,
                           otherwise it is 'all' or ['all'])
        :type statusType: str, list
        :return: the result of the cache match
        """
        cacheMatch = self.rssCache.match(elementName, elementType, statusType)

        self.log.debug('__getRSSElementStatus')
        self.log.debug(cacheMatch)

        return cacheMatch

    def __getCSElementStatus(self, elementName, elementType, statusType, default):
        """
        Gets from the CS the Element status

        :param elementName: name of the element or list of element names
        :type elementName: str, list
        :param elementType: type of the element (StorageElement, ComputingElement, FTS, Catalog)
        :type elementType: str
        :param statusType: type of the status (meaningful only when elementType==StorageElement)
        :type statusType: str, list
        :param default: default value
        :type default: None, str
        :return: S_OK( { elementName: { statusType: status } } ) / S_ERROR
        """
        # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just return 'Active'
        if elementType in ('ComputingElement', 'FTS'):
            # A list of names cannot be used as a dictionary key: give one entry per name
            names = elementName if isinstance(elementName, list) else [elementName]
            return S_OK(dict((name, {'all': 'Active'}) for name in names))

        # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
        if elementType == 'StorageElement':
            cs_path = "/Resources/StorageElements"
        elif elementType == 'Catalog':
            cs_path = "/Resources/FileCatalogs"
            statusType = ['Status']

        if not isinstance(elementName, list):
            elementName = [elementName]

        if not isinstance(statusType, list):
            statusType = [statusType]

        result = {}
        for element in elementName:
            for sType in statusType:
                # Look in standard location, 'Active' by default
                res = gConfig.getValue("%s/%s/%s" % (cs_path, element, sType), 'Active')
                result.setdefault(element, {})[sType] = res

        if result:
            return S_OK(result)

        # Only reached when there was nothing to look up (empty list of names
        # or of status types)
        if default is not None:
            defList = [[el, statusType, default] for el in elementName]
            return S_OK(getDictFromList(defList))

        _msg = "Element '%s', with statusType '%s' is unknown for CS."
        return S_ERROR(DErrno.ERESUNK, _msg % (elementName, statusType))

    def __setRSSElementStatus(self, elementName, elementType, statusType, status, reason, tokenOwner):
        """
        Sets on the RSS the Elements status

        The token is set to expire one day from now and, on success, the cache
        is refreshed. The cache lock is held during the whole operation.

        :return: the result of the addOrModifyStatusElement call
        """
        expiration = datetime.utcnow() + timedelta(days=1)

        self.rssCache.acquireLock()
        try:
            res = self.rssClient.addOrModifyStatusElement('Resource', 'Status',
                                                          name=elementName,
                                                          elementType=elementType,
                                                          status=status,
                                                          statusType=statusType,
                                                          reason=reason,
                                                          tokenOwner=tokenOwner,
                                                          tokenExpiration=expiration)

            if res['OK']:
                self.rssCache.refreshCache()

            if not res['OK']:
                _msg = 'Error updating Element (%s,%s,%s)' % (elementName, statusType, status)
                gLogger.warn('RSS: %s' % _msg)

            return res

        finally:
            # Release lock, no matter what.
            self.rssCache.releaseLock()

    def __setCSElementStatus(self, elementName, elementType, statusType, status):
        """
        Sets on the CS the Elements status

        :return: S_OK / S_ERROR (the result of committing the CS changes)
        """
        # DIRAC doesn't store the status of ComputingElements nor FTS in the CS, so here we can just do nothing
        if elementType in ('ComputingElement', 'FTS'):
            return S_OK()

        # If we are here it is because elementType is either 'StorageElement' or 'Catalog'
        statuses = self.rssConfig.getConfigStatusType(elementType)
        if statusType not in statuses:
            gLogger.error("%s is not a valid statusType" % statusType)
            return S_ERROR("%s is not a valid statusType: %s" % (statusType, statuses))

        if elementType == 'StorageElement':
            cs_path = "/Resources/StorageElements"
        elif elementType == 'Catalog':
            cs_path = "/Resources/FileCatalogs"
            # FIXME: This a probably outdated location (new one is in /Operations/[]/Services/Catalogs)
            # but needs to be VO-aware
            statusType = 'Status'

        csAPI = CSAPI()
        csAPI.setOption("%s/%s/%s/%s" % (cs_path, elementName, elementType, statusType), status)

        res = csAPI.commitChanges()
        if not res['OK']:
            gLogger.warn('CS: %s' % res['Message'])

        return res

    def __getMode(self):
        """
        Gets flag defined (or not) on the RSSConfiguration.
        If defined as 'Active', we use RSS, if not, we use the CS when possible (and WMS for Sites).

        As a side effect the rssClient is created when missing in RSS mode and
        dropped otherwise.

        :return: True if the RSS is to be used, False otherwise
        """
        res = self.rssConfig.getConfigState()

        if res == 'Active':
            if self.rssClient is None:
                self.rssClient = ResourceStatusClient()
            return True

        self.rssClient = None
        return False

    def isStorageElementAlwaysBanned(self, seName, statusType):
        """
        Checks if the AlwaysBanned policy is applied to the SE given as parameter

        :param seName: string, name of the SE
        :param statusType: ReadAccess, WriteAccess, RemoveAccess, CheckAccess
        :returns: S_OK(True/False)
        """
        res = getPoliciesThatApply({'name': seName, 'statusType': statusType})
        if not res['OK']:
            self.log.error("isStorageElementAlwaysBanned: unable to get the information", res['Message'])
            return res

        isAlwaysBanned = 'AlwaysBanned' in [policy['type'] for policy in res['Value']]

        return S_OK(isAlwaysBanned)
class SummarizeLogsAgent(AgentModule):
    """ SummarizeLogsAgent as extension of AgentModule.

    For every table family ( Site, Resource and Node ) the agent condenses the
    rows of the <element>Log table into the <element>History table and deletes
    the processed Log rows.
    """

    def __init__(self, *args, **kwargs):
        """ Constructor.
        """
        AgentModule.__init__(self, *args, **kwargs)

        # Set in initialize()
        self.rsClient = None

    def initialize(self):
        """ Standard initialize.

        :return: S_OK
        """
        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        """ execute ( main method )

        The execute method runs over the three families of tables ( Site,
        Resource and Node ) performing identical operations. First, selects all
        logs for a given family ( and keeps track of which one is the last row
        ID ). It summarizes the logs and finally, deletes the logs from the
        database.

        :return: S_OK
        """
        # loop over the tables
        for element in ('Site', 'Resource', 'Node'):
            self.log.info('Summarizing %s' % element)

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs(element)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue

            lastID, logElements = selectLogElements['Value']

            # logElements is a dictionary of key-value pairs as follows:
            # ( name, statusType ) : list( logs )
            # items() rather than iteritems(): available on Python 2 and 3
            for key, logs in logElements.items():
                sumResult = self._registerLogs(element, key, logs)
                if not sumResult['OK']:
                    # A failure for one key must not stop the other keys
                    self.log.error(sumResult['Message'])

            if lastID is not None:
                self.log.info('Deleting %sLog till ID %s' % (element, lastID))
                deleteResult = self.rsClient.deleteStatusElement(element, 'Log',
                                                                 meta={'older': ('ID', lastID)})
                if not deleteResult['OK']:
                    self.log.error(deleteResult['Message'])

        return S_OK()

    #.............................................................................

    def _summarizeLogs(self, element):
        """ given an element, selects all logs in table <element>Log.

        Consecutive rows of the same ( name, statusType ) that change neither
        the Status nor the TokenOwner are discarded.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )

        :return: S_OK( lastID, listOfLogs ) / S_ERROR
        """
        selectResults = self.rsClient.selectStatusElement(element, 'Log')
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        selectColumns = selectResults['Columns']
        selectResults = selectResults['Value']

        latestID = None
        if selectResults:
            latestID = dict(zip(selectColumns, selectResults[-1]))['ID']

        for selectResult in selectResults:
            elementDict = dict(zip(selectColumns, selectResult))

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
            else:
                lastStatus = selectedItems[key][-1]['Status']
                lastToken = selectedItems[key][-1]['TokenOwner']

                # If there are no changes on the Status or the TokenOwner with respect
                # the previous one, discards the log.
                if lastStatus != elementDict['Status'] or lastToken != elementDict['TokenOwner']:
                    selectedItems[key].append(elementDict)

        return S_OK((latestID, selectedItems))

    def _registerLogs(self, element, key, logs):
        """ Given an element, a key - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them on the
        <element>History table. Before inserting them, checks whether the
        first one is or is not on the <element>History table. If it is, it is
        not inserted. It also checks whether the LastCheckTime parameter of the
        first log to be inserted is larger than the last history log
        LastCheckTime. If not, it means an agent cycle has been interrupted and
        we can run into inconsistencies. It aborts to prevent more dramatic
        results.

        Note that `logs` is modified in place when its first entry is dropped.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **key** - `tuple`
            tuple with the name of the element and the statusType
          **logs** - `list`
            list of dictionaries containing the logs

        :return: S_OK / S_ERROR
        """
        # Nothing to register; also protects the logs[ 0 ] accesses below
        if not logs:
            return S_OK()

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement(element, 'History', name, statusType,
                                                        meta={'columns': ['Status', 'LastCheckTime', 'TokenOwner'],
                                                              'limit': 1,
                                                              'order': 'LastCheckTime DESC'})
        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        # We want from the <element>History table the last Status, LastCheckTime
        # and TokenOwner
        lastStatus, lastCheckTime, lastToken = None, None, None
        if selectedRes:
            lastStatus, lastCheckTime, lastToken = selectedRes[0]

        # Sanity check to avoid running if an agent cycle has been stopped
        if lastCheckTime and logs[0]['LastCheckTime'] < lastCheckTime:
            return S_ERROR('Overlapping data. Seems the DB has not been cleared properly')

        # If the first of the selected items has a different status than the latest
        # on the history, we add it.
        if logs[0]['Status'] == lastStatus and logs[0]['TokenOwner'] == lastToken:
            logs.pop(0)

        if logs:
            self.log.info('%s ( %s )' % (name, statusType))

        for selectedItemDict in logs:
            res = self.__logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def __logToHistoryTable(self, element, elementDict):
        """ Given an element and a dictionary with all the arguments, this
        method inserts a new entry on the <element>History table

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **elementDict** - `dict`
            dictionary returned from the DB to be inserted on the History table

        :return: S_OK / S_ERROR ( S_ERROR also when a column is missing )
        """
        try:
            name = elementDict['Name']
            statusType = elementDict['StatusType']
            status = elementDict['Status']
            elementType = elementDict['ElementType']
            reason = elementDict['Reason']
            dateEffective = elementDict['DateEffective']
            lastCheckTime = elementDict['LastCheckTime']
            tokenOwner = elementDict['TokenOwner']
            tokenExpiration = elementDict['TokenExpiration']
        except KeyError as e:
            # "as" form: valid on Python 2.6+ and Python 3
            return S_ERROR(e)

        self.log.info(' %(Status)s %(DateEffective)s %(TokenOwner)s %(Reason)s' % elementDict)

        return self.rsClient.insertStatusElement(element, 'History', name, statusType,
                                                 status, elementType, reason,
                                                 dateEffective, lastCheckTime,
                                                 tokenOwner, tokenExpiration)

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class TokenAgent(AgentModule):
    '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.
    '''

    # Hours to notify a user
    __notifyHours = 12

    # Rss token
    __rssToken = 'rs_svc'

    # Admin mail
    __adminMail = None

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''
        AgentModule.__init__(self, *args, **kwargs)

        # Start from the class defaults; overridden by the agent options in initialize()
        self.notifyHours = self.__notifyHours
        self.adminMail = self.__adminMail

        self.rsClient = None
        # tokenOwner -> list of [tokenOwner, element, name, statusType, status, tokenExpiration]
        self.tokenDict = None
        self.diracAdmin = None

    def initialize(self):
        ''' TokenAgent initialization
        '''
        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)
        self.adminMail = self.am_getOption('adminMail', self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        '''
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        :return: S_OK ( errors are logged, never propagated )
        '''
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        for element in elements:
            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        # NOTE(review): _notifyOfTokens is not defined in the part of the class
        # visible here - confirm it is provided elsewhere.
        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    ## Protected methods #########################################################

    def _getInterestingTokens(self, element):
        '''
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :return: S_OK( list of row dictionaries ) / S_ERROR
        '''
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(element, 'Status',
                                                          meta={'older': ('TokenExpiration', tokenExpLimit)})
        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']
        tokenElements = tokenElements['Value']

        interestingTokens = []
        for tokenElement in tokenElements:
            tokenElement = dict(zip(tokenColumns, tokenElement))
            if tokenElement['TokenOwner'] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        '''
        Given an element and a list of interesting token elements, updates the
        database if the token is expired ( handing it back to the RSS token with
        no expiration ), logs a message and records the token in self.tokenDict.

        :return: S_OK / S_ERROR
        '''
        never = datetime.max

        for tokenElement in tokenElements:
            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                # "as" form: valid on Python 2.6+ and Python 3
                return S_ERROR(e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(element, 'Status', name=name,
                                                                statusType=statusType,
                                                                tokenOwner=self.__rssToken,
                                                                tokenExpiration=never)
                if not result['OK']:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Both expired and expiring tokens are recorded for their owner
            if tokenOwner not in self.tokenDict:
                self.tokenDict[tokenOwner] = []

            self.tokenDict[tokenOwner].append([tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()
class PEP:
    """ PEP ( Policy Enforcement Point ): asks the PDP for a decision and runs
    the resulting policy actions.
    """

    def __init__( self, clients = None ):
        """ Constructor

        examples:
          >>> pep = PEP()
          >>> pep1 = PEP( { 'ResourceStatusClient' : ResourceStatusClient() } )
          >>> pep2 = PEP( { 'ResourceStatusClient' : ResourceStatusClient(), 'ClientY' : None } )

        :Parameters:
          **clients** - [ None, `dict` ]
            clients handed over to the commands issued by the policies. Missing
            ones are imported by the commands themselves; passing them in avoids
            opening the same connection on every policy evaluation.
        """

        clients = {} if clients is None else clients

        # Two clients are used by the PEP itself: ResourceStatusClient and
        # ResourceManagementClient. They are only instantiated when not supplied.
        self.rsClient = ( clients[ 'ResourceStatusClient' ]
                          if 'ResourceStatusClient' in clients
                          else ResourceStatusClient() )
        self.rmClient = ( clients[ 'ResourceManagementClient' ]
                          if 'ResourceManagementClient' in clients
                          else ResourceManagementClient() )

        self.clients = clients
        # The PDP receives the same clients, to be used on the Commands
        self.pdp = PDP( clients )

    def enforce( self, decisionParams ):
        """ Passes decisionParams to the PDP and applies the actions it returns.

        On a positive match the PDP answers with a dictionary holding the
        sanitized decision parameters ( `decissionParams` ), every evaluated
        policy ( `singlePolicyResults` ) and the combined outcome
        ( `policyCombinedResult` ). decisionParams are sanitized in PDP.setup.

        examples:
          >>> pep.enforce( { 'element' : 'Site', 'name' : 'MySite' } )
          >>> pep.enforce( { 'element' : 'Resource', 'name' : 'myce.domain.ch' } )

        :Parameters:
          **decisionParams** - `dict`
            parameters used to match policies.

        :return: S_OK( PDP decision dictionary ) / S_ERROR
        """

        # Feed the PDP with the new parameters and let it run the policies
        self.pdp.setup( decisionParams )
        pdpResult = self.pdp.takeDecision()
        if not pdpResult[ 'OK' ]:
            gLogger.error( 'PEP: Something went wrong, not enforcing policies for %s' % decisionParams )
            return pdpResult
        decision = pdpResult[ 'Value' ]

        # From here on, work with the parameters as sanitized by the PDP
        decisionParams = decision[ 'decissionParams' ]
        combined = decision[ 'policyCombinedResult' ]
        singles = decision[ 'singlePolicyResults' ]

        # Policies have been evaluated; last check before running the actions
        stillCurrent = self.__isNotUpdated( decisionParams )
        if not stillCurrent[ 'OK' ]:
            return stillCurrent

        for actionName, actionType in combined[ 'PolicyAction' ]:

            try:
                module = Utils.voimport( 'DIRAC.ResourceStatusSystem.PolicySystem.Actions.%s' % actionType )
            except ImportError:
                gLogger.error( 'Error importing %s action' % actionType )
                continue

            try:
                actionClass = getattr( module, actionType )
            except AttributeError:
                gLogger.error( 'Error importing %s action class' % actionType )
                continue

            actionInstance = actionClass( actionName, decisionParams, combined,
                                          singles, self.clients )
            gLogger.debug( ( actionName, actionType ) )

            outcome = actionInstance.run()
            if not outcome[ 'OK' ]:
                # A failing action is logged, the remaining ones still run
                gLogger.error( outcome[ 'Message' ] )

        return S_OK( decision )

    def __isNotUpdated( self, decisionParams ):
        """ Checks that the element still exists exactly as it was passed to
        the PEP.

        While the PEP processes an element, a user may update its status
        through the web interface or the CLI; without this check the PEP would
        overwrite whatever the user had set. The check is not perfect ( the
        user action can still happen while the actions run ), so it must be
        re-evaluated if an action ever takes seconds to execute.

        :Parameters:
          **decisionParams** - `dict`
            parameters that were used to match policies

        :return: S_OK / S_ERROR
        """

        # Work on a copy; 'element' and 'active' cannot be passed as kwargs
        selectParams = decisionParams.copy()
        for notAKwarg in ( 'element', 'active' ):
            selectParams.pop( notAKwarg )

        # An exact match is expected. Anything else means the row changed and
        # the actions must not be applied.
        match = self.rsClient.selectStatusElement( decisionParams[ 'element' ],
                                                   'Status', **selectParams )
        if not match[ 'OK' ]:
            return match

        if match[ 'Value' ]:
            return S_OK()

        msg = '%(name)s ( %(status)s / %(statusType)s ) has been updated after PEP started running'
        return S_ERROR( msg % selectParams )

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class SiteStatus(object):
    """
    RSS helper to interact with the 'Site' family on the DB. It provides the
    most demanded functions and a cache to avoid hitting the server too often.

    It provides the following methods to interact with the site statuses:
    * getSiteStatuses
    * getUsableSites
    * getSites
    * setSiteStatus
    """

    __metaclass__ = DIRACSingleton

    # Statuses accepted by getSites and setSiteStatus
    _ALLOWED_STATES = ('Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown')

    def __init__(self):
        """
        Constructor, initializes the rssClient.
        """
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssFlag = ResourceStatus().rssFlag
        self.rsClient = ResourceStatusClient()

        # We can set CacheLifetime and CacheHistory from CS, so that we can tune them.
        cacheLifeTime = int(self.rssConfig.getConfigCache())

        # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
        self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

    def __updateRssCache(self):
        """
        Method used to update the rssCache.

        It will try 5 times to contact the RSS before giving up. After each
        failure it sleeps trial**2 seconds and re-creates the client.

        :return: S_OK( cache dictionary ) / S_ERROR
        """
        meta = {'columns': ['Name', 'Status']}

        for ti in range(5):
            rawCache = self.rsClient.selectStatusElement('Site', 'Status', meta=meta)
            if rawCache['OK']:
                break
            self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
            sleep(math.pow(ti, 2))
            self.rsClient = ResourceStatusClient()

        if not rawCache['OK']:
            return rawCache
        return S_OK(getCacheDictFromRawData(rawCache['Value']))

    def getSiteStatuses(self, siteNames=None):
        """
        Method that queries the database for status of the sites in a given list.
        A single string site name may also be provided as "siteNames".
        If the input is None, it is interpreted as * ( all ).

        If match is positive, the output looks like:
        {
         'test1.test1.org': 'Active',
         'test2.test2.org': 'Banned',
        }

        examples
          >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' } )
          >>> siteStatus.getSiteStatuses( 'NotExists')
              S_ERROR( ... ))
          >>> siteStatus.getSiteStatuses( None )
              S_OK( { 'test1.test1.org': 'Active',
                      'test2.test2.net': 'Banned', },
                      ...
                    }
                  )

        :Parameters:
          **siteNames** - `list` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """
        if self.rssFlag:
            return self.__getRSSSiteStatus(siteNames)

        # RSS is not in use: ask the WMS site mask instead
        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')

        if not siteNames:
            result = wmsAdmin.getSiteMaskStatus()
            if not result['OK']:
                return result
            return S_OK(result['Value'])

        # NOTE: basestring is kept as-is, this module targets Python 2
        if isinstance(siteNames, basestring):
            siteNames = [siteNames]

        siteStatusDict = {}
        for siteName in siteNames:
            result = wmsAdmin.getSiteMaskStatus(siteName)
            if not result['OK']:
                return result
            siteStatusDict[siteName] = result['Value']

        return S_OK(siteStatusDict)

    def __getRSSSiteStatus(self, siteName=None):
        """
        Gets from the cache or the RSS the Sites status. The cache is a copy of
        the DB table. If it is not on the cache, most likely is not going to be
        on the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait
        <cacheLifeTime> minutes.

        :param siteName: name of the site
        :type siteName: str

        :return: whatever rssCache.match returns for the given site name
        """
        cacheMatch = self.rssCache.match(siteName, '', '')

        self.log.debug('__getRSSSiteStatus')
        self.log.debug(cacheMatch)

        return cacheMatch

    def getUsableSites(self, siteNames=None):
        """
        Returns, in a list, all sites whose status is either Active or Degraded.

        examples
          >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getUsableSites( None )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )
          >>> siteStatus.getUsableSites( 'NotExists' )
              S_ERROR( ... )

        :Parameters:
          **siteNames** - `List` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """
        siteStatusDictRes = self.getSiteStatuses(siteNames)
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        usableSites = [name for name, status in siteStatusDictRes['Value'].items()
                       if status in ('Active', 'Degraded')]

        return S_OK(usableSites)

    def getSites(self, siteState='Active'):
        """
        By default, it gets the currently active site list.

        examples
          >>> siteStatus.getSites()
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Active' )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Banned' )
              S_OK( ['test0.test0.uk', ... ] )
          >>> siteStatus.getSites( 'All' )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
          >>> siteStatus.getSites( None )
              S_ERROR( ... )

        :Parameters:
          **siteState** - `String`
            state of the sites to be matched ( case insensitive ), or 'All'

        :return: S_OK() || S_ERROR()
        """
        if not siteState:
            return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

        siteStatusDictRes = self.getSiteStatuses()
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        # fix case sensitive string
        siteState = siteState.capitalize()

        if siteState == 'All':
            # no filtering requested, return everything
            return S_OK(list(siteStatusDictRes['Value']))

        if siteState not in self._ALLOWED_STATES:
            return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

        siteList = [name for name, status in siteStatusDictRes['Value'].items()
                    if status == siteState]

        return S_OK(siteList)

    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site in the 'SiteStatus' table of RSS, or in the
        WMS site mask when RSS is not in use.

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be changed
          **status** - `String`
            the new status ( case insensitive )
          **comment** - `String`
            reason for the change

        :return: S_OK() || S_ERROR()
        """
        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()
        if status not in self._ALLOWED_STATES:
            return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

        if not self.rssFlag:
            # The site ( and the reason ) must reach the service; calling
            # allowSite / banSite without arguments cannot act on any site.
            wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
            if status in ('Active', 'Degraded'):
                return wmsAdmin.allowSite(site, comment)
            return wmsAdmin.banSite(site, comment)

        result = getProxyInfo()
        if not result['OK']:
            return S_ERROR("Unable to get user proxy info %s " % result['Message'])
        tokenOwner = result['Value']['username']

        tokenExpiration = datetime.utcnow() + timedelta(days=1)

        self.rssCache.acquireLock()
        try:
            result = self.rsClient.modifyStatusElement('Site', 'Status', status=status, name=site,
                                                       tokenExpiration=tokenExpiration, reason=comment,
                                                       tokenOwner=tokenOwner)
            if result['OK']:
                self.rssCache.refreshCache()
            else:
                _msg = 'Error updating status of site %s to %s' % (site, status)
                gLogger.warn('RSS: %s' % _msg)
        finally:
            # Release lock, no matter what.
            self.rssCache.releaseLock()

        return result
class Synchronizer:
    """
    Every time there is a successful write on the CS, Synchronizer().sync() is
    executed. It updates the database with the values on the CS.
    """

    def __init__(self, rStatus=None, rManagement=None, defaultStatus="Unknown"):
        """
        :param rStatus: ResourceStatus client to use; a new
          ResourceStatusClient is created when None
        :param rManagement: ResourceManagement client to use; a new
          ResourceManagementClient is created when None
        :param defaultStatus: status given to the entries added to the DB
        """
        # Warm up local CS
        CSHelpers.warmUp()

        # A client passed in by the caller must be kept: assigning only in the
        # None case would leave the attribute undefined otherwise.
        self.rStatus = ResourceStatusClient() if rStatus is None else rStatus
        self.rManagement = ResourceManagementClient() if rManagement is None else rManagement

        self.defaultStatus = defaultStatus

        self.rssConfig = RssConfiguration()

        # this just sets the main owner, "rs_svc" just mean "RSS service"
        self.tokenOwner = "rs_svc"

        # if we are running this script as a user (from a CLI),
        # the username found the proxy will be used as tokenOwner
        result = getProxyInfo()
        if result["OK"]:
            self.tokenOwner = result["Value"]["username"]

    def sync(self, _eventName, _params):
        """
        Main synchronizer method. It synchronizes the three types of elements:
        Sites, Resources and Nodes. Failures of a _syncX method are logged and
        do not prevent the next one from running.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            this parameter is ignored, but needed by caller function.
          **_params** - any
            this parameter is ignored, but needed by caller function.

        :return: S_OK
        """
        syncSites = self._syncSites()
        if not syncSites["OK"]:
            gLogger.error(syncSites["Message"])

        syncResources = self._syncResources()
        if not syncResources["OK"]:
            gLogger.error(syncResources["Message"])

        syncNodes = self._syncNodes()
        if not syncNodes["OK"]:
            gLogger.error(syncNodes["Message"])

        return S_OK()

    def _syncSites(self):
        """
        Sync sites: compares CS with DB and does the necessary modifications.

        :return: S_OK / S_ERROR ( first failing query )
        """
        gLogger.info("-- Synchronizing sites --")

        # sites in CS
        res = getSites()
        if not res["OK"]:
            return res
        sitesCS = res["Value"]

        gLogger.verbose("%s sites found in CS" % len(sitesCS))

        # sites in RSS
        result = self.rStatus.selectStatusElement("Site", "Status", meta={"columns": ["Name"]})
        if not result["OK"]:
            return result
        sitesDB = [siteDB[0] for siteDB in result["Value"]]

        # Sites that are in DB but not (anymore) in CS
        toBeDeleted = list(set(sitesDB).difference(set(sitesCS)))
        gLogger.verbose("%s sites to be deleted" % len(toBeDeleted))

        # Delete sites
        for siteName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement("Site", siteName)
            gLogger.verbose("Deleting site %s" % siteName)
            if not deleteQuery["OK"]:
                return deleteQuery

        # Sites that are in CS but not (anymore) in DB
        toBeAdded = list(set(sitesCS).difference(set(sitesDB)))
        gLogger.verbose("%s site entries to be added" % len(toBeAdded))

        for site in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                "Site",
                "Status",
                name=site,
                statusType="all",
                status=self.defaultStatus,
                elementType="Site",
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not query["OK"]:
                return query

        return S_OK()

    def _syncResources(self):
        """
        Sync resources: compares CS with DB and does the necessary modifications.
        ( StorageElements, FTS, FileCatalogs and ComputingElements )

        Each step is best-effort: a failure is logged and the next step runs.

        :return: S_OK
        """
        gLogger.info("-- Synchronizing Resources --")

        gLogger.verbose("-> StorageElements")
        ses = self.__syncStorageElements()
        if not ses["OK"]:
            gLogger.error(ses["Message"])

        gLogger.verbose("-> FTS")
        fts = self.__syncFTS()
        if not fts["OK"]:
            gLogger.error(fts["Message"])

        gLogger.verbose("-> FileCatalogs")
        fileCatalogs = self.__syncFileCatalogs()
        if not fileCatalogs["OK"]:
            gLogger.error(fileCatalogs["Message"])

        gLogger.verbose("-> ComputingElements")
        computingElements = self.__syncComputingElements()
        if not computingElements["OK"]:
            gLogger.error(computingElements["Message"])

        gLogger.verbose("-> removing resources that no longer exist in the CS")
        removingResources = self.__removeNonExistingResourcesFromRM()
        if not removingResources["OK"]:
            gLogger.error(removingResources["Message"])

        return S_OK()

    def _syncNodes(self):
        """
        Sync nodes: compares CS with DB and does the necessary modifications.
        ( Queues )

        :return: S_OK
        """
        gLogger.info("-- Synchronizing Nodes --")

        gLogger.verbose("-> Queues")
        queues = self.__syncQueues()
        if not queues["OK"]:
            gLogger.error(queues["Message"])

        return S_OK()

    def __removeNonExistingResourcesFromRM(self):
        """
        Remove resources from DowntimeCache table that no longer exist in the CS.

        :return: S_OK / S_ERROR
        """
        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                "ResourceManagement is not installed, skipping removal of non existing resources..."
            )
            return S_OK()

        sesHosts = getStorageElementsHosts()
        if not sesHosts["OK"]:
            return sesHosts

        # Collected in a fresh set: fast membership test below, and the list
        # returned by getStorageElementsHosts is left untouched.
        resources = set(sesHosts["Value"])

        # FTS servers and CEs are added on a best-effort basis
        ftsServer = getFTS3Servers(hostOnly=True)
        if ftsServer["OK"]:
            resources.update(ftsServer["Value"])

        res = getCESiteMapping()
        if res["OK"]:
            resources.update(res["Value"])

        downtimes = self.rManagement.selectDowntimeCache()
        if not downtimes["OK"]:
            return downtimes

        # Remove hosts that no longer exist in the CS
        for host in downtimes["Value"]:
            gLogger.verbose("Checking if %s is still in the CS" % host[0])
            if host[0] not in resources:
                gLogger.verbose("%s is no longer in CS, removing entry..." % host[0])
                result = self.rManagement.deleteDowntimeCache(name=host[0])
                if not result["OK"]:
                    return result

        return S_OK()

    def _syncElementType(self, element, elementType, namesCS, label, addedLabel, logAdded=None):
        """
        Shared body of the __syncX methods: makes the DB entries of one element
        type match the names found in the CS.

        Entries in the DB but not in the CS are deleted; every
        ( name, statusType ) pair expected from the CS and missing in the DB is
        added with the default status.

        :param element: element family ( "Resource" or "Node" )
        :param elementType: element type used to filter the DB and to get the
          configured status types ( e.g. "StorageElement" )
        :param namesCS: iterable with the names found in the CS
        :param label: text used in the "found" / "to be deleted" log messages
        :param addedLabel: text used in the "to be added" log message
        :param logAdded: logging callable for the "to be added" message,
          gLogger.verbose when None
        :return: S_OK / S_ERROR ( first failing query )
        """
        if logAdded is None:
            logAdded = gLogger.verbose

        gLogger.verbose("%s %s found in CS" % (len(namesCS), label))

        namesDB = self.rStatus.selectStatusElement(
            element, "Status", elementType=elementType, meta={"columns": ["Name"]})
        if not namesDB["OK"]:
            return namesDB
        namesDB = [row[0] for row in namesDB["Value"]]

        # Entries that are in DB but not in CS
        toBeDeleted = list(set(namesDB).difference(set(namesCS)))
        gLogger.verbose("%s %s to be deleted" % (len(toBeDeleted), label))

        for name in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement(element, name)
            gLogger.verbose("... %s" % name)
            if not deleteQuery["OK"]:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType(elementType)

        result = self.rStatus.selectStatusElement(
            element, "Status", elementType=elementType, meta={"columns": ["Name", "StatusType"]})
        if not result["OK"]:
            return result
        tuplesDB = set((row[0], row[1]) for row in result["Value"])

        # For each ( name, statusType ) tuple not present in the DB, add it.
        tuplesCS = set((name, statusType) for name in namesCS for statusType in statusTypes)
        toBeAdded = list(tuplesCS.difference(tuplesDB))

        logAdded("%s %s to be added" % (len(toBeAdded), addedLabel))

        for name, statusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                element,
                "Status",
                name=name,
                statusType=statusType,
                status=self.defaultStatus,
                elementType=elementType,
                tokenOwner=self.tokenOwner,
                reason="Synchronized",
            )
            if not query["OK"]:
                return query

        return S_OK()

    def __syncComputingElements(self):
        """
        Sync ComputingElements: compares CS with DB and does the necessary
        modifications.
        """
        res = getCESiteMapping()
        if not res["OK"]:
            return res
        cesCS = list(res["Value"])

        # The "to be added" message of this type is logged at debug level
        return self._syncElementType(
            "Resource", "ComputingElement", cesCS,
            "Computing elements", "Computing elements entries", logAdded=gLogger.debug)

    def __syncFileCatalogs(self):
        """
        Sync FileCatalogs: compares CS with DB and does the necessary
        modifications.
        """
        catalogsCS = CSHelpers.getFileCatalogs()
        if not catalogsCS["OK"]:
            return catalogsCS

        return self._syncElementType(
            "Resource", "Catalog", catalogsCS["Value"],
            "File catalogs", "File catalogs entries")

    def __syncFTS(self):
        """
        Sync FTS: compares CS with DB and does the necessary modifications.
        """
        ftsCS = CSHelpers.getFTS()
        if not ftsCS["OK"]:
            return ftsCS

        return self._syncElementType(
            "Resource", "FTS", ftsCS["Value"],
            "FTS endpoints", "FTS endpoints entries")

    def __syncStorageElements(self):
        """
        Sync StorageElements: compares CS with DB and does the necessary
        modifications.
        """
        # Used directly as the collection of names ( no S_OK wrapper here )
        sesCS = DMSHelpers().getStorageElements()

        return self._syncElementType(
            "Resource", "StorageElement", sesCS,
            "storage elements", "storage element entries")

    def __syncQueues(self):
        """
        Sync Queues: compares CS with DB and does the necessary modifications.
        """
        queuesCS = CSHelpers.getQueuesRSS()
        if not queuesCS["OK"]:
            return queuesCS

        return self._syncElementType(
            "Node", "Queue", queuesCS["Value"],
            "Queues", "Queue entries")
class Synchronizer(object):
    '''
    Keeps the ResourceStatus tables aligned with the Configuration Service.

    Every time there is a successful write on the CS, Synchronizer().sync() is
    executed. It updates the database with the values on the CS.
    '''

    def __init__(self, rStatus=None, rManagement=None, defaultStatus="Unknown"):
        '''
        :Parameters:
          **rStatus** - client used for the status tables
            a ResourceStatusClient is created when None.
          **rManagement** - client used for the DowntimeCache table
            a ResourceManagementClient is created when None.
          **defaultStatus** - `str`
            status assigned to every element added by the synchronization.
        '''
        # Warm up local CS
        CSHelpers.warmUp()

        # Always bind both attributes. Previously they were only set when the
        # argument was None, so a caller-supplied client left the attribute
        # undefined and the first _syncX call raised AttributeError.
        self.rStatus = ResourceStatusClient() if rStatus is None else rStatus
        self.rManagement = ResourceManagementClient() if rManagement is None else rManagement

        self.defaultStatus = defaultStatus
        self.rssConfig = RssConfiguration()

        # this just sets the main owner, "rs_svc" just means "RSS service"
        self.tokenOwner = "rs_svc"

        # if we are running this script as a user (from a CLI),
        # the username found in the proxy will be used as tokenOwner
        result = getProxyInfo()
        if result['OK']:
            self.tokenOwner = result['Value']['username']

    def sync(self, _eventName, _params):
        '''
        Main synchronizer method. It synchronizes the three types of elements:
        Sites, Resources and Nodes. A failure in one of the three steps is
        logged and does not prevent the following steps from running.

        examples:
          >>> s.sync( None, None )
              S_OK()

        :Parameters:
          **_eventName** - any
            this parameter is ignored, but needed by caller function.
          **_params** - any
            this parameter is ignored, but needed by caller function.

        :return: S_OK (always, errors are only logged)
        '''
        syncSites = self._syncSites()
        if not syncSites['OK']:
            gLogger.error(syncSites['Message'])

        syncResources = self._syncResources()
        if not syncResources['OK']:
            gLogger.error(syncResources['Message'])

        syncNodes = self._syncNodes()
        if not syncNodes['OK']:
            gLogger.error(syncNodes['Message'])

        return S_OK()

    def _syncSites(self):
        '''
        Sync sites: compares CS with DB and does the necessary modifications.

        :return: S_OK | the first failing result
        '''
        gLogger.info('-- Synchronizing sites --')

        # sites in CS
        res = getSites()
        if not res['OK']:
            return res
        sitesCS = res['Value']
        gLogger.verbose('%s sites found in CS' % len(sitesCS))

        # sites in RSS
        result = self.rStatus.selectStatusElement('Site', 'Status',
                                                  meta={'columns': ['Name']})
        if not result['OK']:
            return result
        sitesDB = [siteDB[0] for siteDB in result['Value']]

        # Sites that are in DB but not (anymore) in CS
        toBeDeleted = list(set(sitesDB).difference(set(sitesCS)))
        gLogger.verbose('%s sites to be deleted' % len(toBeDeleted))

        # Delete sites
        for siteName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Site', siteName)
            gLogger.verbose('Deleting site %s' % siteName)
            if not deleteQuery['OK']:
                return deleteQuery

        # Sites that are in CS but not (yet) in DB
        toBeAdded = list(set(sitesCS).difference(set(sitesDB)))
        gLogger.verbose('%s site entries to be added' % len(toBeAdded))

        for site in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Site', 'Status',
                name=site,
                statusType='all',
                status=self.defaultStatus,
                elementType='Site',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def _syncResources(self):
        '''
        Sync resources: compares CS with DB and does the necessary modifications.
        ( StorageElements, FTS, FileCatalogs and ComputingElements )

        Each step is best-effort: a failure is logged and the next step runs.

        :return: S_OK (always)
        '''
        gLogger.info('-- Synchronizing Resources --')

        gLogger.verbose('-> StorageElements')
        ses = self.__syncStorageElements()
        if not ses['OK']:
            gLogger.error(ses['Message'])

        gLogger.verbose('-> FTS')
        fts = self.__syncFTS()
        if not fts['OK']:
            gLogger.error(fts['Message'])

        gLogger.verbose('-> FileCatalogs')
        fileCatalogs = self.__syncFileCatalogs()
        if not fileCatalogs['OK']:
            gLogger.error(fileCatalogs['Message'])

        gLogger.verbose('-> ComputingElements')
        computingElements = self.__syncComputingElements()
        if not computingElements['OK']:
            gLogger.error(computingElements['Message'])

        gLogger.verbose('-> removing resources that no longer exist in the CS')
        removingResources = self.__removeNonExistingResourcesFromRM()
        if not removingResources['OK']:
            gLogger.error(removingResources['Message'])

        # FIXME: VOMS

        return S_OK()

    def _syncNodes(self):
        '''
        Sync nodes: compares CS with DB and does the necessary modifications.
        ( Queues )

        :return: S_OK (always, errors are only logged)
        '''
        gLogger.info('-- Synchronizing Nodes --')

        gLogger.verbose('-> Queues')
        queues = self.__syncQueues()
        if not queues['OK']:
            gLogger.error(queues['Message'])

        return S_OK()

    def __removeNonExistingResourcesFromRM(self):
        '''
        Remove resources from DowntimeCache table that no longer exist in the CS.

        :return: S_OK | the first failing result
        '''
        if not getServiceURL("ResourceStatus/ResourceManagement"):
            gLogger.verbose(
                'ResourceManagement is not installed, skipping removal of non existing resources...')
            return S_OK()

        sesHosts = getStorageElementsHosts()
        if not sesHosts['OK']:
            return sesHosts

        # Work on a copy so the list handed back by the helper is not extended
        # in place.
        resources = list(sesHosts['Value'])

        # FTS servers and computing elements are optional contributions: a
        # failure to read them is silently ignored.
        ftsServer = getFTS3Servers(hostOnly=True)
        if ftsServer['OK']:
            resources.extend(ftsServer['Value'])

        ce = CSHelpers.getComputingElements()
        if ce['OK']:
            resources.extend(ce['Value'])

        downtimes = self.rManagement.selectDowntimeCache()
        if not downtimes['OK']:
            return downtimes

        # Remove hosts that no longer exist in the CS
        for host in downtimes['Value']:
            gLogger.verbose('Checking if %s is still in the CS' % host[0])
            if host[0] not in resources:
                gLogger.verbose('%s is no longer in CS, removing entry...' % host[0])
                result = self.rManagement.deleteDowntimeCache(name=host[0])
                if not result['OK']:
                    return result

        return S_OK()

    def __syncComputingElements(self):
        '''
        Sync ComputingElements: compares CS with DB and does the necessary
        modifications.

        :return: S_OK | the first failing result
        '''
        cesCS = CSHelpers.getComputingElements()
        if not cesCS['OK']:
            return cesCS
        cesCS = cesCS['Value']
        gLogger.verbose('%s Computing elements found in CS' % len(cesCS))

        cesDB = self.rStatus.selectStatusElement('Resource', 'Status',
                                                 elementType='ComputingElement',
                                                 meta={'columns': ['Name']})
        if not cesDB['OK']:
            return cesDB
        cesDB = [ceDB[0] for ceDB in cesDB['Value']]

        # ComputingElements that are in DB but not in CS
        toBeDeleted = list(set(cesDB).difference(set(cesCS)))
        gLogger.verbose('%s Computing elements to be deleted' % len(toBeDeleted))

        # Delete computing elements
        for ceName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Resource', ceName)
            gLogger.verbose('... %s' % ceName)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType('ComputingElement')

        result = self.rStatus.selectStatusElement('Resource', 'Status',
                                                  elementType='ComputingElement',
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        cesTuple = [(x[0], x[1]) for x in result['Value']]

        # For each ( ce, statusType ) tuple not present in the DB, add it.
        cesStatusTuples = [(ce, statusType) for ce in cesCS for statusType in statusTypes]
        toBeAdded = list(set(cesStatusTuples).difference(set(cesTuple)))
        gLogger.debug('%s Computing elements entries to be added' % len(toBeAdded))

        for ceName, ceStatusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=ceName,
                statusType=ceStatusType,
                status=self.defaultStatus,
                elementType='ComputingElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def __syncFileCatalogs(self):
        '''
        Sync FileCatalogs: compares CS with DB and does the necessary
        modifications.

        :return: S_OK | the first failing result
        '''
        catalogsCS = CSHelpers.getFileCatalogs()
        if not catalogsCS['OK']:
            return catalogsCS
        catalogsCS = catalogsCS['Value']
        gLogger.verbose('%s File catalogs found in CS' % len(catalogsCS))

        catalogsDB = self.rStatus.selectStatusElement('Resource', 'Status',
                                                      elementType='Catalog',
                                                      meta={'columns': ['Name']})
        if not catalogsDB['OK']:
            return catalogsDB
        catalogsDB = [catalogDB[0] for catalogDB in catalogsDB['Value']]

        # File catalogs that are in DB but not in CS
        toBeDeleted = list(set(catalogsDB).difference(set(catalogsCS)))
        gLogger.verbose('%s File catalogs to be deleted' % len(toBeDeleted))

        # Delete file catalogs
        for catalogName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Resource', catalogName)
            gLogger.verbose('... %s' % catalogName)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType('Catalog')

        result = self.rStatus.selectStatusElement('Resource', 'Status',
                                                  elementType='Catalog',
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        catalogsTuple = [(x[0], x[1]) for x in result['Value']]

        # For each ( catalog, statusType ) tuple not present in the DB, add it.
        catalogsStatusTuples = [(catalog, statusType)
                                for catalog in catalogsCS for statusType in statusTypes]
        toBeAdded = list(set(catalogsStatusTuples).difference(set(catalogsTuple)))
        gLogger.verbose('%s File catalogs entries to be added' % len(toBeAdded))

        for catalogName, catalogStatusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=catalogName,
                statusType=catalogStatusType,
                status=self.defaultStatus,
                elementType='Catalog',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def __syncFTS(self):
        '''
        Sync FTS: compares CS with DB and does the necessary modifications.

        :return: S_OK | the first failing result
        '''
        ftsCS = CSHelpers.getFTS()
        if not ftsCS['OK']:
            return ftsCS
        ftsCS = ftsCS['Value']
        gLogger.verbose('%s FTS endpoints found in CS' % len(ftsCS))

        ftsDB = self.rStatus.selectStatusElement('Resource', 'Status',
                                                 elementType='FTS',
                                                 meta={'columns': ['Name']})
        if not ftsDB['OK']:
            return ftsDB
        ftsDB = [fts[0] for fts in ftsDB['Value']]

        # FTS endpoints that are in DB but not in CS
        toBeDeleted = list(set(ftsDB).difference(set(ftsCS)))
        gLogger.verbose('%s FTS endpoints to be deleted' % len(toBeDeleted))

        # Delete FTS endpoints
        for ftsName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Resource', ftsName)
            gLogger.verbose('... %s' % ftsName)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType('FTS')

        result = self.rStatus.selectStatusElement('Resource', 'Status',
                                                  elementType='FTS',
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        ftsTuple = [(x[0], x[1]) for x in result['Value']]

        # For each ( fts, statusType ) tuple not present in the DB, add it.
        ftsStatusTuples = [(fts, statusType) for fts in ftsCS for statusType in statusTypes]
        toBeAdded = list(set(ftsStatusTuples).difference(set(ftsTuple)))
        gLogger.verbose('%s FTS endpoints entries to be added' % len(toBeAdded))

        for ftsName, ftsStatusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=ftsName,
                statusType=ftsStatusType,
                status=self.defaultStatus,
                elementType='FTS',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def __syncStorageElements(self):
        '''
        Sync StorageElements: compares CS with DB and does the necessary
        modifications.

        :return: S_OK | the first failing result
        '''
        # NOTE(review): the value is used directly as a collection of names,
        # without an 'OK' check - presumably getStorageElements returns a plain
        # list rather than an S_OK structure; confirm against DMSHelpers.
        sesCS = DMSHelpers().getStorageElements()
        gLogger.verbose('%s storage elements found in CS' % len(sesCS))

        sesDB = self.rStatus.selectStatusElement('Resource', 'Status',
                                                 elementType='StorageElement',
                                                 meta={'columns': ['Name']})
        if not sesDB['OK']:
            return sesDB
        sesDB = [seDB[0] for seDB in sesDB['Value']]

        # StorageElements that are in DB but not in CS
        toBeDeleted = list(set(sesDB).difference(set(sesCS)))
        gLogger.verbose('%s storage elements to be deleted' % len(toBeDeleted))

        # Delete storage elements
        for sesName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Resource', sesName)
            gLogger.verbose('... %s' % sesName)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType('StorageElement')

        result = self.rStatus.selectStatusElement('Resource', 'Status',
                                                  elementType='StorageElement',
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        sesTuple = [(x[0], x[1]) for x in result['Value']]

        # For each ( se, statusType ) tuple not present in the DB, add it.
        sesStatusTuples = [(se, statusType) for se in sesCS for statusType in statusTypes]
        toBeAdded = list(set(sesStatusTuples).difference(set(sesTuple)))
        gLogger.verbose('%s storage element entries to be added' % len(toBeAdded))

        for seName, seStatusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Resource', 'Status',
                name=seName,
                statusType=seStatusType,
                status=self.defaultStatus,
                elementType='StorageElement',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()

    def __syncQueues(self):
        '''
        Sync Queues: compares CS with DB and does the necessary modifications.

        :return: S_OK | the first failing result
        '''
        queuesCS = CSHelpers.getQueuesRSS()
        if not queuesCS['OK']:
            return queuesCS
        queuesCS = queuesCS['Value']
        gLogger.verbose('%s Queues found in CS' % len(queuesCS))

        queuesDB = self.rStatus.selectStatusElement('Node', 'Status',
                                                    elementType='Queue',
                                                    meta={'columns': ['Name']})
        if not queuesDB['OK']:
            return queuesDB
        queuesDB = [queueDB[0] for queueDB in queuesDB['Value']]

        # Queues that are in DB but not in CS
        toBeDeleted = list(set(queuesDB).difference(set(queuesCS)))
        gLogger.verbose('%s Queues to be deleted' % len(toBeDeleted))

        # Delete queues
        for queueName in toBeDeleted:
            deleteQuery = self.rStatus._extermineStatusElement('Node', queueName)
            gLogger.verbose('... %s' % queueName)
            if not deleteQuery['OK']:
                return deleteQuery

        statusTypes = self.rssConfig.getConfigStatusType('Queue')

        result = self.rStatus.selectStatusElement('Node', 'Status',
                                                  elementType='Queue',
                                                  meta={'columns': ['Name', 'StatusType']})
        if not result['OK']:
            return result
        # Kept under its own name: the loop below used to rebind the same
        # identifier as its iteration variable.
        queuesTuple = [(x[0], x[1]) for x in result['Value']]

        # For each ( queue, statusType ) tuple not present in the DB, add it.
        queueStatusTuples = [(queue, statusType)
                             for queue in queuesCS for statusType in statusTypes]
        toBeAdded = list(set(queueStatusTuples).difference(set(queuesTuple)))
        gLogger.verbose('%s Queue entries to be added' % len(toBeAdded))

        for queueName, queueStatusType in toBeAdded:
            query = self.rStatus.addIfNotThereStatusElement(
                'Node', 'Status',
                name=queueName,
                statusType=queueStatusType,
                status=self.defaultStatus,
                elementType='Queue',
                tokenOwner=self.tokenOwner,
                reason='Synchronized')
            if not query['OK']:
                return query

        return S_OK()
class SummarizeLogsAgent(AgentModule):
    '''
    Agent that condenses the <element>Log tables into the <element>History
    tables, keeping only the entries that represent a change of status.
    '''

    # Date format in database
    __dateFormat = '%Y-%m-%d %H:%M:%S'

    def __init__(self, *args, **kwargs):
        ''' c'tor '''
        AgentModule.__init__(self, *args, **kwargs)

        # Created in initialize()
        self.rsClient = None

    def initialize(self):
        '''
        Standard initialize: creates the ResourceStatusClient.

        NOTE(review): the original docstring states that the ProductionManager
        shifterProxy is used to modify the ResourceStatus DB; nothing in this
        method configures it - confirm where that happens.

        :return: S_OK
        '''
        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        '''
        Summarizes the logs of every element family. Errors are logged and the
        agent moves on to the next family.

        :return: S_OK (always)
        '''
        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        # We want neither minutes, nor seconds, nor microseconds
        thisHour = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

        for element in elements:
            self.log.info('Summarizing %s' % element)

            selectLogElements = self._selectLogElements(element, thisHour)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue
            selectLogElements = selectLogElements['Value']

            for selectedKey, selectedItem in selectLogElements.items():
                sRes = self._logSelectedLogElement(element, selectedKey,
                                                   selectedItem, thisHour)
                if not sRes['OK']:
                    self.log.error(sRes['Message'])
                    # The first failure stops the processing of this family.
                    break

        return S_OK()

    def _selectLogElements(self, element, thisHour):
        '''
        For a given element, selects all the entries on the <element>Log table
        with LastCheckTime > <lastHour>. It groups them by tuples of
        ( <name>, <statusType> ) and keeps only the statuses that represent a
        change in the status. Entries newer than <thisHour> are ignored.

        :Parameters:
          **element** - `str`
            element family ( Site, Resource or Node )
          **thisHour** - `datetime`
            upper limit of the time window; the lower one is an hour earlier.

        :return: S_OK( { ( name, statusType ) : [ elementDict, ... ] } ) | S_ERROR
        '''
        lastHour = thisHour - timedelta(hours=1)

        selectResults = self.rsClient.selectStatusElement(
            element, 'Log', meta={'newer': ('LastCheckTime', lastHour)})
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        selectColumns = selectResults['Columns']

        for selectResult in selectResults['Value']:
            elementDict = dict(zip(selectColumns, selectResult))

            if elementDict['LastCheckTime'] > thisHour:
                continue

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
            elif selectedItems[key][-1]['Status'] != elementDict['Status']:
                # Only a status different from the last one kept is a change.
                selectedItems[key].append(elementDict)

        return S_OK(selectedItems)

    def _logSelectedLogElement(self, element, selectedKey, selectedItem, thisHour):
        '''
        Given an element, a selectedKey - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them. Before inserting
        them, checks whether the status of the first one matches the newest
        entry on the <element>History table. If it does, it is not inserted.

        :return: S_OK | S_ERROR ( also when the newest history entry already
                 falls within the last hour )
        '''
        name, statusType = selectedKey

        selectedRes = self.rsClient.selectStatusElement(
            element, 'History', name, statusType,
            meta={'columns': ['Status', 'LastCheckTime']})
        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        selectedStatus = None
        if selectedRes:
            # Get the last selectedRes, which is taken to be the newest one.
            # Each selectedRes is a tuple, in this case, containing two
            # elements - Status, LastCheckTime
            selectedStatus, selectedLastTime = selectedRes[-1]

            if selectedLastTime > thisHour - timedelta(hours=1):
                return S_ERROR('The agent has run once on this time span, skipping')

        # If the first of the selected items has a different status than the
        # latest on the history, we add it.
        if selectedItem[0]['Status'] != selectedStatus:
            res = self._logToHistoryTable(element, selectedItem[0])
            if not res['OK']:
                return res

        for selectedItemDict in selectedItem[1:]:
            res = self._logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def _logToHistoryTable(self, element, elementDict):
        '''
        Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table.

        :return: result of insertStatusElement | S_ERROR if a field is missing
        '''
        # Order matters: the values are passed positionally to
        # insertStatusElement.
        fields = ('Name', 'StatusType', 'Status', 'ElementType', 'Reason',
                  'DateEffective', 'LastCheckTime', 'TokenOwner',
                  'TokenExpiration')

        try:
            values = [elementDict[field] for field in fields]
        except KeyError as e:
            return S_ERROR(e)

        return self.rsClient.insertStatusElement(element, 'History', *values)
class ElementInspectorAgent(AgentModule):
    '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    This Agent takes care of the Elements. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.
    '''

    # Max number of worker threads by default
    __maxNumberOfThreads = 5
    # ElementType, to be defined among Site, Resource or Node
    __elementType = None
    # Inspection freqs ( minutes ), defaults; the lower, the higher priority to
    # be checked. Error state usually means there is a glitch somewhere, so it
    # has the highest priority.
    __checkingFreqs = {
        'Default': {
            'Active': 60,
            'Degraded': 30,
            'Probing': 30,
            'Banned': 30,
            'Unknown': 15,
            'Error': 15
        }
    }
    # queue size limit to stop feeding
    __limitQueueFeeder = 15

    def __init__(self, *args, **kwargs):
        ''' c'tor '''
        AgentModule.__init__(self, *args, **kwargs)

        # members initialization
        self.maxNumberOfThreads = self.__maxNumberOfThreads
        self.elementType = self.__elementType
        self.checkingFreqs = self.__checkingFreqs
        self.limitQueueFeeder = self.__limitQueueFeeder

        self.elementsToBeChecked = None
        self.threadPool = None
        self.rsClient = None
        self.clients = {}

    def initialize(self):
        '''
        Standard initialize: reads the agent options, creates the work queue,
        the thread pool and the clients handed to the PEP.

        NOTE(review): the original docstring states that the ProductionManager
        shifterProxy is used to modify the ResourceStatus DB; nothing in this
        method configures it - confirm where that happens.

        :return: S_OK
        '''
        self.maxNumberOfThreads = self.am_getOption('maxNumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.elementType = self.am_getOption('elementType', self.elementType)
        self.checkingFreqs = self.am_getOption('checkingFreqs', self.checkingFreqs)
        self.limitQueueFeeder = self.am_getOption('limitQueueFeeder',
                                                  self.limitQueueFeeder)

        self.elementsToBeChecked = Queue.Queue()
        self.threadPool = ThreadPool(self.maxNumberOfThreads, self.maxNumberOfThreads)

        self.rsClient = ResourceStatusClient()

        self.clients['ResourceStatusClient'] = self.rsClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()
        self.clients['PilotsDB'] = PilotAgentsDB()

        return S_OK()

    def execute(self):
        '''
        Feeds the queue with the elements that are due for a check and starts
        the worker threads needed to process them.

        :return: S_OK | the failing result of the status selection
        '''
        # If there are elements in the queue to be processed, we wait: when
        # more than limitQueueFeeder elements are still queued we do not add
        # anything, but the threads are running and processing items from the
        # queue on background.
        qsize = self.elementsToBeChecked.qsize()
        if qsize > self.limitQueueFeeder:
            self.log.warn('Queue not empty ( %s > %s ), skipping feeding loop'
                          % (qsize, self.limitQueueFeeder))
            return S_OK()

        # We get all the elements, then we filter.
        elements = self.rsClient.selectStatusElement(self.elementType, 'Status')
        if not elements['OK']:
            self.log.error(elements['Message'])
            return elements

        utcnow = datetime.datetime.utcnow().replace(microsecond=0)

        # filter elements by Type
        for element in elements['Value']:

            # Maybe an overkill, but this way I have NEVER again to worry about
            # order of elements returned by mySQL on tuples
            elemDict = dict(zip(elements['Columns'], element))

            # We skip the elements with token different than "rs_svc"
            if elemDict['TokenOwner'] != 'rs_svc':
                self.log.info('Skipping %s ( %s ) with token %s'
                              % (elemDict['Name'], elemDict['StatusType'],
                                 elemDict['TokenOwner']))
                continue

            # Element types without their own frequencies use the defaults
            if elemDict['ElementType'] not in self.checkingFreqs:
                timeToNextCheck = self.checkingFreqs['Default'][elemDict['Status']]
            else:
                timeToNextCheck = self.checkingFreqs[elemDict['ElementType']][elemDict['Status']]

            if utcnow - datetime.timedelta(minutes=timeToNextCheck) > elemDict['LastCheckTime']:

                # We are not checking if the item is already on the queue or
                # not. It may be there, but in any case, it is not a big problem.

                # Keys get their first letter lower-cased ( Name -> name, ... )
                lowerElementDict = {'element': self.elementType}
                for key, value in elemDict.items():
                    lowerElementDict[key[0].lower() + key[1:]] = value

                # We add lowerElementDict to the queue
                self.elementsToBeChecked.put(lowerElementDict)
                self.log.verbose('%s # "%s" # "%s" # %s # %s'
                                 % (elemDict['Name'], elemDict['ElementType'],
                                    elemDict['StatusType'], elemDict['Status'],
                                    elemDict['LastCheckTime']))

        # Measure size of the queue, more or less, to know how many threads
        # should we start !
        queueSize = self.elementsToBeChecked.qsize()
        # 30, could have been other number.. but it works reasonably well.
        # ( +1 to get ceil ). Floor division keeps the count an integer
        # regardless of the division semantics in use.
        threadsToStart = max(min(self.maxNumberOfThreads, (queueSize // 30) + 1), 1)
        threadsRunning = self.threadPool.numWorkingThreads()

        self.log.info('Needed %d threads to process %d elements'
                      % (threadsToStart, queueSize))
        if threadsRunning:
            self.log.info('Already %d threads running' % threadsRunning)
            threadsToStart = max(0, threadsToStart - threadsRunning)
            self.log.info('Starting %d threads to process %d elements'
                          % (threadsToStart, queueSize))

        # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0
        # keeps running. In next loop we will start a new thread, and will be
        # called 0 again. To have a mechanism to see which thread is where, we
        # append the cycle number before the threadId.
        cycle = self._AgentModule__moduleProperties['cyclesDone']

        for _x in range(threadsToStart):
            threadId = '%s_%s' % (cycle, _x)
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute,
                                                          args=(threadId, ))
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        return S_OK()

    def finalize(self):
        '''
        Blocks until every queued element has been marked as done.

        :return: S_OK
        '''
        self.log.info('draining queue... blocking until empty')
        # block until all tasks are done
        self.elementsToBeChecked.join()

        return S_OK()

    ## Private methods #########################################################

    def _execute(self, threadNumber):
        '''
        Method run by the thread pool. It enters a loop until there are no
        elements on the queue. On each iteration, it evaluates the policies for
        such element and enforces the necessary actions. If there are no more
        elements in the queue, the loop is finished.

        :Parameters:
          **threadNumber** - identifier used on the log lines of this worker

        :return: S_OK
        '''
        tHeader = '%sJob%s' % ('* ' * 30, threadNumber)

        self.log.info('%s UP' % tHeader)

        pep = PEP(clients=self.clients)

        while True:

            try:
                element = self.elementsToBeChecked.get_nowait()
            except Queue.Empty:
                # The only way out of the loop
                self.log.info('%s DOWN' % tHeader)
                return S_OK()

            try:
                self.log.info('%s ( %s / %s ) being processed'
                              % (element['name'], element['status'],
                                 element['statusType']))

                resEnforce = pep.enforce(element)
                if not resEnforce['OK']:
                    self.log.error(resEnforce['Message'])
                    continue

                resEnforce = resEnforce['Value']

                oldStatus = resEnforce['decissionParams']['status']
                statusType = resEnforce['decissionParams']['statusType']
                newStatus = resEnforce['policyCombinedResult']['Status']
                reason = resEnforce['policyCombinedResult']['Reason']

                if oldStatus != newStatus:
                    self.log.info('%s (%s) is now %s ( %s ), before %s'
                                  % (element['name'], statusType, newStatus,
                                     reason, oldStatus))
            finally:
                # Used together with join ! Every get_nowait() must be paired
                # with a task_done(), even if the processing raised, otherwise
                # finalize() would block forever.
                self.elementsToBeChecked.task_done()
class Statistics(object):
    """
    Statistics class that provides helpers to extract information from the
    database more easily.
    """

    def __init__(self):
        """
        Constructor: creates the ResourceStatusClient used by all the queries.
        """
        self.rsClient = ResourceStatusClient()

    def getElementHistory(self, element, elementName, statusType, oldAs=None, newAs=None):
        """
        Returns the succession of statuses and the dates since they are effective. The
        values are comprised in the time interval [ oldAs, newAs ]. If not specified,
        all values up to the present are returned.

        It returns a list of tuples, of which the first element is the Status and the
        second one the time-stamp since it is effective. Note that the time-stamps will
        not necessarily match the time window: the newest entry at or before the lower
        limit is kept as first item.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            datetime with the start point for the time window. If not specified,
            datetime.min is used.
          **newAs** - [ None, `datetime` ]
            datetime with the end point for the time window. If not specified, it
            is used datetime.utcnow.

        :return: S_OK( [ (StatusA, datetimeA),(StatusB,datetimeB) ] ) | S_ERROR
        """
        # Checks we are not passing a silly element ( we only accept Site,
        # Resource and Node )
        if element not in getValidElements():
            return S_ERROR('"%s" is not a valid element' % element)

        # FIXME: read below
        # Gets all elements in history. If the history is long, this query is
        # going to be rather heavy...
        result = self.rsClient.selectStatusElement(
            element, 'History',
            name=elementName,
            statusType=statusType,
            meta={'columns': ['Status', 'DateEffective']})
        if not result['OK']:
            return result
        result = result['Value']

        if not result:
            return S_OK([])

        # To avoid making exceptions in the for-loop, we feed history with the
        # first item in the results
        history = [result[0]]

        # Sets defaults.
        # OldAs is as old as datetime.min if not defined.
        oldAs = oldAs or datetime.datetime.min
        # NewAs is as new as set or datetime.utcnow
        newAs = newAs or datetime.datetime.utcnow()

        # Sanity check: no funny time windows
        if oldAs > newAs:
            return S_ERROR("oldAs (%s) > newAs (%s)" % (oldAs, newAs))

        # This avoids that the window finishes before having the first point in
        # the history.
        if history[0][1] > newAs:
            return S_OK([])

        # Iterate starting from the second element in the list. The loop
        # assumes the elements are SORTED by DateEffective, oldest first -
        # otherwise the break and the buffering below would be wrong.
        for historyElement in result[1:]:

            # If the point is newer than the superior limit of the window, we
            # are done.
            if historyElement[1] > newAs:
                break
            # If the point is older than the window lower limit, we buffer it.
            # We just want the closest point to the lower limit.
            elif historyElement[1] <= oldAs:
                history = [historyElement]
            # Otherwise, we add it to the history
            else:
                history.append(historyElement)

        return S_OK(history)

    def getElementStatusAt(self, element, elementName, statusType, statusTime):
        """
        Returns the status of the <element><elementName><statusType> at the given time
        <statusTime>. If not known, will return an empty list. If known, will return
        a list with two elements: Status and time since it is effective.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **statusTime** - `datetime`
            datetime when we want to know the status of <element><elementName><statusType>

        :return: S_OK( [ StatusA, datetimeA ] ) | S_OK( [] ) | S_ERROR
        """
        # A window collapsed to a single instant
        result = self.getElementHistory(element, elementName, statusType,
                                        statusTime, statusTime)
        if not result['OK']:
            return result
        result = result['Value']

        if result:
            result = list(result[0])

        return S_OK(result)

    def getElementStatusTotalTimes(self, element, elementName, statusType,
                                   oldAs=None, newAs=None):
        """
        Returns a dictionary with all the possible statuses as keys and as values the
        number of seconds that <element><elementName><statusType> hold it for a time
        window between [ oldAs, newAs ]. If oldAs is not defined, it is considered
        as datetime.min. If newAs is not defined, it is considered datetime.utcnow.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            datetime with the start point for the time window. If not specified,
            datetime.min is used.
          **newAs** - [ None, `datetime` ]
            datetime with the end point for the time window. If not specified, it
            is used datetime.utcnow.

        :return: S_OK( { StatusA : secondsA, StatusB : secondsB } ) | S_ERROR
        """
        # Gets all history within the window
        result = self.getElementHistory(element, elementName, statusType, oldAs, newAs)
        if not result['OK']:
            return result
        result = result['Value']

        # Dictionary to be returned
        statusCounter = dict.fromkeys(getValidStatus()['Value'], 0)

        # If history is empty, return the dictionary with every counter at zero
        if not result:
            return S_OK(statusCounter)

        # Set defaults
        oldAs = oldAs or datetime.datetime.min
        newAs = newAs or datetime.datetime.utcnow()

        # If users are not behaving well, we force newAs to not be in the future.
        newAs = min(newAs, datetime.datetime.utcnow())

        # Iterate over the results in pairs of consecutive entries.
        for current, following in zip(result, result[1:]):

            # Make sure the time taken as base is not older than the lower limit
            # of the window. In principle, this should be only checked on the
            # first element, but it is harmless anyway and cleaner than the
            # if-else.
            startingPoint = max(current[1], oldAs)

            # Get number of seconds and add them
            statusCounter[current[0]] += timedelta_to_seconds(following[1] - startingPoint)

        # The method selected to iterate over the results does not take into
        # account the last one. Gets the time using as lower limit the window
        # lower limit. This applies when we have only one element in the list
        # for example.
        statusCounter[result[-1][0]] += timedelta_to_seconds(
            newAs - max(result[-1][1], oldAs))

        return S_OK(statusCounter)
def setToken( user ):
    '''
    Acquire, extend or release the token of the element(s) selected through the
    module-level ``switchDict``.

    The rows matching ``switchDict[ 'element' ]``, ``switchDict[ 'name' ]`` and
    ``switchDict[ 'statusType' ]`` are read from the <element>Status table and
    each of them is modified, one StatusType at a time, with the new token owner,
    the new token expiration and ``switchDict[ 'reason' ]``.

    :param user: name set as token owner when the token is not being released
    :return: S_OK() | the failed result of the select / modify call
    '''

    client = ResourceStatusClient()

    family = switchDict[ 'element' ]
    elementName = switchDict[ 'name' ]
    wantedStatusType = switchDict[ 'statusType' ]

    # The modify call does not discover the StatusTypes by itself ( and here the
    # statusType is optional ), so they are fetched first and then updated one
    # by one.
    selection = client.selectStatusElement( family, 'Status',
                                            name = elementName,
                                            statusType = wantedStatusType,
                                            meta = { 'columns' : [ 'StatusType', 'TokenOwner' ] } )
    if not selection[ 'OK' ]:
        return selection

    rows = selection[ 'Value' ]
    if not rows:
        # Nothing on the DB matches the request
        subLogger.warn( 'Nothing found for %s, %s, %s' % ( family, elementName, wantedStatusType ) )
        return S_OK()

    if switchDict[ 'releaseToken' ] == False:
        # Token is taken ( or extended ) by the user for one day
        tokenExpiration = datetime.utcnow().replace( microsecond = 0 ) + timedelta( days = 1 )
        newTokenOwner = user
    else:
        # Token is given back to RSS
        tokenExpiration = datetime.max
        newTokenOwner = 'rs_svc'

    subLogger.info( 'New token : %s until %s' % ( newTokenOwner, tokenExpiration ) )

    for row in rows:

        statusType, tokenOwner = row

        # Somebody else ( neither the issuer nor RSS ) currently holds the token
        if tokenOwner not in ( user, 'rs_svc' ):
            subLogger.info( '%s(%s) belongs to the user: %s' % ( elementName, statusType, tokenOwner ) )

        outcome = client.modifyStatusElement( family, 'Status',
                                              name = elementName,
                                              statusType = statusType,
                                              reason = switchDict[ 'reason' ],
                                              tokenOwner = newTokenOwner,
                                              tokenExpiration = tokenExpiration )
        if not outcome[ 'OK' ]:
            return outcome

        if tokenOwner == newTokenOwner:
            msg = '(extended)'
        else:
            msg = '(released)' if newTokenOwner == 'rs_svc' else '(aquired from %s)' % tokenOwner

        subLogger.info( '%s:%s %s' % ( elementName, statusType, msg ) )

    return S_OK()
class SiteStatus(object):
    """
    RSS helper to interact with the 'Site' family on the DB. It provides the most
    demanded functions and a cache to avoid hitting the server too often.

    Methods provided to interact with the site statuses:

    * getSiteStatuses
    * getUsableSites
    * getSites
    * setSiteStatus
    """

    __metaclass__ = DIRACSingleton

    # Statuses accepted by getSites and setSiteStatus ( after capitalize() )
    _ALLOWED_STATES = ('Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown')

    def __init__(self):
        """
        Constructor, initializes the logger, the RSS configuration, the
        ResourceStatusClient and the RSS cache.
        """

        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.rssConfig = RssConfiguration()
        self.__opHelper = Operations()
        self.rssFlag = ResourceStatus().rssFlag
        self.rsClient = ResourceStatusClient()

        cacheLifeTime = int(self.rssConfig.getConfigCache())

        # RSSCache only affects the calls directed to RSS, if using the CS it is not used.
        self.rssCache = RSSCache(cacheLifeTime, self.__updateRssCache)

    def __updateRssCache(self):
        """
        Method used to update the rssCache.

        It tries up to 5 times to contact the RSS before giving up, sleeping
        trial**2 seconds and re-creating the client between trials.

        :return: S_OK( cache dictionary ) | the last failed select result
        """

        meta = {'columns': ['Name', 'Status']}

        for ti in xrange(5):
            rawCache = self.rsClient.selectStatusElement('Site', 'Status', meta=meta)
            if rawCache['OK']:
                break
            self.log.warn("Can't get resource's status", rawCache['Message'] + "; trial %d" % ti)
            sleep(math.pow(ti, 2))
            self.rsClient = ResourceStatusClient()

        if not rawCache['OK']:
            return rawCache
        return S_OK(getCacheDictFromRawData(rawCache['Value']))

    def getSiteStatuses(self, siteNames=None):
        """
        Method that queries the status of the sites in a given list. A single
        string site name may also be provided as "siteNames". If the input is
        None, it is interpreted as * ( all ).

        If RSS is enabled ( rssFlag ) the answer comes from the RSS cache,
        otherwise the WMSAdministrator site mask is queried.

        If match is positive, the output looks like::

          {
           'test1.test1.org': 'Active',
           'test2.test2.org': 'Banned',
          }

        examples
          >>> siteStatus.getSiteStatuses( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', 'test3.test3.org': 'Active' } )
          >>> siteStatus.getSiteStatuses( 'NotExists' )
              S_ERROR( ... )
          >>> siteStatus.getSiteStatuses( None )
              S_OK( { 'test1.test1.org': 'Active', 'test2.test2.net': 'Banned', ... } )

        :Parameters:
          **siteNames** - `list` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if self.rssFlag:
            return self.__getRSSSiteStatus(siteNames)

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')

        if not siteNames:
            # No selection: ask for the whole mask in one call
            result = wmsAdmin.getSiteMaskStatus()
            if not result['OK']:
                return result
            return S_OK(result['Value'])

        if isinstance(siteNames, basestring):
            siteNames = [siteNames]

        siteStatusDict = {}
        for siteName in siteNames:
            result = wmsAdmin.getSiteMaskStatus(siteName)
            if not result['OK']:
                return result
            siteStatusDict[siteName] = result['Value']

        return S_OK(siteStatusDict)

    def __getRSSSiteStatus(self, siteName=None):
        """
        Gets from the cache or the RSS the Sites status. The cache is a copy of the DB table.
        If it is not on the cache, most likely is not going to be on the DB.

        There is one exception: item just added to the CS, e.g. new Element.
        The period between it is added to the DB and the changes are propagated
        to the cache will be inconsistent, but not dangerous. Just wait <cacheLifeTime>
        minutes.

        :param siteName: name of the site ( passed as is to the cache match )
        :type siteName: str
        :return: whatever rssCache.match returns
        """

        cacheMatch = self.rssCache.match(siteName, '', '')

        self.log.debug('__getRSSSiteStatus')
        self.log.debug(cacheMatch)

        return cacheMatch

    def getUsableSites(self, siteNames=None):
        """
        Returns, in a list, all sites whose status is either Active or Degraded.

        examples
          >>> siteStatus.getUsableSites( ['test1.test1.uk', 'test2.test2.net', 'test3.test3.org'] )
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getUsableSites( None )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org', ...] )
          >>> siteStatus.getUsableSites( 'NotExists' )
              S_ERROR( ... )

        :Parameters:
          **siteNames** - `List` or `str`
            name(s) of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        siteStatusDictRes = self.getSiteStatuses(siteNames)
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        usableSites = [name for name, status in siteStatusDictRes['Value'].iteritems()
                       if status in ('Active', 'Degraded')]

        return S_OK(usableSites)

    def getSites(self, siteState='Active'):
        """
        By default, it gets the currently active site list. 'All' returns every
        known site; any other value must be one of the allowed statuses
        ( case insensitive ).

        examples
          >>> siteStatus.getSites()
              S_OK( ['test1.test1.uk', 'test3.test3.org'] )
          >>> siteStatus.getSites( 'Banned' )
              S_OK( ['test0.test0.uk', ... ] )
          >>> siteStatus.getSites( 'All' )
              S_OK( ['test1.test1.uk', 'test3.test3.org', 'test4.test4.org', 'test5.test5.org'...] )
          >>> siteStatus.getSites( None )
              S_ERROR( ... )

        :Parameters:
          **siteState** - `String`
            state of the sites to be matched

        :return: S_OK() || S_ERROR()
        """

        if not siteState:
            return S_ERROR(DErrno.ERESUNK, 'siteState parameter is empty')

        siteStatusDictRes = self.getSiteStatuses()
        if not siteStatusDictRes['OK']:
            return siteStatusDictRes

        # fix case sensitive string
        siteState = siteState.capitalize()

        if siteState == 'All':
            return S_OK(list(siteStatusDictRes['Value']))

        if siteState not in self._ALLOWED_STATES:
            return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

        siteList = [name for name, status in siteStatusDictRes['Value'].iteritems()
                    if status == siteState]

        return S_OK(siteList)

    def setSiteStatus(self, site, status, comment='No comment'):
        """
        Set the status of a site. With RSS enabled the 'Site' 'Status' row is
        modified ( taking a one-day token for the proxy user ) and the cache is
        refreshed; otherwise the site is allowed / banned through the
        WMSAdministrator.

        examples
          >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
              S_OK()
          >>> siteStatus.setSiteStatus( 'site1.test.test', None )
              S_ERROR( ... )

        :Parameters:
          **site** - `String`
            the site whose status is going to be changed
          **status** - `String`
            new status, one of the allowed statuses ( case insensitive )
          **comment** - `String`
            reason for the change

        :return: S_OK() || S_ERROR()
        """

        if not status:
            return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

        # fix case sensitive string
        status = status.capitalize()
        if status not in self._ALLOWED_STATES:
            return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

        if self.rssFlag:
            result = getProxyInfo()
            if not result['OK']:
                return S_ERROR("Unable to get user proxy info %s " % result['Message'])
            tokenOwner = result['Value']['username']

            tokenExpiration = datetime.utcnow() + timedelta(days=1)

            self.rssCache.acquireLock()
            try:
                result = self.rsClient.modifyStatusElement('Site', 'Status',
                                                           status=status,
                                                           name=site,
                                                           tokenExpiration=tokenExpiration,
                                                           reason=comment,
                                                           tokenOwner=tokenOwner)
                if result['OK']:
                    self.rssCache.refreshCache()
                else:
                    _msg = 'Error updating status of site %s to %s' % (site, status)
                    gLogger.warn('RSS: %s' % _msg)
            # Release lock, no matter what.
            finally:
                self.rssCache.releaseLock()

        else:
            # The site ( and the reason ) must be forwarded, otherwise the service
            # cannot know which site to act upon.
            # NOTE(review): assumes WMSAdministrator exposes allowSite( site, comment )
            # and banSite( site, comment ) - confirm against the service handler.
            wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
            if status in ('Active', 'Degraded'):
                result = wmsAdmin.allowSite(site, comment)
            else:
                result = wmsAdmin.banSite(site, comment)

        return result
def setToken(user):
    '''
    Acquire, extend or release the token of the element(s) selected through the
    module-level ``switchDict``.

    The rows matching ``switchDict['element']``, ``switchDict['name']`` and
    ``switchDict['statusType']`` are read from the <element>Status table and each
    of them is modified, one StatusType at a time, with the new token owner, the
    new token expiration and ``switchDict['reason']``. When the token is not
    released it is valid for ``switchDict['days']`` days from now.

    :param user: name set as token owner when the token is not being released
    :return: S_OK() | the failed result of the select / modify call
    '''

    client = ResourceStatusClient()

    family = switchDict['element']
    elementName = switchDict['name']
    wantedStatusType = switchDict['statusType']

    # The modify call does not discover the StatusTypes by itself ( and here the
    # statusType is optional ), so they are fetched first and then updated one
    # by one.
    selection = client.selectStatusElement(family, 'Status',
                                           name=elementName,
                                           statusType=wantedStatusType,
                                           meta={'columns': ['StatusType', 'TokenOwner']})
    if not selection['OK']:
        return selection

    rows = selection['Value']
    if not rows:
        # Nothing on the DB matches the request
        subLogger.warn('Nothing found for %s, %s, %s' % (family, elementName, wantedStatusType))
        return S_OK()

    if not switchDict['releaseToken']:
        # Token is taken ( or extended ) by the user for the requested number of days
        now = datetime.utcnow().replace(microsecond=0)
        tokenExpiration = now + timedelta(days=int(switchDict['days']))
        newTokenOwner = user
    else:
        # Token is given back to RSS
        tokenExpiration = datetime.max
        newTokenOwner = 'rs_svc'

    subLogger.always('New token: %s --- until %s' % (newTokenOwner, tokenExpiration))

    for row in rows:

        statusType, tokenOwner = row

        # Somebody else ( neither the issuer nor RSS ) currently holds the token
        if tokenOwner not in (user, 'rs_svc'):
            subLogger.info('%s(%s) belongs to the user: %s' % (elementName, statusType, tokenOwner))

        outcome = client.modifyStatusElement(family, 'Status',
                                             name=elementName,
                                             statusType=statusType,
                                             reason=switchDict['reason'],
                                             tokenOwner=newTokenOwner,
                                             tokenExpiration=tokenExpiration)
        if not outcome['OK']:
            return outcome

        if tokenOwner == newTokenOwner:
            msg = '(extended)'
        else:
            msg = '(released)' if newTokenOwner == 'rs_svc' else '(aquired from %s)' % tokenOwner

        subLogger.info('%s:%s %s' % (elementName, statusType, msg))

    return S_OK()
class LogStatusAction(BaseAction):
    """
    Action that registers on the database a new entry on the <element>Status table.
    It adds or modifies if the record exists on the table.
    """

    def __init__(self, name, decisionParams, enforcementResult, singlePolicyResults, clients=None):
        """
        Forwards all arguments to BaseAction and picks the ResourceStatusClient
        from `clients` when provided, creating a new one otherwise.
        """
        super(LogStatusAction, self).__init__(name, decisionParams, enforcementResult, singlePolicyResults, clients)

        if clients is not None and "ResourceStatusClient" in clients:
            self.rsClient = clients["ResourceStatusClient"]
        else:
            self.rsClient = ResourceStatusClient()

    def run(self):
        """
        Checks it has the parameters it needs and tries to addOrModify in the
        database.

        When the enforcement result carries VO == "all", every row of the element
        owned by "rs_svc" is updated, one per VO found on the table. All rows are
        attempted; if any update fails, the first failure is returned. If there
        are no rows to update, S_OK() is returned.

        :return: result of addOrModifyStatusElement | S_OK() | S_ERROR
        """
        # Minor security checks

        element = self.decisionParams["element"]
        if element is None:
            return S_ERROR("element should not be None")

        name = self.decisionParams["name"]
        if name is None:
            return S_ERROR("name should not be None")

        statusType = self.decisionParams["statusType"]
        if statusType is None:
            return S_ERROR("statusType should not be None")

        status = self.enforcementResult["Status"]
        if status is None:
            return S_ERROR("status should not be None")

        elementType = self.decisionParams["elementType"]
        if elementType is None:
            return S_ERROR("elementType should not be None")

        reason = self.enforcementResult["Reason"]
        if reason is None:
            return S_ERROR("reason should not be None")

        vo = self.enforcementResult.get("VO")

        # Truncate reason to fit in database column
        reason = (reason[:508] + "..") if len(reason) > 508 else reason

        # Arguments shared by every addOrModify call below
        update = dict(name=name, statusType=statusType, status=status, elementType=elementType, reason=reason)

        if vo != "all":
            return self.rsClient.addOrModifyStatusElement(element, "Status", vO=vo, **update)

        # VO = 'all' (non-VO aware policy) for a combined policy affects all VOs
        # for a given site or resource.
        resSelect = self.rsClient.selectStatusElement(
            element,
            "Status",
            name=name,
            statusType=None,
            vO=None,
            status=None,
            elementType=None,
            reason=None,
            dateEffective=None,
            lastCheckTime=None,
            tokenOwner="rs_svc",
            tokenExpiration=None,
            meta=None,
        )
        if not resSelect["OK"]:
            self.log.error("Could not obtain all VO rows for element: %s" % element)
            return resSelect

        voColumnIndex = resSelect["Columns"].index("VO")

        # Bound up-front: with no matching rows the loop body never runs and
        # there would otherwise be nothing to return.
        resLogUpdate = S_OK()
        firstFailure = None

        for row in resSelect["Value"]:
            resLogUpdate = self.rsClient.addOrModifyStatusElement(
                element, "Status", vO=row[voColumnIndex], **update
            )
            self.log.debug("Update result", resLogUpdate)
            # Keep going for the remaining VOs, but do not lose the error
            if firstFailure is None and not resLogUpdate["OK"]:
                firstFailure = resLogUpdate

        if firstFailure is not None:
            return firstFailure
        return resLogUpdate
class ElementInspectorAgent( AgentModule ):
    """ ElementInspectorAgent

    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    This Agent takes care of the Elements. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.
    """

    # Max number of worker threads by default
    __maxNumberOfThreads = 15

    # Inspection freqs ( minutes ), defaults, the lower, the higher priority to be
    # checked. Error state usually means there is a glitch somewhere, so it has
    # the highest priority.
    __checkingFreqs = { 'Active'   : 20,
                        'Degraded' : 20,
                        'Probing'  : 20,
                        'Banned'   : 15,
                        'Unknown'  : 10,
                        'Error'    : 5 }

    def __init__( self, *args, **kwargs ):
        """ c'tor
        """

        AgentModule.__init__( self, *args, **kwargs )

        # ElementType, to be defined among Site, Resource or Node
        self.elementType = ''
        self.elementsToBeChecked = None
        self.threadPool = None
        self.rsClient = None
        self.clients = {}

    def initialize( self ):
        """ Standard initialize.

        Reads the 'maxNumberOfThreads' and 'elementType' options, creates the
        thread pool and the clients handed over to the PEP.

        :return: S_OK | S_ERROR ( if elementType is not set )
        """

        maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.__maxNumberOfThreads )
        self.threadPool = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )

        self.elementType = self.am_getOption( 'elementType', self.elementType )
        self.rsClient = ResourceStatusClient()

        self.clients[ 'ResourceStatusClient' ] = self.rsClient
        self.clients[ 'ResourceManagementClient' ] = ResourceManagementClient()

        if not self.elementType:
            return S_ERROR( 'Missing elementType' )

        return S_OK()

    def execute( self ):
        """ execute

        This is the main method of the agent. It gets the elements from the Database
        which are eligible to be re-checked, calculates how many threads should be
        started and spawns them. Each thread will get an element from the queue until
        it is empty. At the end, the method will join the queue such that the agent
        will not terminate a cycle until all elements have been processed.

        :return: S_OK | S_ERROR
        """

        # Gets elements to be checked ( returns a Queue )
        elementsToBeChecked = self.getElementsToBeChecked()
        if not elementsToBeChecked[ 'OK' ]:
            self.log.error( elementsToBeChecked[ 'Message' ] )
            return elementsToBeChecked
        self.elementsToBeChecked = elementsToBeChecked[ 'Value' ]

        queueSize = self.elementsToBeChecked.qsize()
        pollingTime = self.am_getPollingTime()

        # Assigns number of threads on the fly such that we exhaust the PollingTime
        # without having to spawn too many threads. We assume 10 seconds per element
        # to be processed ( actually, it takes something like 1 sec per element ):
        # numberOfThreads = elements * 10(s/element) / pollingTime
        numberOfThreads = int( math.ceil( queueSize * 10. / pollingTime ) )

        self.log.info( 'Needed %d threads to process %d elements' % ( numberOfThreads, queueSize ) )

        for _x in xrange( numberOfThreads ):
            jobUp = self.threadPool.generateJobAndQueueIt( self._execute )
            if not jobUp[ 'OK' ]:
                self.log.error( jobUp[ 'Message' ] )

        self.log.info( 'blocking until all elements have been processed' )
        # block until all tasks are done
        self.elementsToBeChecked.join()
        self.log.info( 'done')

        return S_OK()

    def getElementsToBeChecked( self ):
        """ getElementsToBeChecked

        This method gets all the rows in the <self.elementType>Status table, and then
        discards entries with TokenOwner != rs_svc. On top of that, there are check
        frequencies that are applied: depending on the current status of the element,
        they will be checked more or less often.

        :return: S_OK( Queue of dictionaries ) | S_ERROR
        """

        toBeChecked = Queue.Queue()

        # We get all the elements, then we filter.
        elements = self.rsClient.selectStatusElement( self.elementType, 'Status' )
        if not elements[ 'OK' ]:
            return elements

        utcnow = datetime.datetime.utcnow().replace( microsecond = 0 )

        # filter elements by Type
        for element in elements[ 'Value' ]:

            # Maybe an overkill, but this way I have NEVER again to worry about order
            # of elements returned by mySQL on tuples
            elemDict = dict( zip( elements[ 'Columns' ], element ) )

            # This if-clause skips all the elements that should not be checked yet
            timeToNextCheck = self.__checkingFreqs[ elemDict[ 'Status' ] ]
            if utcnow <= elemDict[ 'LastCheckTime' ] + datetime.timedelta( minutes = timeToNextCheck ):
                continue

            # We skip the elements with token different than "rs_svc"
            if elemDict[ 'TokenOwner' ] != 'rs_svc':
                self.log.verbose( 'Skipping %s ( %s ) with token %s' % ( elemDict[ 'Name' ],
                                                                         elemDict[ 'StatusType' ],
                                                                         elemDict[ 'TokenOwner' ] ) )
                continue

            # We are not checking if the item is already on the queue or not. It may
            # be there, but in any case, it is not a big problem.

            # Keys are the column names with the first letter lower-cased
            lowerElementDict = { 'element' : self.elementType }
            for key, value in elemDict.items():
                lowerElementDict[ key[0].lower() + key[1:] ] = value

            # We add lowerElementDict to the queue
            toBeChecked.put( lowerElementDict )
            self.log.verbose( '%s # "%s" # "%s" # %s # %s' % ( elemDict[ 'Name' ],
                                                               elemDict[ 'ElementType' ],
                                                               elemDict[ 'StatusType' ],
                                                               elemDict[ 'Status' ],
                                                               elemDict[ 'LastCheckTime' ]) )

        return S_OK( toBeChecked )

    # Private methods ............................................................

    def _execute( self ):
        """
        Method run by the thread pool. It enters a loop until there are no elements
        on the queue. On each iteration, it evaluates the policies for such element
        and enforces the necessary actions. If there are no more elements in the
        queue, the loop is finished.

        :return: S_OK once the queue is empty
        """

        pep = PEP( clients = self.clients )

        while True:

            try:
                element = self.elementsToBeChecked.get_nowait()
            except Queue.Empty:
                return S_OK()

            # Every element taken from the queue must be acknowledged, whatever
            # happens while processing it: execute() blocks on join() until
            # task_done() has been called once per queued element.
            try:
                self.log.verbose( '%s ( %s / %s ) being processed' % ( element[ 'name' ],
                                                                       element[ 'status' ],
                                                                       element[ 'statusType' ] ) )

                resEnforce = pep.enforce( element )
                if not resEnforce[ 'OK' ]:
                    self.log.error( 'Failed policy enforcement', resEnforce[ 'Message' ] )
                    continue

                resEnforce = resEnforce[ 'Value' ]

                oldStatus = resEnforce[ 'decissionParams' ][ 'status' ]
                statusType = resEnforce[ 'decissionParams' ][ 'statusType' ]
                newStatus = resEnforce[ 'policyCombinedResult' ][ 'Status' ]
                reason = resEnforce[ 'policyCombinedResult' ][ 'Reason' ]

                if oldStatus != newStatus:
                    self.log.info( '%s (%s) is now %s ( %s ), before %s' % ( element[ 'name' ],
                                                                             statusType,
                                                                             newStatus,
                                                                             reason,
                                                                             oldStatus ) )
            finally:
                # Used together with join !
                self.elementsToBeChecked.task_done()
class SummarizeLogsAgent( AgentModule ):
    """ SummarizeLogsAgent as extension of AgentModule.

    Moves the status changes recorded on the <element>Log tables to the
    <element>History tables and clears the processed logs.
    """

    def __init__( self, *args, **kwargs ):
        """ Constructor.
        """

        AgentModule.__init__( self, *args, **kwargs )

        self.rsClient = None

    def initialize( self ):
        """ Standard initialize.

        :return: S_OK
        """

        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute( self ):
        """ execute ( main method )

        The execute method runs over the three families of tables ( Site, Resource and
        Node ) performing identical operations. First, selects all logs for a given
        family ( and keeps track of which one is the last row ID ). It summarizes the
        logs and finally, deletes the logs from the database.

        Failures are logged and do not stop the processing of the remaining
        keys / families.

        :return: S_OK
        """

        # loop over the tables
        for element in ( 'Site', 'Resource', 'Node' ):

            self.log.info( 'Summarizing %s' % element )

            # get all logs to be summarized
            selectLogElements = self._summarizeLogs( element )
            if not selectLogElements[ 'OK' ]:
                self.log.error( selectLogElements[ 'Message' ] )
                continue

            lastID, logElements = selectLogElements[ 'Value' ]

            # logElements is a dictionary of key-value pairs as follows:
            # ( name, statusType ) : list( logs )
            for key, logs in logElements.iteritems():

                sumResult = self._registerLogs( element, key, logs )
                if not sumResult[ 'OK' ]:
                    self.log.error( sumResult[ 'Message' ] )

            if lastID is not None:
                self.log.info( 'Deleting %sLog till ID %s' % ( element, lastID ) )
                deleteResult = self.rsClient.deleteStatusElement( element, 'Log',
                                                                  meta = { 'older' : ( 'ID', lastID ) } )
                if not deleteResult[ 'OK' ]:
                    self.log.error( deleteResult[ 'Message' ] )

        return S_OK()

    #.............................................................................

    def _summarizeLogs( self, element ):
        """ given an element, selects all logs in table <element>Log and groups them
        by ( name, statusType ), keeping only the logs where the Status or the
        TokenOwner changed with respect to the previous kept one.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )

        :return: S_OK( ( lastID, dictOfLogs ) ) / S_ERROR ; lastID is the ID of the
                 last selected row, or None if nothing was selected
        """

        selectResults = self.rsClient.selectStatusElement( element, 'Log' )

        if not selectResults[ 'OK' ]:
            return selectResults

        selectedItems = {}
        selectColumns = selectResults[ 'Columns' ]
        selectResults = selectResults[ 'Value' ]

        latestID = None
        if selectResults:
            latestID = dict( zip( selectColumns, selectResults[ -1 ] ) )[ 'ID' ]

        for selectResult in selectResults:

            elementDict = dict( zip( selectColumns, selectResult ) )

            key = ( elementDict[ 'Name' ], elementDict[ 'StatusType' ] )

            if key not in selectedItems:
                selectedItems[ key ] = [ elementDict ]
                continue

            lastStatus = selectedItems[ key ][ -1 ][ 'Status' ]
            lastToken = selectedItems[ key ][ -1 ][ 'TokenOwner' ]

            # If there are no changes on the Status or the TokenOwner with respect
            # the previous one, discards the log.
            if lastStatus != elementDict[ 'Status' ] or lastToken != elementDict[ 'TokenOwner' ]:
                selectedItems[ key ].append( elementDict )

        return S_OK( ( latestID, selectedItems ) )

    def _registerLogs( self, element, key, logs ):
        """ Given an element, a key - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them on the <element>History
        table. Before inserting them, checks whether the first one is or is not on
        the <element>History table. If it is, it is not inserted ( and it is removed
        from `logs` in place ). It also checks whether the LastCheckTime parameter of
        the first log to be inserted is larger than the last history log
        LastCheckTime. If not, it means an agent cycle has been interrupted and we
        can run into inconsistencies. It aborts to prevent more dramatic results.

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **key** - `tuple`
            tuple with the name of the element and the statusType
          **logs** - `list`
            list of dictionaries containing the logs

        :return: S_OK / S_ERROR
        """

        # Undo key
        name, statusType = key

        selectedRes = self.rsClient.selectStatusElement( element, 'History', name, statusType,
                                                         meta = { 'columns' : [ 'Status', 'LastCheckTime', 'TokenOwner' ],
                                                                  'limit'   : 1,
                                                                  'order'   : ('LastCheckTime', 'DESC') } )

        if not selectedRes[ 'OK' ]:
            return selectedRes
        selectedRes = selectedRes[ 'Value' ]

        # We want from the <element>History table the last Status, LastCheckTime
        # and TokenOwner
        lastStatus, lastCheckTime, lastToken = None, None, None
        if selectedRes:
            lastStatus, lastCheckTime, lastToken = selectedRes[ 0 ]

        # Sanity check to avoid running if an agent cycle has been stopped
        if lastCheckTime and logs[ 0 ][ 'LastCheckTime' ] < lastCheckTime:
            return S_ERROR( 'Overlapping data. Seems the DB has not been cleared properly' )

        # If the first of the selected items has the same status and token than the
        # latest on the history, it is already there: we do not add it again.
        if logs[ 0 ][ 'Status' ] == lastStatus and logs[ 0 ][ 'TokenOwner' ] == lastToken:
            del logs[ 0 ]

        if logs:
            self.log.info( '%s ( %s )' % ( name, statusType ) )

        for selectedItemDict in logs:

            res = self.__logToHistoryTable( element, selectedItemDict )
            if not res[ 'OK' ]:
                return res

        return S_OK()

    def __logToHistoryTable( self, element, elementDict ):
        """ Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table

        :Parameters:
          **element** - `string`
            name of the table family ( either Site, Resource and Node )
          **elementDict** - `dict`
            dictionary returned from the DB to be inserted on the History table

        :return: S_OK / S_ERROR ( S_ERROR if any expected column is missing )
        """

        try:

            name = elementDict[ 'Name' ]
            statusType = elementDict[ 'StatusType' ]
            status = elementDict[ 'Status' ]
            elementType = elementDict[ 'ElementType' ]
            reason = elementDict[ 'Reason' ]
            dateEffective = elementDict[ 'DateEffective' ]
            lastCheckTime = elementDict[ 'LastCheckTime' ]
            tokenOwner = elementDict[ 'TokenOwner' ]
            tokenExpiration = elementDict[ 'TokenExpiration' ]

        # "except X as e" is valid from Python 2.6 on; the comma form is a
        # syntax error on Python 3.
        except KeyError as e:
            return S_ERROR( e )

        self.log.info( '  %(Status)s %(DateEffective)s %(TokenOwner)s %(Reason)s' % elementDict )

        return self.rsClient.insertStatusElement( element, 'History', name, statusType,
                                                  status, elementType, reason,
                                                  dateEffective, lastCheckTime,
                                                  tokenOwner, tokenExpiration )
class Statistics( object ):
    """
    Statistics class that provides helpers to extract information from the
    database more easily.
    """

    def __init__( self ):
        """
        Constructor, creates the ResourceStatusClient used by all the queries.
        """

        self.rsClient = ResourceStatusClient()
        #self.rmClient = ResourceManagementClient()

    def getElementHistory( self, element, elementName, statusType,
                           oldAs = None, newAs = None ):
        """
        Returns the succession of statuses and the dates since they are effective. The
        values are comprised in the time interval [ oldAs, newAs ]. If not specified,
        all values up to the present are returned.

        It returns a list of tuples, of which the first element is the Status and the
        second one the time-stamp since it is effective. Note that the time-stamps will
        not necessarily match the time window: the entry in force at oldAs is kept even
        though it started before the window.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            datetime with the start point for the time window. If not specified,
            datetime.min is used.
          **newAs** - [ None, `datetime` ]
            datetime with the end point for the time window. If not specified, it
            is used datetime.utcnow.

        :return: S_OK( [ (StatusA, datetimeA),(StatusB,datetimeB) ] ) | S_ERROR
        """

        # Only the known element families are accepted
        if element not in getValidElements():
            return S_ERROR( '"%s" is not a valid element' % element )

        # FIXME: the whole history is fetched and filtered here; if the history is
        # long, this query is going to be rather heavy...
        selection = self.rsClient.selectStatusElement( element, 'History',
                                                       name = elementName,
                                                       statusType = statusType,
                                                       meta = { 'columns' : [ 'Status', 'DateEffective' ] } )
        if not selection[ 'OK' ]:
            return selection

        rows = selection[ 'Value' ]
        if not rows:
            return S_OK( [] )

        # Window limits: as old as datetime.min and as new as now unless told otherwise
        oldAs = oldAs or datetime.datetime.min
        newAs = newAs or datetime.datetime.utcnow()

        # Sanity check: no funny time windows
        if oldAs > newAs:
            return S_ERROR( "oldAs (%s) > newAs (%s)" % ( oldAs, newAs ) )

        # The window finishes before the first point in the history: nothing known
        if rows[ 0 ][ 1 ] > newAs:
            return S_OK( [] )

        # Seeded with the first row so that the loop needs no special cases
        window = [ rows[ 0 ] ]

        # NOTE: the loop relies on the rows being SORTED by ascending time-stamp
        # ( no ordering is requested in the query above ).
        for entry in rows[ 1: ]:

            effectiveSince = entry[ 1 ]

            # Newer than the upper limit of the window: we are done
            if effectiveSince > newAs:
                break

            # Not newer than the lower limit: only the closest one to it matters,
            # so it replaces whatever was buffered so far
            if effectiveSince <= oldAs:
                window = [ entry ]
                continue

            window.append( entry )

        return S_OK( window )

    def getElementStatusAt( self, element, elementName, statusType, statusTime ):
        """
        Returns the status of the <element><elementName><statusType> at the given time
        <statusTime>. If not known, returns an empty list. If known, returns a list
        with two items: Status and time since it is effective.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **statusTime** - `datetime`
            datetime when we want to know the status of <element><elementName><statusType>

        :return: S_OK( [ StatusA, datetimeA ] ) | S_OK( [] ) | S_ERROR
        """

        # A window of zero width leaves the entry in force at statusTime first
        history = self.getElementHistory( element, elementName, statusType, statusTime, statusTime )
        if not history[ 'OK' ]:
            return history

        entries = history[ 'Value' ]
        if not entries:
            return S_OK( entries )

        return S_OK( list( entries[ 0 ] ) )

    def getElementStatusTotalTimes( self, element, elementName, statusType,
                                    oldAs = None, newAs = None ):
        """
        Returns a dictionary with all the possible statuses as keys and as values the
        number of seconds that <element><elementName><statusType> hold it for a time
        window between [ oldAs, newAs ]. If oldAs is not defined, it is considered
        as datetime.min. If newAs is not defined, it is considered datetime.utcnow.
        A newAs in the future is brought back to datetime.utcnow.

        :Parameters:
          **element** - `str`
            element family ( either Site, Resource or Node )
          **elementName** - `str`
            element name
          **statusType** - `str`
            status type of the element <elementName> (e.g. 'all', 'ReadAccess',... )
          **oldAs** - [ None, `datetime` ]
            datetime with the start point for the time window.
          **newAs** - [ None, `datetime` ]
            datetime with the end point for the time window. If not specified, it
            is used datetime.utcnow.

        :return: S_OK( { StatusA : secondsA, StatusB : secondsB, ... } ) | S_ERROR
        """

        # Gets all history within the window
        history = self.getElementHistory( element, elementName, statusType, oldAs, newAs )
        if not history[ 'OK' ]:
            return history
        entries = history[ 'Value' ]

        # One counter per valid status, all starting at zero
        statusCounter = dict( ( validStatus, 0 ) for validStatus in getValidStatus()[ 'Value' ] )

        # Empty history: all counters stay at zero
        if not entries:
            return S_OK( statusCounter )

        # Set defaults
        oldAs = oldAs or datetime.datetime.min
        newAs = newAs or datetime.datetime.utcnow()

        # If users are not behaving well, we force newAs to not be in the future.
        newAs = min( newAs, datetime.datetime.utcnow() )

        # Each entry lasts until the next one starts
        for current, following in zip( entries, entries[ 1: ] ):

            # The time taken as base must not be older than the lower limit of the
            # window. Only the first entry can be affected, but checking all of
            # them is harmless.
            startingPoint = max( current[ 1 ], oldAs )

            statusCounter[ current[ 0 ] ] += timedelta_to_seconds( following[ 1 ] - startingPoint )

        # The pairwise iteration leaves the last entry out ( or the only one, if
        # there is just one ): it lasts until the upper limit of the window.
        lastEntry = entries[ -1 ]
        statusCounter[ lastEntry[ 0 ] ] += timedelta_to_seconds( newAs - max( lastEntry[ 1 ], oldAs ) )

        return S_OK( statusCounter )
def setToken(user):
    """
    Acquire, extend or release the token of the element(s) selected on the
    command line.

    The element family, name, statusType and VO are read from the module-level
    ``switchDict``. The matching entries are looked up in the database and each
    of them is modified in turn, setting the reason, the new token owner and
    the new token expiration date.

    :param user: name of the user issuing the command; becomes the new token
                 owner unless ``switchDict["releaseToken"]`` is set
    :return: S_OK() | the failing result of the select / modify call
    """
    rssClient = ResourceStatusClient()

    elementFamily = switchDict["element"]
    elementName = switchDict["name"]
    wantedStatusType = switchDict["statusType"]
    vo = switchDict["VO"]

    # Elements have to be modified one by one, and the modify method does not
    # discover the StatusTypes by itself ( they are optional in this script ),
    # so they are fetched from the DB first and then iterated over.
    selection = rssClient.selectStatusElement(
        elementFamily,
        "Status",
        name=elementName,
        statusType=wantedStatusType,
        vO=vo,
        meta={"columns": ["StatusType", "TokenOwner"]},
    )
    if not selection["OK"]:
        return selection

    rows = selection["Value"]
    if not rows:
        # An empty selection means nothing matching exists on the DB
        subLogger.warn("Nothing found for %s, %s, %s %s" % (elementFamily, elementName, vo, wantedStatusType))
        return S_OK()

    if not switchDict["releaseToken"]:
        # Acquire / extend: the token is valid for the requested number of days
        validity = timedelta(days=int(switchDict["days"]))
        tokenExpiration = datetime.utcnow().replace(microsecond=0) + validity
        newTokenOwner = user
    else:
        # Release: the token goes back to RSS, with no expiration
        tokenExpiration = datetime.max
        newTokenOwner = "rs_svc"

    subLogger.always("New token: %s --- until %s" % (newTokenOwner, tokenExpiration))

    for statusType, tokenOwner in rows:

        # Report when the current owner is neither the issuing user nor RSS
        if tokenOwner not in (user, "rs_svc"):
            subLogger.info("%s(%s) belongs to the user: %s" % (elementName, statusType, tokenOwner))

        outcome = rssClient.modifyStatusElement(
            elementFamily,
            "Status",
            name=elementName,
            statusType=statusType,
            reason=switchDict["reason"],
            tokenOwner=newTokenOwner,
            vO=vo,
            tokenExpiration=tokenExpiration,
        )
        if not outcome["OK"]:
            return outcome

        # Describe what happened to this token
        msg = "(aquired from %s)" % tokenOwner
        if tokenOwner == newTokenOwner:
            msg = "(extended)"
        elif newTokenOwner == "rs_svc":
            msg = "(released)"

        subLogger.info("name:%s, VO:%s statusType:%s %s" % (elementName, vo, statusType, msg))

    return S_OK()
class TokenAgent(AgentModule):
    '''
    TokenAgent is in charge of checking tokens assigned on resources.
    Notifications are sent to those users owning expiring tokens.

    On every cycle it selects, for each element family, the entries whose
    token expires within the next `notifyHours` hours and is not owned by
    RSS. Expired tokens are handed back to RSS; every selected token is
    reported by mail to its owner and, if `adminMail` is set, to the admin.
    '''

    # Rss token
    __rssToken = 'rs_svc'

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''
        AgentModule.__init__(self, *args, **kwargs)

        # Tokens expiring within this many hours are reported
        self.notifyHours = 12
        # Address receiving the global summary ( nothing is sent if empty )
        self.adminMail = ''

        self.rsClient = None
        # { tokenOwner : [ [ tokenOwner, element, name, statusType, status, tokenExpiration ], ... ] }
        self.tokenDict = {}
        self.diracAdmin = None

    def initialize(self):
        ''' TokenAgent initialization: reads the `notifyHours` and `adminMail`
            options and instantiates the clients.

        :return: S_OK()
        '''
        self.notifyHours = self.am_getOption('notifyHours', self.notifyHours)
        self.adminMail = self.am_getOption('adminMail', self.adminMail)

        self.rsClient = ResourceStatusClient()
        self.diracAdmin = DiracAdmin()

        return S_OK()

    def execute(self):
        '''
        Looks for user tokens. If they are expired, or expiring, it notifies users.

        Errors are logged and do not stop the cycle.

        :return: S_OK()
        '''
        # Initialized here, as it is needed empty at the beginning of the execution
        self.tokenDict = {}

        elements = ('Site', 'Resource', 'Node')

        for element in elements:

            self.log.info('Processing %s' % element)

            interestingTokens = self._getInterestingTokens(element)
            if not interestingTokens['OK']:
                self.log.error(interestingTokens['Message'])
                continue
            interestingTokens = interestingTokens['Value']

            processTokens = self._processTokens(element, interestingTokens)
            if not processTokens['OK']:
                self.log.error(processTokens['Message'])
                continue

        notificationResult = self._notifyOfTokens()
        if not notificationResult['OK']:
            self.log.error(notificationResult['Message'])

        return S_OK()

    def _getInterestingTokens(self, element):
        '''
        Given an element, picks all the entries with TokenExpiration < now + X<hours>
        If the TokenOwner is not the rssToken ( rs_svc ), it is selected.

        :param element: element family ( 'Site', 'Resource' or 'Node' )
        :return: S_OK( [ { column : value }, ... ] ) | S_ERROR
        '''
        tokenExpLimit = datetime.utcnow() + timedelta(hours=self.notifyHours)

        tokenElements = self.rsClient.selectStatusElement(element, 'Status',
                                                          meta={'older': ('TokenExpiration', tokenExpLimit)})
        if not tokenElements['OK']:
            return tokenElements

        tokenColumns = tokenElements['Columns']
        tokenElements = tokenElements['Value']

        interestingTokens = []
        for tokenElement in tokenElements:
            # Rows come back as plain sequences; name the fields using the column list
            tokenElement = dict(zip(tokenColumns, tokenElement))
            if tokenElement['TokenOwner'] != self.__rssToken:
                interestingTokens.append(tokenElement)

        return S_OK(interestingTokens)

    def _processTokens(self, element, tokenElements):
        '''
        Given an element and a list of interesting token elements, updates the
        database if the token is expired ( ownership goes back to RSS with no
        expiration ), logs a message and adds the token to `self.tokenDict`,
        keyed by its owner, so that it is notified later on.

        :param element: element family ( 'Site', 'Resource' or 'Node' )
        :param tokenElements: list of dictionaries, as returned by
                              _getInterestingTokens
        :return: S_OK() | S_ERROR ( missing key, or failed database update )
        '''
        never = datetime.max

        for tokenElement in tokenElements:

            try:
                name = tokenElement['Name']
                statusType = tokenElement['StatusType']
                status = tokenElement['Status']
                tokenOwner = tokenElement['TokenOwner']
                tokenExpiration = tokenElement['TokenExpiration']
            except KeyError as e:
                return S_ERROR('Token element is missing key %s' % e)

            # If token has already expired
            if tokenExpiration < datetime.utcnow():
                _msg = '%s with statusType "%s" and owner %s EXPIRED'
                self.log.info(_msg % (name, statusType, tokenOwner))

                result = self.rsClient.addOrModifyStatusElement(element, 'Status', name=name,
                                                                statusType=statusType,
                                                                tokenOwner=self.__rssToken,
                                                                tokenExpiration=never)
                if not result['OK']:
                    return result
            else:
                _msg = '%s with statusType "%s" and owner %s -> %s'
                self.log.info(_msg % (name, statusType, tokenOwner, tokenExpiration))

            # Expired and expiring tokens are both recorded; they are told
            # apart again, by expiration date, when notifying.
            if tokenOwner not in self.tokenDict:
                self.tokenDict[tokenOwner] = []

            self.tokenDict[tokenOwner].append([tokenOwner, element, name, statusType, status, tokenExpiration])

        return S_OK()

    def _notifyOfTokens(self):
        '''
        Splits interesting tokens between expired and expiring. Also splits them
        among users. It ends sending notifications to the users.

        A failure notifying one owner is logged and does not prevent notifying
        the others. If `adminMail` is set and there is anything to report, a
        summary with all the tokens is sent to it.

        :return: S_OK() | result of the admin notification
        '''
        now = datetime.utcnow()

        adminExpired = []
        adminExpiring = []

        for tokenOwner, tokenLists in self.tokenDict.items():

            expired = []
            expiring = []

            for tokenList in tokenLists:
                # Index 5 holds the tokenExpiration ( see _processTokens )
                if tokenList[5] < now:
                    expired.append(tokenList)
                    adminExpired.append(tokenList)
                else:
                    expiring.append(tokenList)
                    adminExpiring.append(tokenList)

            resNotify = self._notify(tokenOwner, expired, expiring)
            if not resNotify['OK']:
                self.log.error('Failed to notify token owner', resNotify['Message'])

        if (adminExpired or adminExpiring) and self.adminMail:
            return self._notify(self.adminMail, adminExpired, adminExpiring)

        return S_OK()

    def _notify(self, tokenOwner, expired, expiring):
        '''
        Given a token owner and a list of expired and expiring tokens, sends an
        email to the user.

        :param tokenOwner: recipient of the mail
        :param expired: list of token lists whose token has already expired
        :param expiring: list of token lists whose token is about to expire
        :return: result of sendMail | S_ERROR if the mail could not be sent
        '''
        subject = 'RSS token summary for tokenOwner %s' % tokenOwner

        mail = '\nEXPIRED tokens ( RSS has taken control of them )\n'
        for tokenList in expired:
            mail += ' '.join([str(x) for x in tokenList])
            mail += '\n'

        # Appended ( not assigned ), so the EXPIRED section above is kept in the mail
        mail += '\nEXPIRING tokens ( RSS will take control of them )\n'
        for tokenList in expiring:
            mail += ' '.join([str(x) for x in tokenList])
            mail += '\n'

        mail += "\n\n You can extend for another 24 hours using the web interface (Set token -> Acquire)\n"
        mail += " Or you can use the dirac-rss-set-token script\n\n"
        mail += "Through the same interfaces you can release the token any time\n"

        # FIXME: you can re-take control of them using this or that...

        resEmail = self.diracAdmin.sendMail(tokenOwner, subject, mail)
        if not resEmail['OK']:
            return S_ERROR('Cannot send email to user "%s"' % tokenOwner)

        return resEmail
class SummarizeLogsAgent(AgentModule):
    '''
    Agent that summarizes the <element>Log tables into the <element>History
    tables.

    On every cycle, and for each element family, it reads the Log entries of
    the last hour, keeps for each ( <name>, <statusType> ) only the entries
    that represent a change of status, and inserts them on the History table.
    '''

    # Date format in database
    __dateFormat = '%Y-%m-%d %H:%M:%S'

    def __init__(self, *args, **kwargs):
        ''' c'tor
        '''
        AgentModule.__init__(self, *args, **kwargs)

        self.rsClient = None

    def initialize(self):
        ''' Standard initialize: instantiates the ResourceStatusClient.

        :return: S_OK()
        '''
        self.rsClient = ResourceStatusClient()
        return S_OK()

    def execute(self):
        '''
        Summarizes the logs of every element family for the last full hour.

        Errors are logged; a failure on one ( <name>, <statusType> ) stops the
        processing of the current element family only.

        :return: S_OK()
        '''
        # FIXME: probably this can be obtained from RssConfiguration instead
        elements = ('Site', 'Resource', 'Node')

        # We do not want neither minutes, nor seconds nor microseconds
        thisHour = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

        for element in elements:

            self.log.info('Summarizing %s' % element)

            selectLogElements = self._selectLogElements(element, thisHour)
            if not selectLogElements['OK']:
                self.log.error(selectLogElements['Message'])
                continue
            selectLogElements = selectLogElements['Value']

            for selectedKey, selectedItem in selectLogElements.items():

                sRes = self._logSelectedLogElement(element, selectedKey, selectedItem, thisHour)
                if not sRes['OK']:
                    self.log.error(sRes['Message'])
                    break

        return S_OK()

    def _selectLogElements(self, element, thisHour):
        '''
        For a given element, selects all the entries on the <element>Log table
        with LastCheckTime > <lastHour>. Entries with LastCheckTime > <thisHour>
        are discarded. It groups them by tuples of ( <name>, <statusType> ) and
        keeps only the statuses that represent a change in the status.

        :param element: element family ( 'Site', 'Resource' or 'Node' )
        :param thisHour: datetime truncated to the hour, upper limit of the window
        :return: S_OK( { ( name, statusType ) : [ { column : value }, ... ] } ) | S_ERROR
        '''
        lastHour = thisHour - timedelta(hours=1)

        selectResults = self.rsClient.selectStatusElement(element, 'Log',
                                                          meta={'newer': ('LastCheckTime', lastHour)})
        if not selectResults['OK']:
            return selectResults

        selectedItems = {}
        selectColumns = selectResults['Columns']
        selectResults = selectResults['Value']

        for selectResult in selectResults:

            elementDict = dict(zip(selectColumns, selectResult))

            # Entries newer than the upper limit of the window are left out
            if elementDict['LastCheckTime'] > thisHour:
                continue

            key = (elementDict['Name'], elementDict['StatusType'])

            if key not in selectedItems:
                selectedItems[key] = [elementDict]
            else:
                # Only keep the entry if the status differs from the previous one kept
                lastStatus = selectedItems[key][-1]['Status']
                if lastStatus != elementDict['Status']:
                    selectedItems[key].append(elementDict)

        return S_OK(selectedItems)

    def _logSelectedLogElement(self, element, selectedKey, selectedItem, thisHour):
        '''
        Given an element, a selectedKey - which is a tuple ( <name>, <statusType> )
        and a list of dictionaries, this method inserts them. Before inserting
        them, checks whether the first one is or is not on the <element>History
        table. If it is, it is not inserted.

        :param element: element family ( 'Site', 'Resource' or 'Node' )
        :param selectedKey: tuple ( <name>, <statusType> )
        :param selectedItem: non-empty list of dictionaries, one per status change
        :param thisHour: datetime truncated to the hour, upper limit of the window
        :return: S_OK() | S_ERROR ( also if the newest History entry already
                 falls within the current time span )
        '''
        name, statusType = selectedKey

        selectedRes = self.rsClient.selectStatusElement(element, 'History', name, statusType,
                                                        meta={'columns': ['Status', 'LastCheckTime']})
        if not selectedRes['OK']:
            return selectedRes
        selectedRes = selectedRes['Value']

        selectedStatus = None
        if selectedRes:
            # Get the last selectedRes, which will be the newest one. Each selectedRes
            # is a tuple, in this case, containing two elements - Status, LastCheckTime
            selectedStatus, selectedLastTime = selectedRes[-1]

            if selectedLastTime > thisHour - timedelta(hours=1):
                return S_ERROR('The agent has run once on this time span, skipping')

        # If the first of the selected items has a different status than the latest
        # on the history, we add it.
        if selectedItem[0]['Status'] != selectedStatus:
            res = self._logToHistoryTable(element, selectedItem[0])
            if not res['OK']:
                return res

        # The remaining items are status changes by construction: always inserted
        for selectedItemDict in selectedItem[1:]:
            res = self._logToHistoryTable(element, selectedItemDict)
            if not res['OK']:
                return res

        return S_OK()

    def _logToHistoryTable(self, element, elementDict):
        '''
        Given an element and a dictionary with all the arguments, this method
        inserts a new entry on the <element>History table

        :param element: element family ( 'Site', 'Resource' or 'Node' )
        :param elementDict: dictionary with the values of the entry to insert
        :return: result of insertStatusElement | S_ERROR if a key is missing
        '''
        try:
            name = elementDict['Name']
            statusType = elementDict['StatusType']
            status = elementDict['Status']
            elementType = elementDict['ElementType']
            reason = elementDict['Reason']
            dateEffective = elementDict['DateEffective']
            lastCheckTime = elementDict['LastCheckTime']
            tokenOwner = elementDict['TokenOwner']
            tokenExpiration = elementDict['TokenExpiration']
        except KeyError as e:
            return S_ERROR('Log element is missing key %s' % e)

        return self.rsClient.insertStatusElement(element, 'History', name, statusType,
                                                 status, elementType, reason,
                                                 dateEffective, lastCheckTime,
                                                 tokenOwner, tokenExpiration)