def notifyAboutNewSoftware(self):
  """Send an email to the mailing list if a new software version was defined"""
  # Only send email when something was actually added
  if not self.modifiedCS:
    return

  subject = '%s %s added to DIRAC CS' % (self.appName, self.appVersion)
  msg = 'New application %s %s declared into Configuration service\n %s' % (self.appName,
                                                                            self.appVersion,
                                                                            self.comment)

  from DIRAC.Core.Security.ProxyInfo import getProxyInfo
  from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getUserOption
  from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient

  notifyClient = NotificationClient()
  gLogger.notice('Sending mail for software installation to %s' % (self.mailadress))
  res = getProxyInfo()
  if not res['OK']:
    sender = '*****@*****.**'
  else:
    if 'username' in res['Value']:
      sender = getUserOption(res['Value']['username'], 'Email')
    else:
      sender = '*****@*****.**'
  gLogger.info('*' * 80)  # surround email with stars
  res = notifyClient.sendMail(self.mailadress, subject, msg, sender, localAttempt=False)
  gLogger.info('*' * 80)
  if not res['OK']:
    gLogger.error('The mail could not be sent: %s' % res['Message'])
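For reference, here is a minimal standalone sketch (not taken from any of the sources in this listing) of sending a notification email directly with NotificationClient, following the same call pattern as the example above; the addresses are placeholders and a configured DIRAC client installation with a valid proxy is assumed.

from DIRAC import gLogger
from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient

notification = NotificationClient()
# Arguments follow the usage seen throughout these examples:
# (addresses, subject, body, fromAddress, localAttempt)
result = notification.sendMail("admin@example.org",            # recipient (placeholder)
                               "Test notification",            # subject
                               "Body of the test message.\n",  # body
                               "noreply@example.org",          # sender (placeholder)
                               localAttempt=False)             # relay through the Notification service
if not result['OK']:
  gLogger.error("The mail could not be sent", result['Message'])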
def execute(self):
  """The main execution method."""
  self.log.notice("Will ignore the following productions: %s" % self.productionsToIgnore)
  self.log.notice(" Job Cache: %s " % self.jobCache)
  transformations = self.getEligibleTransformations(self.transformationStatus, self.transformationTypes)
  if not transformations['OK']:
    self.log.error("Failure to get transformations", transformations['Message'])
    return S_ERROR("Failure to get transformations")
  for prodID, values in transformations['Value'].iteritems():
    if prodID in self.productionsToIgnore:
      self.log.notice("Ignoring Production: %s " % prodID)
      continue
    self.__resetCounters()
    self.inputFilesProcessed = set()
    transType, transName = values
    self.log.notice("Running over Production: %s " % prodID)
    self.treatProduction(int(prodID), transName, transType)

    if self.notesToSend:
      # remove from the jobCache because something happened
      self.jobCache.pop(int(prodID), None)
      notification = NotificationClient()
      for address in self.addressTo:
        result = notification.sendMail(address, "%s: %s" % (self.subject, prodID),
                                       self.notesToSend, self.addressFrom, localAttempt=False)
        if not result['OK']:
          self.log.error('Cannot send notification mail', result['Message'])
      self.notesToSend = ""

  return S_OK()
def sendSMS(self, userName, body, fromAddress=None):
  """ Send an SMS to the specified user with the given body. """
  if len(body) > 160:
    return S_ERROR('Exceeded maximum SMS length of 160 characters')
  notification = NotificationClient()
  return notification.sendSMS(userName, body, fromAddress)
def __lookForNewCEs(self):
  """ Look up BDII for CEs not yet present in the DIRAC CS """
  bannedCEs = self.am_getOption('BannedCEs', [])
  result = getCEsFromCS()
  if not result['OK']:
    return result
  knownCEs = set(result['Value'])
  knownCEs = knownCEs.union(set(bannedCEs))

  for vo in self.voName:
    result = self.__getBdiiCEInfo(vo)
    if not result['OK']:
      continue
    bdiiInfo = result['Value']
    result = getGridCEs(vo, bdiiInfo=bdiiInfo, ceBlackList=knownCEs)
    if not result['OK']:
      self.log.error('Failed to get unused CEs', result['Message'])
    siteDict = result['Value']
    body = ''
    for site in siteDict:
      newCEs = set(siteDict[site].keys())
      if not newCEs:
        continue

      ceString = ''
      for ce in newCEs:
        queueString = ''
        ceInfo = bdiiInfo[site]['CEs'][ce]
        ceString = "CE: %s, GOCDB Site Name: %s" % (ce, site)
        systemTuple = siteDict[site][ce]['System']
        osString = "%s_%s_%s" % (systemTuple)
        newCEString = "\n%s\n%s\n" % (ceString, osString)
        for queue in ceInfo['Queues']:
          queueStatus = ceInfo['Queues'][queue].get('GlueCEStateStatus', 'UnknownStatus')
          if 'production' in queueStatus.lower():
            ceType = ceInfo['Queues'][queue].get('GlueCEImplementationName', '')
            queueString += " %s %s %s\n" % (queue, queueStatus, ceType)
        if queueString:
          ceString = newCEString
          ceString += "Queues:\n"
          ceString += queueString

      if ceString:
        body += ceString

    if body:
      body = "\nWe are glad to inform You about new CE(s) possibly suitable for %s:\n" % vo + body
      body += "\n\nTo suppress information about CE add its name to BannedCEs list.\n"
      body += "Add new Sites/CEs for vo %s with the command:\n" % vo
      body += "dirac-admin-add-resources --vo %s --ce\n" % vo
      self.log.info(body)
      if self.addressTo and self.addressFrom:
        notification = NotificationClient()
        result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False)
        if not result['OK']:
          self.log.error('Can not send new site notification mail', result['Message'])

  return S_OK()
def __init__(self, *args, **kwargs):
  """Initialize the agent, clients, default values."""
  AgentModule.__init__(self, *args, **kwargs)
  self.name = 'MonitorAgents'
  self.setup = "Production"
  self.enabled = False
  self.restartAgents = False
  self.restartExecutors = False
  self.restartServices = False
  self.controlComponents = False
  self.commitURLs = False
  self.diracLocation = "/opt/dirac/pro"

  self.sysAdminClient = SystemAdministratorClient(socket.gethostname())
  self.jobMonClient = JobMonitoringClient()
  self.nClient = NotificationClient()
  self.csAPI = None
  self.agents = dict()
  self.executors = dict()
  self.services = dict()
  self.errors = list()
  self.accounting = defaultdict(dict)

  self.addressTo = ["*****@*****.**"]
  self.addressFrom = "*****@*****.**"
  self.emailSubject = "MonitorAgents on %s" % socket.gethostname()
def initialize(self):
  self.systemLoggingDB = SystemLoggingDB()
  self.notification = NotificationClient()

  userList = self.am_getOption("Reviewer", [])
  self.log.debug("Users to be notified:", ', '.join(userList))

  mailList = []
  for user in userList:
    mail = getUserOption(user, 'Email', '')
    if not mail:
      self.log.warn("Could not get user's mail", user)
    else:
      mailList.append(mail)

  if not mailList:
    mailList = Operations().getValue('EMail/Logging', [])
  if not len(mailList):
    errString = "There are no valid users in the mailing list"
    varString = "[" + ','.join(userList) + "]"
    self.log.warn(errString, varString)

  self.log.info("List of mails to be notified", ','.join(mailList))
  self._mailAddress = mailList
  self._subject = 'New error messages were entered in the SystemLoggingDB'
  return S_OK()
def initialize(self):
  self.SystemLoggingDB = SystemLoggingDB()
  self.notification = NotificationClient()

  userString = self.am_getOption("Reviewer", 'mseco')
  self.log.debug("Users to be notified", ": " + userString)
  userList = List.fromChar(userString, ",")

  mailList = []
  for user in userList:
    retval = gConfig.getOption("/Registry/Users/" + user + "/Email")
    if not retval['OK']:
      self.log.warn("Could not get user's mail", retval['Message'])
    else:
      mailList.append(retval['Value'])

  if not mailList:
    mailList = gConfig.getValue('/Operations/EMail/Logging', [])
  if not len(mailList):
    errString = "There are no valid users in the list"
    varString = "[" + ','.join(userList) + "]"
    self.log.error(errString, varString)
    return S_ERROR(errString + varString)

  self.log.info("List of mails to be notified", ','.join(mailList))
  self._mailAddress = mailList
  self._subject = 'New error messages were entered in the SystemLoggingDB'
  return S_OK()
def __updateCS(self, bdiiChangeSet):
  queueVODict = {}
  changeSet = set()
  for entry in bdiiChangeSet:
    section, option, _value, new_value = entry
    if option == "VO":
      queueVODict.setdefault(section, set())
      queueVODict[section] = queueVODict[section].union(set(new_value.split(',')))
    else:
      changeSet.add(entry)
  for section, VOs in queueVODict.items():
    changeSet.add((section, 'VO', '', ','.join(VOs)))

  if changeSet:
    changeList = list(changeSet)
    changeList.sort()
    body = '\n'.join(["%s/%s %s -> %s" % entry for entry in changeList])
    if body and self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False)
    if body:
      self.log.info('The following configuration changes were detected:')
      self.log.info(body)

    for section, option, value, new_value in changeSet:
      if value == 'Unknown' or not value:
        self.csAPI.setOption(cfgPath(section, option), new_value)
      else:
        self.csAPI.modifyValue(cfgPath(section, option), new_value)

    if self.dryRun:
      self.log.info("Dry Run: CS won't be updated")
      self.csAPI.showDiff()
    else:
      result = self.csAPI.commit()
      if not result['OK']:
        self.log.error("Error while committing to CS", result['Message'])
      else:
        self.log.info("Successfully committed %d changes to CS" % len(changeList))
      return result
  else:
    self.log.info("No changes found")
  return S_OK()
def __lookForNewSEs(self):
  """ Look up BDII for SEs not yet present in the DIRAC CS """
  bannedSEs = self.am_getOption('BannedSEs', [])
  result = getSEsFromCS()
  if not result['OK']:
    return result
  knownSEs = set(result['Value'])
  knownSEs = knownSEs.union(set(bannedSEs))

  for vo in self.voName:
    result = self.__getBdiiSEInfo(vo)
    if not result['OK']:
      continue
    bdiiInfo = result['Value']
    result = getGridSRMs(vo, bdiiInfo=bdiiInfo, srmBlackList=knownSEs)
    if not result['OK']:
      continue
    siteDict = result['Value']
    body = ''
    for site in siteDict:
      newSEs = set(siteDict[site].keys())
      if not newSEs:
        continue
      for se in newSEs:
        body += '\n New SE %s available at site %s:\n' % (se, site)
        backend = siteDict[site][se]['SE'].get('GlueSEImplementationName', 'Unknown')
        size = siteDict[site][se]['SE'].get('GlueSESizeTotal', 'Unknown')
        body += ' Backend %s, Size %s' % (backend, size)

    if body:
      body = "\nWe are glad to inform You about new SE(s) possibly suitable for %s:\n" % vo + body
      body += "\n\nTo suppress information about an SE add its name to BannedSEs list.\n"
      body += "Add new SEs for vo %s with the command:\n" % vo
      body += "dirac-admin-add-resources --vo %s --se\n" % vo
      self.log.info(body)
      if self.addressTo and self.addressFrom:
        notification = NotificationClient()
        result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False)
        if not result['OK']:
          self.log.error('Can not send new site notification mail', result['Message'])

  return S_OK()
def initialize(self):
  """ TokenAgent initialization """
  try:
    self.rsDB = ResourceStatusDB()
    self.nc = NotificationClient()
    return S_OK()
  except Exception:
    errorStr = "TokenAgent initialization"
    gLogger.exception(errorStr)
    return S_ERROR(errorStr)
def __init__(self, useMyProxy=False):
  DB.__init__(self, 'ProxyDB', 'Framework/ProxyDB')
  random.seed()
  self.__defaultRequestLifetime = 300  # 5min
  self.__defaultTokenLifetime = 86400 * 7  # 1 week
  self.__defaultTokenMaxUses = 50
  self.__useMyProxy = useMyProxy
  self._minSecsToAllowStore = 3600
  self.__notifClient = NotificationClient()
  retVal = self.__initializeDB()
  if not retVal['OK']:
    raise Exception("Can't create tables: %s" % retVal['Message'])
  self.purgeExpiredProxies(sendNotifications=False)
  self.__checkDBVersion()
def initialize(self):
  self.systemLoggingDB = SystemLoggingDB()
  self.agentName = self.am_getModuleParam('fullName')
  self.notification = NotificationClient()

  mailList = self.am_getOption("MailList", [])
  userList = self.am_getOption("Reviewer", [])
  self.log.debug("Users to be notified:", ', '.join(userList))

  for user in userList:
    mail = getUserOption(user, 'Email', '')
    if not mail:
      self.log.warn("Could not get user's mail", user)
    else:
      mailList.append(mail)

  if not mailList:
    mailList = Operations().getValue('EMail/Logging', [])
  if not mailList:
    errString = "There are no valid users in the list of email where to send the report"
    errString += "\nPlease specify some in Operations/<default>/EMail/Logging"
    varString = "[" + ','.join(userList) + "]"
    self.log.error(errString, varString)
    return S_ERROR(errString + varString)

  self.log.info("List of mails to be notified", ','.join(mailList))
  self._mailAddress = mailList

  self._threshold = int(self.am_getOption('Threshold', 10))
  self.__days = self.am_getOption('QueryPeriod', 7)
  self._period = int(self.__days) * day
  self._limit = int(self.am_getOption('NumberOfErrors', 10))

  string = "The %i most common errors in the SystemLoggingDB" % self._limit
  self._subject = string + " for the last %s days" % self.__days
  return S_OK()
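The initialize above only prepares the recipient list and the report parameters. The following is a hypothetical sketch, not part of the agent code in this listing, of how a matching reporting step might send the assembled text through self.notification; the helper name and the sender address are illustrative assumptions.

def _sendReport(self, reportText):
  """Hypothetical helper: mail the error report to every collected address."""
  for address in self._mailAddress:
    # self._subject and self._mailAddress were prepared in initialize();
    # the sender address below is a placeholder for illustration only.
    result = self.notification.sendMail(address, self._subject, reportText,
                                        "noreply@example.org", localAttempt=False)
    if not result['OK']:
      self.log.error("Failed to send the error report", result['Message'])
  return S_OK()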
def initialize(self):
  ''' TokenAgent initialization '''
  # Attribute defined outside __init__
  # pylint: disable-msg=W0201
  self.notifyHours = self.am_getOption('notifyHours', 10)
  try:
    self.rsClient = ResourceStatusClient()
    self.rmClient = ResourceManagementClient()
    self.noClient = NotificationClient()
    return S_OK()
  except Exception:
    errorStr = "TokenAgent initialization"
    self.log.exception(errorStr)
    return S_ERROR(errorStr)
def __init__(self, *args, **kwargs):
  AgentModule.__init__(self, *args, **kwargs)
  self.name = 'FileStatusTransformationAgent'
  self.enabled = False
  self.shifterProxy = 'DataManager'
  self.transformationTypes = ["Replication"]
  self.transformationStatuses = ["Active"]
  self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]

  self.addressTo = ["*****@*****.**"]
  self.addressFrom = "*****@*****.**"
  self.emailSubject = "FileStatusTransformationAgent"

  self.accounting = defaultdict(list)
  self.errors = []

  self.fcClient = FileCatalogClient()
  self.tClient = TransformationClient()
  self.reqClient = ReqClient()
  self.nClient = NotificationClient()
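As a companion to the constructor above, here is a hypothetical sketch of how such an agent could flush its accumulated self.errors through the stored NotificationClient; the helper name and the exact reporting flow are illustrative assumptions, only the attributes used come from the constructor.

def _notifyErrors(self):
  """Hypothetical helper: mail accumulated errors to the configured addresses."""
  if not self.errors:
    return S_OK()
  body = "\n".join(self.errors)
  for address in self.addressTo:
    result = self.nClient.sendMail(address, self.emailSubject, body,
                                   self.addressFrom, localAttempt=False)
    if not result['OK']:
      self.log.error("Failure to send notification email", result['Message'])
  self.errors = []
  return S_OK()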
def initialize(self):
  self.systemLoggingDB = SystemLoggingDB()
  self.agentName = self.am_getModuleParam("fullName")
  self.notification = NotificationClient()

  mailList = self.am_getOption("MailList", [])
  userList = self.am_getOption("Reviewer", [])
  self.log.debug("Users to be notified:", ", ".join(userList))

  for user in userList:
    mail = getUserOption(user, "Email", "")
    if not mail:
      self.log.warn("Could not get user's mail", user)
    else:
      mailList.append(mail)

  if not mailList:
    mailList = Operations().getValue("EMail/Logging", [])
  if not len(mailList):
    errString = "There are no valid users in the list"
    varString = "[" + ",".join(userList) + "]"
    self.log.error(errString, varString)
    return S_ERROR(errString + varString)

  self.log.info("List of mails to be notified", ",".join(mailList))
  self._mailAddress = mailList

  self._threshold = int(self.am_getOption("Threshold", 10))
  self.__days = self.am_getOption("QueryPeriod", 7)
  self._period = int(self.__days) * day
  self._limit = int(self.am_getOption("NumberOfErrors", 10))

  string = "The %i most common errors in the SystemLoggingDB" % self._limit
  self._subject = string + " for the last %s days" % self.__days
  return S_OK()
def initialize(self):
  self.SystemLoggingDB = SystemLoggingDB()
  self.agentName = self.am_getModuleParam('fullName')
  self.notification = NotificationClient()

  mailList = self.am_getOption("MailList", [])
  userString = self.am_getOption("Reviewer", 'mseco')
  userList = List.fromChar(userString, ",")
  self.log.debug("Users to be notified", ": " + userString)
  for user in userList:
    retval = gConfig.getOption("/Registry/Users/" + user + "/email")
    if not retval['OK']:
      self.log.warn("Could not get user's mail", retval['Message'])
    else:
      mailList.append(retval['Value'])

  if not mailList:
    mailList = gConfig.getValue('/Operations/EMail/Logging', [])
  if not len(mailList):
    errString = "There are no valid users in the list"
    varString = "[" + ','.join(userList) + "]"
    self.log.error(errString, varString)
    return S_ERROR(errString + varString)

  self.log.info("List of mails to be notified", ','.join(mailList))
  self._mailAddress = mailList
  self._threshold = int(self.am_getOption('Threshold', 10))
  self.__days = self.am_getOption('QueryPeriod', 7)
  self._period = int(self.__days) * day
  self._limit = int(self.am_getOption('NumberOfErrors', 10))
  string = "The %i most common errors in the SystemLoggingDB" % self._limit
  self._subject = string + " for the last %s days" % self.__days
  return S_OK()
def registerUser(self, paramcopy): # Unfortunately there is no way to get rid of empty text values in JS, so i have to hardcode it on server side. Hate it! default_values = [ "John Smith", "jsmith", "*****@*****.**", "+33 9 10 00 10 00", "Select prefered virtual organization(s)" ] default_values.append("Select your country") default_values.append( "Any additional information you want to provide to administrators") dn = getUserDN() username = getUsername() if not username == "anonymous": return { "success": "false", "error": "You are already registered in DIRAC with username: %s" % username } else: if not dn: return { "success": "false", "error": "You have to load certificate to your browser before trying to register" } body = "" userMail = False vo = [] for i in paramcopy: if not paramcopy[i] in default_values: if i == "email": userMail = paramcopy[i] if i == "vo": vo = paramcopy[i].split(",") body = body + str(i) + ' - "' + str(paramcopy[i]) + '"\n' if not userMail: return { "success": "false", "error": "Can not get your email from the request" } gLogger.info("!!! VO: ", vo) # TODO Check for previous requests if not len(vo) > 0: mails = gConfig.getValue("/Website/UserRegistrationEmail", []) else: mails = [] for i in vo: i = i.strip() voadm = gConfig.getValue("/Registry/VO/%s/VOAdmin" % i, "") failsafe = False if voadm: tmpmail = gConfig.getValue( "/Registry/Users/%s/Email" % voadm, "") if tmpmail: mails.append(tmpmail) else: gLogger.error( "Can not find value for option /Registry/Users/%s/Email Trying failsafe option" % voadm) failsafe = True else: gLogger.error( "Can not find value for option /Registry/VO/%s/VOAdmin Trying failsafe option" % i) failsafe = True if failsafe: failsafe = gConfig.getValue( "/Website/UserRegistrationEmail", []) if len(failsafe) > 0: for j in failsafe: mails.append(j) else: gLogger.error( "Can not find value for failsafe option /Website/UserRegistrationEmail User registration for VO %s is failed" % i) mails = uniqueElements(mails) if not len(mails) > 0: groupList = list() allGroups = gConfig.getSections("/Registry/Groups") if not allGroups["OK"]: return { "success": "false", "error": "No groups found at this DIRAC installation" } allGroups = allGroups["Value"] for j in allGroups: props = getProperties(j) if "UserAdministrator" in props: # property which usd for user administration groupList.append(j) groupList = uniqueElements(groupList) if not len(groupList) > 0: return { "success": "false", "error": "No groups, resposible for user administration, found" } userList = list() for i in groupList: users = gConfig.getValue("/Registry/Groups/%s/Users" % i, []) for j in users: userList.append(j) userList = uniqueElements(userList) if not len(userList) > 0: return { "success": "false", "error": "Can not find a person resposible for user administration, your request can not be approuved" } mails = list() mail2name = dict() for i in userList: tmpmail = gConfig.getValue("/Registry/Users/%s/Email" % i, "") if tmpmail: mails.append(tmpmail) else: gLogger.error( "Can not find value for option /Registry/Users/%s/Email" % i) mails = uniqueElements(mails) if not len(mails) > 0: return { "success": "false", "error": "Can not find an email of the person resposible for the users administration, your request can not be approuved" } gLogger.info("Admins emails: ", mails) if not len(mails) > 0: return { "success": "false", "error": "Can not find any emails of DIRAC Administrators" } allUsers = gConfig.getSections("/Registry/Users") if not allUsers["OK"]: return { "success": "false", 
"error": "No users found at this DIRAC installation" } allUsers = allUsers["Value"] mail2name = dict() for i in allUsers: tmpmail = gConfig.getValue("/Registry/Users/%s/Email" % i, "") if tmpmail in mails: mail2name[tmpmail] = gConfig.getValue( "/Registry/Users/%s/FullName" % i, i) sentFailed = list() sentSuccess = list() errorMessage = list() ntc = NotificationClient(getRPCClient) for i in mails: i = i.strip() result = ntc.sendMail(i, "New user has registered", body, userMail, False) if not result["OK"]: sentFailed.append(mail2name[i]) errorMessage.append(result["Message"]) else: sentSuccess.append(mail2name[i]) gLogger.info("Sent success: ", sentSuccess) gLogger.info("Sent failure: ", sentFailed) errorMessage = uniqueElements(errorMessage) if len(sentSuccess) == 0: if not len(errorMessage) > 0: return { "success": "false", "error": "No messages were sent to administrators due techincal reasons" } errorMessage = ", ".join(errorMessage) return {"success": "false", "error": errorMessage} sName = ", ".join(sentSuccess) fName = ", ".join(sentFailed) if len(sentFailed) > 0: return { "success": "true", "result": "Your registration request were sent successfuly to %s. Failed to sent request to %s." % (sName, fName) } return { "success": "true", "result": "Your registration request were sent successfuly to %s." % sName }
                                  ' Token: Name of the Token']))
Script.parseCommandLine()

hours = 24
args = Script.getPositionalArgs()
if not args:
  Script.showHelp()

from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient
from DIRAC.Core.Security.Misc import getProxyInfo
from DIRAC import gConfig, gLogger
from DIRAC.Core.DISET.RPCClient import RPCClient
from DIRAC.ResourceStatusSystem.Utilities.CS import getMailForUser

nc = NotificationClient()
s = RPCClient("ResourceStatus/ResourceStatus")

res = getProxyInfo()
if not res['OK']:
  gLogger.error("Failed to get proxy information", res['Message'])
  DIRAC.exit(2)
userName = res['Value']['username']
group = res['Value']['group']
if group not in ('diracAdmin', 'lhcb_prod'):
  gLogger.error("You must be lhcb_prod or diracAdmin to execute this script")
  gLogger.info("Please issue 'lhcb-proxy-init -g lhcb_prod' or 'lhcb-proxy-init -g diracAdmin'")
  DIRAC.exit(2)

for arg in args:
def _infoFromCE(self): sitesSection = cfgPath('Resources', 'Sites') result = gConfig.getSections(sitesSection) if not result['OK']: return grids = result['Value'] for grid in grids: gridSection = cfgPath(sitesSection, grid) result = gConfig.getSections(gridSection) if not result['OK']: return sites = result['Value'] changed = False body = "" for site in sites: # if site[-2:]!='ru': # continue siteSection = cfgPath(gridSection, site) opt = gConfig.getOptionsDict(siteSection)['Value'] name = opt.get('Name', '') if name: coor = opt.get('Coordinates', 'Unknown') mail = opt.get('Mail', 'Unknown') result = ldapSite(name) if not result['OK']: self.log.warn("BDII site %s: %s" % (name, result['Message'])) result = self._checkAlternativeBDIISite(ldapSite, name) if result['OK']: bdiisites = result['Value'] if len(bdiisites) == 0: self.log.warn(name, "Error in bdii: leng = 0") else: if not len(bdiisites) == 1: self.log.warn( name, "Warning in bdii: leng = %d" % len(bdiisites)) bdiisite = bdiisites[0] try: longitude = bdiisite['GlueSiteLongitude'] latitude = bdiisite['GlueSiteLatitude'] newcoor = "%s:%s" % (longitude, latitude) except: self.log.warn("Error in bdii coor") newcoor = "Unknown" try: newmail = bdiisite[ 'GlueSiteSysAdminContact'].split( ":")[-1].strip() except: self.log.warn("Error in bdii mail") newmail = "Unknown" self.log.debug("%s %s %s" % (name, newcoor, newmail)) if newcoor != coor: self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor)) if coor == 'Unknown': self.csAPI.setOption( cfgPath(siteSection, 'Coordinates'), newcoor) else: self.csAPI.modifyValue( cfgPath(siteSection, 'Coordinates'), newcoor) changed = True if newmail != mail: self.log.info("%s" % (name), "%s -> %s" % (mail, newmail)) if mail == 'Unknown': self.csAPI.setOption( cfgPath(siteSection, 'Mail'), newmail) else: self.csAPI.modifyValue( cfgPath(siteSection, 'Coordinates'), newmail) changed = True celist = List.fromChar(opt.get('CE', '')) if not celist: self.log.warn(site, 'Empty site list') continue # result = gConfig.getSections( cfgPath( siteSection,'CEs' ) # if not result['OK']: # self.log.debug( "Section CEs:", result['Message'] ) for ce in celist: ceSection = cfgPath(siteSection, 'CEs', ce) result = gConfig.getOptionsDict(ceSection) if not result['OK']: self.log.debug("Section CE", result['Message']) wnTmpDir = 'Unknown' arch = 'Unknown' os = 'Unknown' si00 = 'Unknown' pilot = 'Unknown' cetype = 'Unknown' else: ceopt = result['Value'] wnTmpDir = ceopt.get('wnTmpDir', 'Unknown') arch = ceopt.get('architecture', 'Unknown') os = ceopt.get('OS', 'Unknown') si00 = ceopt.get('SI00', 'Unknown') pilot = ceopt.get('Pilot', 'Unknown') cetype = ceopt.get('CEType', 'Unknown') result = ldapCE(ce) if not result['OK']: self.log.warn('Error in bdii for %s' % ce, result['Message']) result = self._checkAlternativeBDIISite(ldapCE, ce) continue try: bdiice = result['Value'][0] except: self.log.warn('Error in bdii for %s' % ce, result) bdiice = None if bdiice: try: newwnTmpDir = bdiice['GlueSubClusterWNTmpDir'] except: newwnTmpDir = 'Unknown' if wnTmpDir != newwnTmpDir and newwnTmpDir != 'Unknown': section = cfgPath(ceSection, 'wnTmpDir') self.log.info(section, " -> ".join( (wnTmpDir, newwnTmpDir))) if wnTmpDir == 'Unknown': self.csAPI.setOption(section, newwnTmpDir) else: self.csAPI.modifyValue(section, newwnTmpDir) changed = True try: newarch = bdiice[ 'GlueHostArchitecturePlatformType'] except: newarch = 'Unknown' if arch != newarch and newarch != 'Unknown': section = cfgPath(ceSection, 'architecture') self.log.info(section, " 
-> ".join( (arch, newarch))) if arch == 'Unknown': self.csAPI.setOption(section, newarch) else: self.csAPI.modifyValue(section, newarch) changed = True try: newos = '_'.join( (bdiice['GlueHostOperatingSystemName'], bdiice['GlueHostOperatingSystemVersion'], bdiice['GlueHostOperatingSystemRelease'])) except: newos = 'Unknown' if os != newos and newos != 'Unknown': section = cfgPath(ceSection, 'OS') self.log.info(section, " -> ".join((os, newos))) if os == 'Unknown': self.csAPI.setOption(section, newos) else: self.csAPI.modifyValue(section, newos) changed = True body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newos, ce, site) try: newsi00 = bdiice['GlueHostBenchmarkSI00'] except: newsi00 = 'Unknown' if si00 != newsi00 and newsi00 != 'Unknown': section = cfgPath(ceSection, 'SI00') self.log.info(section, " -> ".join( (si00, newsi00))) if si00 == 'Unknown': self.csAPI.setOption(section, newsi00) else: self.csAPI.modifyValue(section, newsi00) changed = True try: rte = bdiice[ 'GlueHostApplicationSoftwareRunTimeEnvironment'] if self.vo == 'lhcb': if 'VO-lhcb-pilot' in rte: newpilot = 'True' else: newpilot = 'False' else: newpilot = 'Unknown' except: newpilot = 'Unknown' if pilot != newpilot and newpilot != 'Unknown': section = cfgPath(ceSection, 'Pilot') self.log.info(section, " -> ".join( (pilot, newpilot))) if pilot == 'Unknown': self.csAPI.setOption(section, newpilot) else: self.csAPI.modifyValue(section, newpilot) changed = True result = ldapService(ce) if not result['OK']: result = self._checkAlternativeBDIISite( ldapService, ce) if result['OK']: services = result['Value'] newcetype = 'LCG' for service in services: if service['GlueServiceType'].count('CREAM'): newcetype = "CREAM" else: newcetype = 'Unknown' if cetype != newcetype and newcetype != 'Unknown': section = cfgPath(ceSection, 'CEType') self.log.info(section, " -> ".join( (cetype, newcetype))) if cetype == 'Unknown': self.csAPI.setOption(section, newcetype) else: self.csAPI.modifyValue(section, newcetype) changed = True result = ldapCEState(ce, vo=self.vo) #getBDIICEVOView if not result['OK']: self.log.warn('Error in bdii for queue %s' % ce, result['Message']) result = self._checkAlternativeBDIISite( ldapCEState, ce, self.vo) continue try: queues = result['Value'] except: self.log.warn('Error in bdii for queue %s' % ce, result['Massage']) continue for queue in queues: try: queueName = queue['GlueCEUniqueID'].split('/')[-1] except: self.log.warn('error in queuename ', queue) continue try: newmaxCPUTime = queue['GlueCEPolicyMaxCPUTime'] except: newmaxCPUTime = None newsi00 = None try: caps = queue['GlueCECapability'] if type(caps) == type(''): caps = [caps] for cap in caps: if cap.count('CPUScalingReferenceSI00'): newsi00 = cap.split('=')[-1] except: newsi00 = None queueSection = cfgPath(ceSection, 'Queues', queueName) result = gConfig.getOptionsDict(queueSection) if not result['OK']: self.log.warn("Section Queues", result['Message']) maxCPUTime = 'Unknown' si00 = 'Unknown' else: queueopt = result['Value'] maxCPUTime = queueopt.get('maxCPUTime', 'Unknown') si00 = queueopt.get('SI00', 'Unknown') if newmaxCPUTime and (maxCPUTime != newmaxCPUTime): section = cfgPath(queueSection, 'maxCPUTime') self.log.info( section, " -> ".join( (maxCPUTime, newmaxCPUTime))) if maxCPUTime == 'Unknown': self.csAPI.setOption(section, newmaxCPUTime) else: self.csAPI.modifyValue(section, newmaxCPUTime) changed = True if newsi00 and (si00 != newsi00): section = cfgPath(queueSection, 'SI00') self.log.info(section, " -> ".join( (si00, newsi00))) 
if si00 == 'Unknown': self.csAPI.setOption(section, newsi00) else: self.csAPI.modifyValue(section, newsi00) changed = True if False and changed: self.log.info(body) if body and self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False) return self.csAPI.commitChanges(sortUsers=False) else: self.log.info("No changes found") return S_OK()
def __infoFromCE(self): sitesSection = cfgPath("Resources", "Sites") result = gConfig.getSections(sitesSection) if not result["OK"]: return grids = result["Value"] changed = False body = "" for grid in grids: gridSection = cfgPath(sitesSection, grid) result = gConfig.getSections(gridSection) if not result["OK"]: return sites = result["Value"] for site in sites: # if site[-2:]!='ru': # continue siteSection = cfgPath(gridSection, site) opt = gConfig.getOptionsDict(siteSection)["Value"] name = opt.get("Name", "") if name: coor = opt.get("Coordinates", "Unknown") mail = opt.get("Mail", "Unknown") result = ldapSite(name) if not result["OK"]: self.log.warn("BDII site %s: %s" % (name, result["Message"])) result = self.__checkAlternativeBDIISite(ldapSite, name) if result["OK"]: bdiisites = result["Value"] if len(bdiisites) == 0: self.log.warn(name, "Error in bdii: leng = 0") else: if not len(bdiisites) == 1: self.log.warn(name, "Warning in bdii: leng = %d" % len(bdiisites)) bdiisite = bdiisites[0] try: longitude = bdiisite["GlueSiteLongitude"] latitude = bdiisite["GlueSiteLatitude"] newcoor = "%s:%s" % (longitude, latitude) except: self.log.warn("Error in bdii coor") newcoor = "Unknown" try: newmail = bdiisite["GlueSiteSysAdminContact"].split(":")[-1].strip() except: self.log.warn("Error in bdii mail") newmail = "Unknown" self.log.debug("%s %s %s" % (name, newcoor, newmail)) if newcoor != coor: self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor)) if coor == "Unknown": self.csAPI.setOption(cfgPath(siteSection, "Coordinates"), newcoor) else: self.csAPI.modifyValue(cfgPath(siteSection, "Coordinates"), newcoor) changed = True if newmail != mail: self.log.info("%s" % (name), "%s -> %s" % (mail, newmail)) if mail == "Unknown": self.csAPI.setOption(cfgPath(siteSection, "Mail"), newmail) else: self.csAPI.modifyValue(cfgPath(siteSection, "Mail"), newmail) changed = True celist = List.fromChar(opt.get("CE", "")) if not celist: self.log.warn(site, "Empty site list") continue # result = gConfig.getSections( cfgPath( siteSection,'CEs' ) # if not result['OK']: # self.log.debug( "Section CEs:", result['Message'] ) for ce in celist: ceSection = cfgPath(siteSection, "CEs", ce) result = gConfig.getOptionsDict(ceSection) if not result["OK"]: self.log.debug("Section CE", result["Message"]) wnTmpDir = "Unknown" arch = "Unknown" os = "Unknown" si00 = "Unknown" pilot = "Unknown" cetype = "Unknown" else: ceopt = result["Value"] wnTmpDir = ceopt.get("wnTmpDir", "Unknown") arch = ceopt.get("architecture", "Unknown") os = ceopt.get("OS", "Unknown") si00 = ceopt.get("SI00", "Unknown") pilot = ceopt.get("Pilot", "Unknown") cetype = ceopt.get("CEType", "Unknown") result = ldapCE(ce) if not result["OK"]: self.log.warn("Error in bdii for %s" % ce, result["Message"]) result = self.__checkAlternativeBDIISite(ldapCE, ce) continue try: bdiice = result["Value"][0] except: self.log.warn("Error in bdii for %s" % ce, result) bdiice = None if bdiice: try: newwnTmpDir = bdiice["GlueSubClusterWNTmpDir"] except: newwnTmpDir = "Unknown" if wnTmpDir != newwnTmpDir and newwnTmpDir != "Unknown": section = cfgPath(ceSection, "wnTmpDir") self.log.info(section, " -> ".join((wnTmpDir, newwnTmpDir))) if wnTmpDir == "Unknown": self.csAPI.setOption(section, newwnTmpDir) else: self.csAPI.modifyValue(section, newwnTmpDir) changed = True try: newarch = bdiice["GlueHostArchitecturePlatformType"] except: newarch = "Unknown" if arch != newarch and newarch != "Unknown": section = cfgPath(ceSection, "architecture") self.log.info(section, " -> 
".join((arch, newarch))) if arch == "Unknown": self.csAPI.setOption(section, newarch) else: self.csAPI.modifyValue(section, newarch) changed = True try: newos = "_".join( ( bdiice["GlueHostOperatingSystemName"], bdiice["GlueHostOperatingSystemVersion"], bdiice["GlueHostOperatingSystemRelease"], ) ) except: newos = "Unknown" if os != newos and newos != "Unknown": section = cfgPath(ceSection, "OS") self.log.info(section, " -> ".join((os, newos))) if os == "Unknown": self.csAPI.setOption(section, newos) else: self.csAPI.modifyValue(section, newos) changed = True body = body + "OS was changed %s -> %s for %s at %s\n" % (os, newos, ce, site) try: newsi00 = bdiice["GlueHostBenchmarkSI00"] except: newsi00 = "Unknown" if si00 != newsi00 and newsi00 != "Unknown": section = cfgPath(ceSection, "SI00") self.log.info(section, " -> ".join((si00, newsi00))) if si00 == "Unknown": self.csAPI.setOption(section, newsi00) else: self.csAPI.modifyValue(section, newsi00) changed = True try: rte = bdiice["GlueHostApplicationSoftwareRunTimeEnvironment"] if self.voName.lower() == "lhcb": if "VO-lhcb-pilot" in rte: newpilot = "True" else: newpilot = "False" else: newpilot = "Unknown" except: newpilot = "Unknown" if pilot != newpilot and newpilot != "Unknown": section = cfgPath(ceSection, "Pilot") self.log.info(section, " -> ".join((pilot, newpilot))) if pilot == "Unknown": self.csAPI.setOption(section, newpilot) else: self.csAPI.modifyValue(section, newpilot) changed = True result = ldapService(ce) if not result["OK"]: result = self.__checkAlternativeBDIISite(ldapService, ce) if result["OK"] and result["Value"]: services = result["Value"] newcetype = "LCG" for service in services: if service["GlueServiceType"].count("CREAM"): newcetype = "CREAM" else: newcetype = "Unknown" if cetype != newcetype and newcetype != "Unknown": section = cfgPath(ceSection, "CEType") self.log.info(section, " -> ".join((cetype, newcetype))) if cetype == "Unknown": self.csAPI.setOption(section, newcetype) else: self.csAPI.modifyValue(section, newcetype) changed = True result = ldapCEState(ce, vo=self.voName) # getBDIICEVOView if not result["OK"]: self.log.warn("Error in bdii for queue %s" % ce, result["Message"]) result = self.__checkAlternativeBDIISite(ldapCEState, ce, self.voName) continue try: queues = result["Value"] except: self.log.warn("Error in bdii for queue %s" % ce, result["Massage"]) continue for queue in queues: try: queueName = queue["GlueCEUniqueID"].split("/")[-1] except: self.log.warn("error in queuename ", queue) continue try: newmaxCPUTime = queue["GlueCEPolicyMaxCPUTime"] except: newmaxCPUTime = None newsi00 = None try: caps = queue["GlueCECapability"] if type(caps) == type(""): caps = [caps] for cap in caps: if cap.count("CPUScalingReferenceSI00"): newsi00 = cap.split("=")[-1] except: newsi00 = None queueSection = cfgPath(ceSection, "Queues", queueName) result = gConfig.getOptionsDict(queueSection) if not result["OK"]: self.log.warn("Section Queues", result["Message"]) maxCPUTime = "Unknown" si00 = "Unknown" else: queueopt = result["Value"] maxCPUTime = queueopt.get("maxCPUTime", "Unknown") si00 = queueopt.get("SI00", "Unknown") if newmaxCPUTime and (maxCPUTime != newmaxCPUTime): section = cfgPath(queueSection, "maxCPUTime") self.log.info(section, " -> ".join((maxCPUTime, newmaxCPUTime))) if maxCPUTime == "Unknown": self.csAPI.setOption(section, newmaxCPUTime) else: self.csAPI.modifyValue(section, newmaxCPUTime) changed = True if newsi00 and (si00 != newsi00): section = cfgPath(queueSection, "SI00") 
self.log.info(section, " -> ".join((si00, newsi00))) if si00 == "Unknown": self.csAPI.setOption(section, newsi00) else: self.csAPI.modifyValue(section, newsi00) changed = True if changed: self.log.info(body) if body and self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False) return self.csAPI.commit() else: self.log.info("No changes found") return S_OK()
def enforce(self, pdpIn=None, rsDBIn=None, rmDBIn=None, ncIn=None, setupIn=None,
            daIn=None, csAPIIn=None, knownInfo=None):
  """
    enforce policies, using a PDP (Policy Decision Point), based on

     self.__granularity (optional)
     self.__name (optional)
     self.__status (optional)
     self.__formerStatus (optional)
     self.__reason (optional)
     self.__siteType (optional)
     self.__serviceType (optional)
     self.__realBan (optional)
     self.__user (optional)
     self.__futurePolicyType (optional)
     self.__futureGranularity (optional)

    :params:
      :attr:`pdpIn`: a custom PDP object (optional)
      :attr:`rsDBIn`: a custom (statuses) database object (optional)
      :attr:`rmDBIn`: a custom (management) database object (optional)
      :attr:`setupIn`: a string with the present setup (optional)
      :attr:`ncIn`: a custom notification client object (optional)
      :attr:`daIn`: a custom DiracAdmin object (optional)
      :attr:`csAPIIn`: a custom CSAPI object (optional)
      :attr:`knownInfo`: a string of known provided information (optional)
  """

  # PDP
  if pdpIn is not None:
    pdp = pdpIn
  else:
    # Use standard DIRAC PDP
    from DIRAC.ResourceStatusSystem.PolicySystem.PDP import PDP
    pdp = PDP(self.VOExtension, granularity=self.__granularity, name=self.__name,
              status=self.__status, formerStatus=self.__formerStatus, reason=self.__reason,
              siteType=self.__siteType, serviceType=self.__serviceType,
              resourceType=self.__resourceType, useNewRes=self.useNewRes)

  # DB
  if rsDBIn is not None:
    rsDB = rsDBIn
  else:
    # Use standard DIRAC DB
    from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
    rsDB = ResourceStatusDB()

  if rmDBIn is not None:
    rmDB = rmDBIn
  else:
    # Use standard DIRAC DB
    from DIRAC.ResourceStatusSystem.DB.ResourceManagementDB import ResourceManagementDB
    rmDB = ResourceManagementDB()

  # setup
  if setupIn is not None:
    setup = setupIn
  else:
    # get present setup
    setup = CS.getSetup()['Value']

  # notification client
  if ncIn is not None:
    nc = ncIn
  else:
    from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient
    nc = NotificationClient()

  # DiracAdmin
  if daIn is not None:
    da = daIn
  else:
    from DIRAC.Interfaces.API.DiracAdmin import DiracAdmin
    da = DiracAdmin()

  # CSAPI
  if csAPIIn is not None:
    csAPI = csAPIIn
  else:
    from DIRAC.ConfigurationSystem.Client.CSAPI import CSAPI
    csAPI = CSAPI()

  ###################
  # policy decision #
  ###################

  resDecisions = pdp.takeDecision(knownInfo=knownInfo)
  assert (type(resDecisions) == dict and resDecisions != {})

  res = resDecisions['PolicyCombinedResult']
  actionBaseMod = "DIRAC.ResourceStatusSystem.PolicySystem.Actions"

  # Security mechanism in case there is no PolicyType returned
  if res == {}:
    EmptyPolTypeActions(self.__granularity, self.__name, resDecisions, res)
  else:
    policyType = res['PolicyType']

    if 'Resource_PolType' in policyType:
      m = Utils.voimport(actionBaseMod + ".Resource_PolType", self.VOExtension)
      m.ResourcePolTypeActions(self.__granularity, self.__name, resDecisions, res, rsDB, rmDB)

    if 'Alarm_PolType' in policyType:
      m = Utils.voimport(actionBaseMod + ".Alarm_PolType", self.VOExtension)
      m.AlarmPolTypeActions(self.__name, res, nc, setup, rsDB, rmDB,
                            Granularity=self.__granularity,
                            SiteType=self.__siteType,
                            ServiceType=self.__serviceType,
                            ResourceType=self.__resourceType)

    if 'RealBan_PolType' in policyType and self.__realBan == True:
      m = Utils.voimport(actionBaseMod + ".RealBan_PolType", self.VOExtension)
      m.RealBanPolTypeActions(self.__granularity, self.__name, res, da, csAPI, setup)
def __infoFromCE(self): sitesSection = cfgPath('Resources', 'Sites') result = gConfig.getSections(sitesSection) if not result['OK']: return grids = result['Value'] changed = False body = "" for grid in grids: gridSection = cfgPath(sitesSection, grid) result = gConfig.getSections(gridSection) if not result['OK']: return sites = result['Value'] for site in sites: siteSection = cfgPath(gridSection, site) opt = gConfig.getOptionsDict(siteSection)['Value'] name = opt.get('Name', '') if name: coor = opt.get('Coordinates', 'Unknown') mail = opt.get('Mail', 'Unknown') result = ldapSite(name) if not result['OK']: self.log.warn("BDII site %s: %s" % (name, result['Message'])) result = self.__checkAlternativeBDIISite( ldapSite, name) if result['OK']: bdiiSites = result['Value'] if len(bdiiSites) == 0: self.log.warn(name, "Error in BDII: leng = 0") else: if not len(bdiiSites) == 1: self.log.warn( name, "Warning in BDII: leng = %d" % len(bdiiSites)) bdiiSite = bdiiSites[0] try: longitude = bdiiSite['GlueSiteLongitude'] latitude = bdiiSite['GlueSiteLatitude'] newcoor = "%s:%s" % (longitude, latitude) except: self.log.warn("Error in BDII coordinates") newcoor = "Unknown" try: newmail = bdiiSite[ 'GlueSiteSysAdminContact'].split( ":")[-1].strip() except: self.log.warn("Error in BDII mail") newmail = "Unknown" self.log.debug("%s %s %s" % (name, newcoor, newmail)) if newcoor != coor: self.log.info("%s" % (name), "%s -> %s" % (coor, newcoor)) if coor == 'Unknown': self.csAPI.setOption( cfgPath(siteSection, 'Coordinates'), newcoor) else: self.csAPI.modifyValue( cfgPath(siteSection, 'Coordinates'), newcoor) changed = True if newmail != mail: self.log.info("%s" % (name), "%s -> %s" % (mail, newmail)) if mail == 'Unknown': self.csAPI.setOption( cfgPath(siteSection, 'Mail'), newmail) else: self.csAPI.modifyValue( cfgPath(siteSection, 'Mail'), newmail) changed = True ceList = List.fromChar(opt.get('CE', '')) if not ceList: self.log.warn(site, 'Empty site list') continue # result = gConfig.getSections( cfgPath( siteSection,'CEs' ) # if not result['OK']: # self.log.debug( "Section CEs:", result['Message'] ) for ce in ceList: ceSection = cfgPath(siteSection, 'CEs', ce) result = gConfig.getOptionsDict(ceSection) if not result['OK']: self.log.debug("Section CE", result['Message']) wnTmpDir = 'Unknown' arch = 'Unknown' os = 'Unknown' si00 = 'Unknown' pilot = 'Unknown' ceType = 'Unknown' else: ceopt = result['Value'] wnTmpDir = ceopt.get('wnTmpDir', 'Unknown') arch = ceopt.get('architecture', 'Unknown') os = ceopt.get('OS', 'Unknown') si00 = ceopt.get('SI00', 'Unknown') pilot = ceopt.get('Pilot', 'Unknown') ceType = ceopt.get('CEType', 'Unknown') result = ldapCE(ce) if not result['OK']: self.log.warn('Error in BDII for %s' % ce, result['Message']) result = self.__checkAlternativeBDIISite(ldapCE, ce) continue try: bdiiCE = result['Value'][0] except: self.log.warn('Error in BDII for %s' % ce, result) bdiiCE = None if bdiiCE: try: newWNTmpDir = bdiiCE['GlueSubClusterWNTmpDir'] except: newWNTmpDir = 'Unknown' if wnTmpDir != newWNTmpDir and newWNTmpDir != 'Unknown': section = cfgPath(ceSection, 'wnTmpDir') self.log.info(section, " -> ".join( (wnTmpDir, newWNTmpDir))) if wnTmpDir == 'Unknown': self.csAPI.setOption(section, newWNTmpDir) else: self.csAPI.modifyValue(section, newWNTmpDir) changed = True try: newArch = bdiiCE[ 'GlueHostArchitecturePlatformType'] except: newArch = 'Unknown' if arch != newArch and newArch != 'Unknown': section = cfgPath(ceSection, 'architecture') self.log.info(section, " -> ".join( (arch, newArch))) 
if arch == 'Unknown': self.csAPI.setOption(section, newArch) else: self.csAPI.modifyValue(section, newArch) changed = True try: newOS = '_'.join( (bdiiCE['GlueHostOperatingSystemName'], bdiiCE['GlueHostOperatingSystemVersion'], bdiiCE['GlueHostOperatingSystemRelease'])) except: newOS = 'Unknown' if os != newOS and newOS != 'Unknown': section = cfgPath(ceSection, 'OS') self.log.info(section, " -> ".join((os, newOS))) if os == 'Unknown': self.csAPI.setOption(section, newOS) else: self.csAPI.modifyValue(section, newOS) changed = True body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newOS, ce, site) try: newSI00 = bdiiCE['GlueHostBenchmarkSI00'] except: newSI00 = 'Unknown' if si00 != newSI00 and newSI00 != 'Unknown': section = cfgPath(ceSection, 'SI00') self.log.info(section, " -> ".join( (si00, newSI00))) if si00 == 'Unknown': self.csAPI.setOption(section, newSI00) else: self.csAPI.modifyValue(section, newSI00) changed = True try: rte = bdiiCE[ 'GlueHostApplicationSoftwareRunTimeEnvironment'] for vo in self.voName: if vo.lower() == 'lhcb': if 'VO-lhcb-pilot' in rte: newPilot = 'True' else: newPilot = 'False' else: newPilot = 'Unknown' except: newPilot = 'Unknown' if pilot != newPilot and newPilot != 'Unknown': section = cfgPath(ceSection, 'Pilot') self.log.info(section, " -> ".join( (pilot, newPilot))) if pilot == 'Unknown': self.csAPI.setOption(section, newPilot) else: self.csAPI.modifyValue(section, newPilot) changed = True newVO = '' for vo in self.voName: result = ldapCEState(ce, vo) #getBDIICEVOView if not result['OK']: self.log.warn('Error in BDII for queue %s' % ce, result['Message']) result = self.__checkAlternativeBDIISite( ldapCEState, ce, vo) continue try: queues = result['Value'] except: self.log.warn('Error in BDII for queue %s' % ce, result['Massage']) continue newCEType = 'Unknown' for queue in queues: try: queueType = queue['GlueCEImplementationName'] except: queueType = 'Unknown' if newCEType == 'Unknown': newCEType = queueType else: if queueType != newCEType: self.log.warn( 'Error in BDII for CE %s ' % ce, 'different CE types %s %s' % (newCEType, queueType)) if newCEType == 'ARC-CE': newCEType = 'ARC' if ceType != newCEType and newCEType != 'Unknown': section = cfgPath(ceSection, 'CEType') self.log.info(section, " -> ".join( (ceType, newCEType))) if ceType == 'Unknown': self.csAPI.setOption(section, newCEType) else: self.csAPI.modifyValue(section, newCEType) changed = True for queue in queues: try: queueName = queue['GlueCEUniqueID'].split( '/')[-1] except: self.log.warn('Error in queueName ', queue) continue try: newMaxCPUTime = queue['GlueCEPolicyMaxCPUTime'] except: newMaxCPUTime = None newSI00 = None try: caps = queue['GlueCECapability'] if type(caps) == type(''): caps = [caps] for cap in caps: if cap.count('CPUScalingReferenceSI00'): newSI00 = cap.split('=')[-1] except: newSI00 = None queueSection = cfgPath(ceSection, 'Queues', queueName) result = gConfig.getOptionsDict(queueSection) if not result['OK']: self.log.warn("Section Queues", result['Message']) maxCPUTime = 'Unknown' si00 = 'Unknown' allowedVOs = [''] else: queueOpt = result['Value'] maxCPUTime = queueOpt.get( 'maxCPUTime', 'Unknown') si00 = queueOpt.get('SI00', 'Unknown') if newVO == '': # Remember previous iteration, if none - read from conf allowedVOs = queueOpt.get('VO', '').split(",") else: # Else use newVO, as it can contain changes, which aren't in conf yet allowedVOs = newVO.split(",") if newMaxCPUTime and (maxCPUTime != newMaxCPUTime): section = cfgPath(queueSection, 'maxCPUTime') 
self.log.info( section, " -> ".join( (maxCPUTime, newMaxCPUTime))) if maxCPUTime == 'Unknown': self.csAPI.setOption( section, newMaxCPUTime) else: self.csAPI.modifyValue( section, newMaxCPUTime) changed = True if newSI00 and (si00 != newSI00): section = cfgPath(queueSection, 'SI00') self.log.info(section, " -> ".join( (si00, newSI00))) if si00 == 'Unknown': self.csAPI.setOption(section, newSI00) else: self.csAPI.modifyValue(section, newSI00) changed = True modifyVO = True # Flag saying if we need VO option to change newVO = '' if allowedVOs != ['']: for allowedVO in allowedVOs: allowedVO = allowedVO.strip( ) # Get rid of spaces newVO += allowedVO if allowedVO == vo: # Current VO has been already in list newVO = '' modifyVO = False # Don't change anything break # Skip next 'if', proceed to next VO newVO += ', ' if modifyVO: section = cfgPath(queueSection, 'VO') newVO += vo self.log.info( section, " -> ".join( ('%s' % allowedVOs, newVO))) if allowedVOs == ['']: self.csAPI.setOption(section, newVO) else: self.csAPI.modifyValue(section, newVO) changed = True if changed: self.log.info(body) if body and self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False) return self.csAPI.commit() else: self.log.info("No changes found") return S_OK()
def execute(self): for vo in self.voList: voAdminUser = getVOOption(vo, "VOAdmin") voAdminMail = None if voAdminUser: voAdminMail = getUserOption(voAdminUser, "Email") voAdminGroup = getVOOption(vo, "VOAdminGroup", getVOOption(vo, "DefaultGroup")) self.log.info( 'Performing VOMS sync for VO %s with credentials %s@%s' % (vo, voAdminUser, voAdminGroup)) autoAddUsers = getVOOption(vo, "AutoAddUsers", self.autoAddUsers) autoModifyUsers = getVOOption(vo, "AutoModifyUsers", self.autoModifyUsers) autoDeleteUsers = getVOOption(vo, "AutoDeleteUsers", self.autoDeleteUsers) vomsSync = VOMS2CSSynchronizer(vo, autoAddUsers=autoAddUsers, autoModifyUsers=autoModifyUsers, autoDeleteUsers=autoDeleteUsers) result = self.__syncCSWithVOMS( vomsSync, # pylint: disable=unexpected-keyword-arg proxyUserName=voAdminUser, proxyUserGroup=voAdminGroup) if not result['OK']: self.log.error('Failed to perform VOMS to CS synchronization:', 'VO %s: %s' % (vo, result["Message"])) continue resultDict = result['Value'] newUsers = resultDict.get("NewUsers", []) modUsers = resultDict.get("ModifiedUsers", []) delUsers = resultDict.get("DeletedUsers", []) susUsers = resultDict.get("SuspendedUsers", []) csapi = resultDict.get("CSAPI") adminMessages = resultDict.get("AdminMessages", { 'Errors': [], 'Info': [] }) voChanged = resultDict.get("VOChanged", False) self.log.info( "Run user results: new %d, modified %d, deleted %d, new/suspended %d" % (len(newUsers), len(modUsers), len(delUsers), len(susUsers))) if csapi.csModified: # We have accumulated all the changes, commit them now self.log.info( "There are changes to the CS for vo %s ready to be committed" % vo) if self.dryRun: self.log.info("Dry Run: CS won't be updated") csapi.showDiff() else: result = csapi.commitChanges() if not result['OK']: self.log.error( "Could not commit configuration changes", result['Message']) return result self.log.notice("Configuration committed for VO %s" % vo) else: self.log.info( "No changes to the CS for VO %s recorded at this cycle" % vo) # Add user home directory in the file catalog if self.makeFCEntry and newUsers: self.log.info("Creating home directories for users %s" % str(newUsers)) result = self.__addHomeDirectory( vo, newUsers, # pylint: disable=unexpected-keyword-arg proxyUserName=voAdminUser, proxyUserGroup=voAdminGroup) if not result['OK']: self.log.error('Failed to create user home directories:', 'VO %s: %s' % (vo, result["Message"])) else: for user in result['Value']['Failed']: self.log.error( "Failed to create home directory", "user: %s, operation: %s" % (user, result['Value']['Failed'][user])) adminMessages['Errors'].append( "Failed to create home directory for user %s: operation %s" % (user, result['Value']['Failed'][user])) for user in result['Value']['Successful']: adminMessages['Info'].append( "Created home directory for user %s" % user) if voChanged or self.detailedReport: mailMsg = "" if adminMessages['Errors']: mailMsg += "\nErrors list:\n %s" % "\n ".join( adminMessages['Errors']) if adminMessages['Info']: mailMsg += "\nRun result:\n %s" % "\n ".join( adminMessages['Info']) if self.detailedReport: result = vomsSync.getVOUserReport() if result['OK']: mailMsg += '\n\n' mailMsg += result['Value'] else: mailMsg += 'Failed to produce a detailed user report' mailMsg += result['Message'] if self.dryRun: self.log.info("Dry Run: mail won't be sent") else: NotificationClient().sendMail( self.am_getOption('MailTo', voAdminMail), "VOMS2CSAgent run log", mailMsg, self.am_getOption( 'MailFrom', self.am_getOption('mailFrom', "DIRAC 
system"))) return S_OK()
def __sendErrorMail(self, rb, name, command, result, proxy):
  """ In case of error with RB/WMS:
      - check if RB/WMS still in use
        - remove RB/WMS from current list
      - check if RB/WMS not in cache
        - add RB/WMS to cache
        - send Error mail
  """
  if rb in self.resourceBrokers:
    try:
      self.resourceBrokers.remove(rb)
      self.log.info('Removed RB from list', rb)
    except:
      pass
    if not self.__failingWMSCache.exists(rb):
      self.__failingWMSCache.add(rb, self.errorClearTime)  # disable for 30 minutes
      mailAddress = self.errorMailAddress
      msg = ''
      if not result['OK']:
        subject = "%s: timeout executing %s" % (rb, name)
        msg += '\n%s' % result['Message']
      elif result['Value'][0] != 0:
        if re.search('the server is temporarily drained', ' '.join(result['Value'][1:3])):
          return
        if re.search('System load is too high:', ' '.join(result['Value'][1:3])):
          return
        subject = "%s: error executing %s" % (rb, name)
      else:
        return
      msg += ' '.join(command)
      msg += '\nreturns: %s\n' % str(result['Value'][0]) + '\n'.join(result['Value'][1:3])
      msg += '\nUsing Proxy:\n' + getProxyInfoAsString(proxy)['Value']
      # msg += '\nUsing Proxy:\n' + gProxyManager.

      ticketTime = self.errorClearTime + self.errorTicketTime

      if self.__ticketsWMSCache.exists(rb):
        mailAddress = self.alarmMailAddress
        # the RB was already detected failing a short time ago
        msg = 'Submit GGUS Ticket for this error if not already opened\n' + \
              'It has been failing at least for %s hours\n' % (ticketTime / 60 / 60) + msg
      else:
        self.__ticketsWMSCache.add(rb, ticketTime)

      if mailAddress:
        result = NotificationClient().sendMail(mailAddress, subject, msg,
                                               fromAddress=self.mailFromAddress)
        if not result['OK']:
          self.log.error("Mail could not be sent")

  return
class ProxyDB( DB ): NOTIFICATION_TIMES = [ 2592000, 1296000 ] def __init__( self, useMyProxy = False ): DB.__init__( self, 'ProxyDB', 'Framework/ProxyDB' ) random.seed() self.__defaultRequestLifetime = 300 # 5min self.__defaultTokenLifetime = 86400 * 7 # 1 week self.__defaultTokenMaxUses = 50 self.__useMyProxy = useMyProxy self._minSecsToAllowStore = 3600 self.__notifClient = NotificationClient() retVal = self.__initializeDB() if not retVal[ 'OK' ]: raise Exception( "Can't create tables: %s" % retVal[ 'Message' ] ) self.purgeExpiredProxies( sendNotifications = False ) self.__checkDBVersion() def getMyProxyServer( self ): return gConfig.getValue( "/DIRAC/VOPolicy/MyProxyServer" , "myproxy.cern.ch" ) def getMyProxyMaxLifeTime( self ): return gConfig.getValue( "/DIRAC/VOPolicy/MyProxyMaxDelegationTime", 168 ) * 3600 def __initializeDB( self ): """ Create the tables """ retVal = self._query( "show tables" ) if not retVal[ 'OK' ]: return retVal tablesInDB = [ t[0] for t in retVal[ 'Value' ] ] tablesD = {} if 'ProxyDB_Requests' not in tablesInDB: tablesD[ 'ProxyDB_Requests' ] = { 'Fields' : { 'Id' : 'INTEGER AUTO_INCREMENT NOT NULL', 'UserDN' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME' }, 'PrimaryKey' : 'Id' } if 'ProxyDB_Proxies' not in tablesInDB: tablesD[ 'ProxyDB_Proxies' ] = { 'Fields' : { 'UserName' : 'VARCHAR(64) NOT NULL', 'UserDN' : 'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME', 'PersistentFlag' : 'ENUM ("True","False") NOT NULL DEFAULT "True"', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup' ] } if 'ProxyDB_VOMSProxies' not in tablesInDB: tablesD[ 'ProxyDB_VOMSProxies' ] = { 'Fields' : { 'UserName' : 'VARCHAR(64) NOT NULL', 'UserDN' : 'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'VOMSAttr' : 'VARCHAR(255) NOT NULL', 'Pem' : 'BLOB', 'ExpirationTime' : 'DATETIME', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup', 'vomsAttr' ] } if 'ProxyDB_Log' not in tablesInDB: tablesD[ 'ProxyDB_Log' ] = { 'Fields' : { 'ID': 'BIGINT NOT NULL AUTO_INCREMENT', 'IssuerDN' : 'VARCHAR(255) NOT NULL', 'IssuerGroup' : 'VARCHAR(255) NOT NULL', 'TargetDN' : 'VARCHAR(255) NOT NULL', 'TargetGroup' : 'VARCHAR(255) NOT NULL', 'Action' : 'VARCHAR(128) NOT NULL', 'Timestamp' : 'DATETIME', }, 'PrimaryKey': 'ID', 'Indexes' : { 'Timestamp' : [ 'Timestamp' ]} } if 'ProxyDB_Tokens' not in tablesInDB: tablesD[ 'ProxyDB_Tokens' ] = { 'Fields' : { 'Token' : 'VARCHAR(64) NOT NULL', 'RequesterDN' : 'VARCHAR(255) NOT NULL', 'RequesterGroup' : 'VARCHAR(255) NOT NULL', 'ExpirationTime' : 'DATETIME NOT NULL', 'UsesLeft' : 'SMALLINT UNSIGNED DEFAULT 1', }, 'PrimaryKey' : 'Token' } if 'ProxyDB_ExpNotifs' not in tablesInDB: tablesD[ 'ProxyDB_ExpNotifs' ] = { 'Fields' : { 'UserDN' : 'VARCHAR(255) NOT NULL', 'UserGroup' : 'VARCHAR(255) NOT NULL', 'LifeLimit' : 'INTEGER UNSIGNED DEFAULT 0', 'ExpirationTime' : 'DATETIME NOT NULL', }, 'PrimaryKey' : [ 'UserDN', 'UserGroup' ] } return self._createTables( tablesD ) def __addUserNameToTable( self, tableName ): result = self._update( "ALTER TABLE `%s` ADD COLUMN UserName VARCHAR(64) NOT NULL" % tableName ) if not result[ 'OK' ]: return result result = self._query( "SELECT DISTINCT UserName, UserDN FROM `%s`" % tableName ) if not result[ 'OK' ]: return result data = result[ 'Value' ] for userName, userDN in data: if not userName: result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: self.log.error( "Could not retrieve username for DN", userDN ) continue userName = result[ 'Value' ] 
try: userName = self._escapeString( userName )[ 'Value' ] userDN = self._escapeString( userDN )[ 'Value' ] except KeyError: self.log.error( "Could not escape username or DN", "%s %s" % ( userName, userDN ) ) continue result = self._update( "UPDATE `%s` SET UserName=%s WHERE UserDN=%s" % ( tableName, userName, userDN ) ) if not result[ 'OK' ]: self.log.error( "Could not update username for DN", "%s: %s" % ( userDN, result[ 'Message' ] ) ) continue self.log.info( "UserDN %s has user %s" % ( userDN, userName ) ) return S_OK() def __checkDBVersion( self ): for tableName in ( "ProxyDB_Proxies", "ProxyDB_VOMSProxies" ): result = self._query( "describe `%s`" % tableName ) if not result[ 'OK' ]: return result if 'UserName' not in [ row[0] for row in result[ 'Value' ] ]: self.log.notice( "Username missing in table %s schema. Adding it" % tableName ) result = self.__addUserNameToTable( tableName ) if not result[ 'OK' ]: return result def generateDelegationRequest( self, proxyChain, userDN ): """ Generate a request and store it for a given proxy Chain """ retVal = self._getConnection() if not retVal[ 'OK' ]: return retVal connObj = retVal[ 'Value' ] retVal = proxyChain.generateProxyRequest() if not retVal[ 'OK' ]: return retVal request = retVal[ 'Value' ] retVal = request.dumpRequest() if not retVal[ 'OK' ]: return retVal reqStr = retVal[ 'Value' ] retVal = request.dumpPKey() if not retVal[ 'OK' ]: return retVal allStr = reqStr + retVal[ 'Value' ] try: sUserDN = self._escapeString( userDN )[ 'Value' ] sAllStr = self._escapeString( allStr )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) cmd = "INSERT INTO `ProxyDB_Requests` ( Id, UserDN, Pem, ExpirationTime )" cmd += " VALUES ( 0, %s, %s, TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() ) )" % ( sUserDN, sAllStr, int( self.__defaultRequestLifetime ) ) retVal = self._update( cmd, conn = connObj ) if not retVal[ 'OK' ]: return retVal #99% of the times we will stop here if 'lastRowId' in retVal: return S_OK( { 'id' : retVal['lastRowId'], 'request' : reqStr } ) #If the lastRowId hack does not work. Get it by hand retVal = self._query( "SELECT Id FROM `ProxyDB_Requests` WHERE Pem='%s'" % reqStr ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if len( data ) == 0: return S_ERROR( "Insertion of the request in the db didn't work as expected" ) retVal = proxyChain.getDIRACGroup() if retVal[ 'OK' ] and retVal[ 'Value' ]: userGroup = retVal[ 'Value' ] else: userGroup = "unset" self.logAction( "request upload", userDN, userGroup, userDN, "any" ) #Here we go!
return S_OK( { 'id' : data[0][0], 'request' : reqStr } ) def retrieveDelegationRequest( self, requestId, userDN ): """ Retrieve a request from the DB """ try: sUserDN = self._escapeString( userDN )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) cmd = "SELECT Pem FROM `ProxyDB_Requests` WHERE Id = %s AND UserDN = %s" % ( requestId, sUserDN ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if len( data ) == 0: return S_ERROR( "No requests with id %s" % requestId ) request = X509Request() retVal = request.loadAllFromString( data[0][0] ) if not retVal[ 'OK' ]: return retVal return S_OK( request ) def purgeExpiredRequests( self ): """ Purge expired requests from the db """ cmd = "DELETE FROM `ProxyDB_Requests` WHERE ExpirationTime < UTC_TIMESTAMP()" return self._update( cmd ) def deleteRequest( self, requestId ): """ Delete a request from the db """ cmd = "DELETE FROM `ProxyDB_Requests` WHERE Id=%s" % requestId return self._update( cmd ) def completeDelegation( self, requestId, userDN, delegatedPem ): """ Complete a delegation and store it in the db """ retVal = self.retrieveDelegationRequest( requestId, userDN ) if not retVal[ 'OK' ]: return retVal request = retVal[ 'Value' ] chain = X509Chain( keyObj = request.getPKey() ) retVal = chain.loadChainFromString( delegatedPem ) if not retVal[ 'OK' ]: return retVal retVal = chain.isValidProxy( ignoreDefault = True ) noGroupFlag = False if not retVal[ 'OK' ]: if retVal['Message'] == "Proxy does not have an explicit group": noGroupFlag = True else: return retVal result = chain.isVOMS() if result[ 'OK' ] and result[ 'Value' ]: return S_ERROR( "Proxies with VOMS extensions are not allowed to be uploaded" ) retVal = request.checkChain( chain ) if not retVal[ 'OK' ]: return retVal if not retVal[ 'Value' ]: return S_ERROR( "Received chain does not match request: %s" % retVal[ 'Message' ] ) retVal = chain.getDIRACGroup() if not retVal[ 'OK' ]: return retVal userGroup = retVal[ 'Value' ] if not userGroup: userGroup = Registry.getDefaultUserGroup() retVal = Registry.getGroupsForDN( userDN ) if not retVal[ 'OK' ]: return retVal if not userGroup in retVal[ 'Value' ]: return S_ERROR( "%s group is not valid for %s" % ( userGroup, userDN ) ) # For proxies without embedded DIRAC group only one default is allowed # Cleaning all the proxies for this DN if any before uploading the new one. if noGroupFlag: retVal = self.deleteProxy( userDN ) if not retVal[ 'OK' ]: return retVal retVal = self.storeProxy( userDN, userGroup, chain ) if not retVal[ 'OK' ]: return retVal retVal = self.deleteRequest( requestId ) if not retVal[ 'OK' ]: return retVal return S_OK() def storeProxy( self, userDN, userGroup, chain ): """ Store user proxy into the Proxy repository for a user specified by his DN and group. 
""" retVal = Registry.getUsernameForDN( userDN ) if not retVal[ 'OK' ]: return retVal userName = retVal[ 'Value' ] #Get remaining secs retVal = chain.getRemainingSecs() if not retVal[ 'OK' ]: return retVal remainingSecs = retVal[ 'Value' ] if remainingSecs < self._minSecsToAllowStore: return S_ERROR( "Cannot store proxy, remaining secs %s is less than %s" % ( remainingSecs, self._minSecsToAllowStore ) ) #Compare the DNs retVal = chain.getIssuerCert() if not retVal[ 'OK' ]: return retVal proxyIdentityDN = retVal[ 'Value' ].getSubjectDN()[ 'Value' ] if not userDN == proxyIdentityDN: msg = "Mismatch in the user DN" vMsg = "Proxy says %s and credentials are %s" % ( proxyIdentityDN, userDN ) self.log.error( msg, vMsg ) return S_ERROR( "%s. %s" % ( msg, vMsg ) ) #Check the groups retVal = chain.getDIRACGroup() if not retVal[ 'OK' ]: return retVal proxyGroup = retVal[ 'Value' ] if not proxyGroup: proxyGroup = Registry.getDefaultUserGroup() if not userGroup == proxyGroup: msg = "Mismatch in the user group" vMsg = "Proxy says %s and credentials are %s" % ( proxyGroup, userGroup ) self.log.error( msg, vMsg ) return S_ERROR( "%s. %s" % ( msg, vMsg ) ) #Check if its limited if chain.isLimitedProxy()['Value']: return S_ERROR( "Limited proxies are not allowed to be stored" ) dLeft = remainingSecs / 86400 hLeft = remainingSecs / 3600 - dLeft * 24 mLeft = remainingSecs / 60 - hLeft * 60 - dLeft * 1440 sLeft = remainingSecs - hLeft * 3600 - mLeft * 60 - dLeft * 86400 self.log.info( "Storing proxy for credentials %s (%d:%02d:%02d:%02d left)" % ( proxyIdentityDN, dLeft, hLeft, mLeft, sLeft ) ) try: sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Cannot escape DN" ) # Check what we have already got in the repository cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ), Pem FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s" % ( sUserDN, sUserGroup ) result = self._query( cmd ) if not result['OK']: return result # check if there is a previous ticket for the DN data = result[ 'Value' ] sqlInsert = True if len( data ) > 0: sqlInsert = False pem = data[0][1] if pem: remainingSecsInDB = data[0][0] if remainingSecs <= remainingSecsInDB: self.log.info( "Proxy stored is longer than uploaded, omitting.", "%s in uploaded, %s in db" % ( remainingSecs, remainingSecsInDB ) ) return S_OK() pemChain = chain.dumpAllToString()['Value'] dValues = { 'UserName' : self._escapeString( userName )[ 'Value' ], 'UserDN' : sUserDN, 'UserGroup' : sUserGroup, 'Pem' : self._escapeString( pemChain )[ 'Value' ], 'ExpirationTime' : 'TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )' % int( remainingSecs ), 'PersistentFlag' : "'False'" } if sqlInsert: sqlFields = [] sqlValues = [] for key in dValues: sqlFields.append( key ) sqlValues.append( dValues[ key ] ) cmd = "INSERT INTO `ProxyDB_Proxies` ( %s ) VALUES ( %s )" % ( ", ".join( sqlFields ), ", ".join( sqlValues ) ) else: sqlSet = [] sqlWhere = [] for k in dValues: if k in ( 'UserDN', 'UserGroup' ): sqlWhere.append( "%s = %s" % ( k, dValues[k] ) ) else: sqlSet.append( "%s = %s" % ( k, dValues[k] ) ) cmd = "UPDATE `ProxyDB_Proxies` SET %s WHERE %s" % ( ", ".join( sqlSet ), " AND ".join( sqlWhere ) ) self.logAction( "store proxy", userDN, userGroup, userDN, userGroup ) return self._update( cmd ) def purgeExpiredProxies( self, sendNotifications = True ): """ Purge expired requests from the db """ purged = 0 for tableName in ( "ProxyDB_Proxies", "ProxyDB_VOMSProxies" ): cmd = "DELETE 
FROM `%s` WHERE ExpirationTime < UTC_TIMESTAMP()" % tableName result = self._update( cmd ) if not result[ 'OK' ]: return result purged += result[ 'Value' ] self.log.info( "Purged %s expired proxies from %s" % ( result[ 'Value' ], tableName ) ) if sendNotifications: result = self.sendExpirationNotifications() if not result[ 'OK' ]: return result return S_OK( purged ) def deleteProxy( self, userDN, userGroup='any' ): """ Remove proxy of the given user from the repository """ try: userDN = self._escapeString( userDN )[ 'Value' ] if userGroup != 'any': userGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) req = "DELETE FROM `%%s` WHERE UserDN=%s" % userDN if userGroup != 'any': req += " AND UserGroup=%s" % userGroup for db in [ 'ProxyDB_Proxies', 'ProxyDB_VOMSProxies' ]: result = self._update( req % db ) if not result[ 'OK' ]: return result return result def __getPemAndTimeLeft( self, userDN, userGroup = False, vomsAttr = False ): try: sUserDN = self._escapeString( userDN )[ 'Value' ] if userGroup: sUserGroup = self._escapeString( userGroup )[ 'Value' ] if vomsAttr: sVomsAttr = self._escapeString( vomsAttr )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) if not vomsAttr: table = "`ProxyDB_Proxies`" else: table = "`ProxyDB_VOMSProxies`" cmd = "SELECT Pem, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) from %s" % table cmd += " WHERE UserDN=%s AND TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > 0" % ( sUserDN ) if userGroup: cmd += " AND UserGroup=%s" % sUserGroup if vomsAttr: cmd += " AND VOMSAttr=%s" % sVomsAttr retVal = self._query( cmd ) if not retVal['OK']: return retVal data = retVal[ 'Value' ] for record in data: if record[0]: return S_OK( ( record[0], record[1] ) ) if userGroup: userMask = "%s@%s" % ( userDN, userGroup ) else: userMask = userDN return S_ERROR( "%s has no proxy registered" % userMask ) def renewFromMyProxy( self, userDN, userGroup, lifeTime = False, chain = False ): if not lifeTime: lifeTime = 43200 if not self.__useMyProxy: return S_ERROR( "myproxy is disabled" ) #Get the chain if not chain: retVal = self.__getPemAndTimeLeft( userDN, userGroup ) if not retVal[ 'OK' ]: return retVal pemData = retVal[ 'Value' ][0] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if not retVal[ 'OK' ]: return retVal originChainLifeTime = chain.getRemainingSecs()[ 'Value' ] maxMyProxyLifeTime = self.getMyProxyMaxLifeTime() #If we have a chain that's 0.8 of max mplifetime don't ask to mp if originChainLifeTime > maxMyProxyLifeTime * 0.8: self.log.error( "Skipping myproxy download", "user %s %s chain has %s secs and requested %s secs" % ( userDN, userGroup, originChainLifeTime, maxMyProxyLifeTime ) ) return S_OK( chain ) lifeTime *= 1.3 if lifeTime > maxMyProxyLifeTime: lifeTime = maxMyProxyLifeTime self.log.error( "Renewing proxy from myproxy", "user %s %s for %s secs" % ( userDN, userGroup, lifeTime ) ) myProxy = MyProxy( server = self.getMyProxyServer() ) retVal = myProxy.getDelegatedProxy( chain, lifeTime ) if not retVal[ 'OK' ]: return retVal mpChain = retVal[ 'Value' ] retVal = mpChain.getRemainingSecs() if not retVal[ 'OK' ]: return S_ERROR( "Can't retrieve remaining secs from renewed proxy: %s" % retVal[ 'Message' ] ) mpChainSecsLeft = retVal['Value'] if mpChainSecsLeft < originChainLifeTime: self.log.info( "Chain downloaded from myproxy has less lifetime than the one stored in the db", "\n Downloaded from myproxy: %s secs\n Stored in DB: %s secs" % ( mpChainSecsLeft, originChainLifeTime ) ) return S_OK(
chain ) retVal = mpChain.getDIRACGroup() if not retVal[ 'OK' ]: return S_ERROR( "Can't retrieve DIRAC Group from renewed proxy: %s" % retVal[ 'Message' ] ) chainGroup = retVal['Value'] if chainGroup != userGroup: return S_ERROR( "Mismatch between renewed proxy group and expected: %s vs %s" % ( userGroup, chainGroup ) ) retVal = self.storeProxy( userDN, userGroup, mpChain ) if not retVal[ 'OK' ]: self.log.error( "Cannot store proxy after renewal", retVal[ 'Message' ] ) retVal = myProxy.getServiceDN() if not retVal[ 'OK' ]: hostDN = userDN else: hostDN = retVal[ 'Value' ] self.logAction( "myproxy renewal", hostDN, "host", userDN, userGroup ) return S_OK( mpChain ) def __getPUSProxy( self, userDN, userGroup, requiredLifetime, requestedVOMSAttr = None ): result = Registry.getGroupsForDN( userDN ) if not result['OK']: return result validGroups = result['Value'] if not userGroup in validGroups: return S_ERROR( 'Invalid group %s for user' % userGroup ) voName = Registry.getVOForGroup( userGroup ) if not voName: return S_ERROR( 'Can not determine VO for group %s' % userGroup ) retVal = self.__getVOMSAttribute( userGroup, requestedVOMSAttr ) if not retVal[ 'OK' ]: return retVal vomsAttribute = retVal[ 'Value' ][ 'attribute' ] vomsVO = retVal[ 'Value' ][ 'VOMSVO' ] puspServiceURL = Registry.getVOOption( voName, 'PUSPServiceURL' ) if not puspServiceURL: return S_ERROR( 'Can not determine PUSP service URL for VO %s' % voName ) user = userDN.split(":")[-1] puspURL = "%s?voms=%s:%s&proxy-renewal=false&disable-voms-proxy=false" \ "&rfc-proxy=true&cn-label=user:%s" % ( puspServiceURL, vomsVO, vomsAttribute, user ) try: proxy = urllib.urlopen( puspURL ).read() except Exception as e: return S_ERROR( 'Failed to get proxy from the PUSP server' ) chain = X509Chain() chain.loadChainFromString( proxy ) chain.loadKeyFromString( proxy ) result = chain.getCredentials() if not result['OK']: return S_ERROR( 'Failed to get a valid PUSP proxy' ) credDict = result['Value'] if credDict['identity'] != userDN: return S_ERROR( 'Requested DN does not match the obtained one in the PUSP proxy' ) timeLeft = credDict['secondsLeft'] result = chain.generateProxyToString( lifeTime = timeLeft, diracGroup = userGroup ) if not result['OK']: return result proxyString = result['Value'] return S_OK( ( proxyString, timeLeft ) ) def getProxy( self, userDN, userGroup, requiredLifeTime = False ): """ Get proxy string from the Proxy Repository for use with userDN in the userGroup """ # Get the Per User SubProxy if one is requested if isPUSPdn( userDN ): result = self.__getPUSProxy( userDN, userGroup, requiredLifeTime ) if not result['OK']: return result pemData = result[ 'Value' ][0] timeLeft = result[ 'Value' ][1] chain = X509Chain() result = chain.loadProxyFromString( pemData ) if not result[ 'OK' ]: return result return S_OK( ( chain, timeLeft ) ) # Standard proxy is requested retVal = self.__getPemAndTimeLeft( userDN, userGroup ) if not retVal[ 'OK' ]: return retVal pemData = retVal[ 'Value' ][0] timeLeft = retVal[ 'Value' ][1] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if not retVal[ 'OK' ]: return retVal if requiredLifeTime: if timeLeft < requiredLifeTime: retVal = self.renewFromMyProxy( userDN, userGroup, lifeTime = requiredLifeTime, chain = chain ) if not retVal[ 'OK' ]: return S_ERROR( "Can't get a proxy for %s seconds: %s" % ( requiredLifeTime, retVal[ 'Message' ] ) ) chain = retVal[ 'Value' ] #Proxy is invalid for some reason, let's delete it if not chain.isValidProxy()['Value']: self.deleteProxy( userDN, 
userGroup ) return S_ERROR( "%s@%s has no proxy registered" % ( userDN, userGroup ) ) return S_OK( ( chain, timeLeft ) ) def __getVOMSAttribute( self, userGroup, requiredVOMSAttribute = False ): if requiredVOMSAttribute: return S_OK( { 'attribute' : requiredVOMSAttribute, 'VOMSVO' : Registry.getVOMSVOForGroup( userGroup ) } ) csVOMSMapping = Registry.getVOMSAttributeForGroup( userGroup ) if not csVOMSMapping: return S_ERROR( "No mapping defined for group %s in the CS" % userGroup ) return S_OK( { 'attribute' : csVOMSMapping, 'VOMSVO' : Registry.getVOMSVOForGroup( userGroup ) } ) def getVOMSProxy( self, userDN, userGroup, requiredLifeTime = False, requestedVOMSAttr = False ): """ Get proxy string from the Proxy Repository for use with userDN in the userGroup and VOMS attr """ retVal = self.__getVOMSAttribute( userGroup, requestedVOMSAttr ) if not retVal[ 'OK' ]: return retVal vomsAttr = retVal[ 'Value' ][ 'attribute' ] vomsVO = retVal[ 'Value' ][ 'VOMSVO' ] #Look in the cache retVal = self.__getPemAndTimeLeft( userDN, userGroup, vomsAttr ) if retVal[ 'OK' ]: pemData = retVal[ 'Value' ][0] vomsTime = retVal[ 'Value' ][1] chain = X509Chain() retVal = chain.loadProxyFromString( pemData ) if retVal[ 'OK' ]: retVal = chain.getRemainingSecs() if retVal[ 'OK' ]: remainingSecs = retVal[ 'Value' ] if requiredLifeTime and requiredLifeTime <= vomsTime and requiredLifeTime <= remainingSecs: return S_OK( ( chain, min( vomsTime, remainingSecs ) ) ) if isPUSPdn( userDN ): # Get the Per User SubProxy if one is requested result = self.__getPUSProxy( userDN, userGroup, requiredLifeTime, requestedVOMSAttr ) if not result['OK']: return result pemData = result[ 'Value' ][0] chain = X509Chain() result = chain.loadProxyFromString( pemData ) if not result[ 'OK' ]: return result else: # Get the stored proxy and dress it with the VOMS extension retVal = self.getProxy( userDN, userGroup, requiredLifeTime ) if not retVal[ 'OK' ]: return retVal chain, secsLeft = retVal[ 'Value' ] if requiredLifeTime and requiredLifeTime > secsLeft: return S_ERROR( "Stored proxy is not long lived enough" ) vomsMgr = VOMS() retVal = vomsMgr.getVOMSAttributes( chain ) if retVal[ 'OK' ]: attrs = retVal[ 'Value' ] if len( attrs ) > 0: if attrs[0] != vomsAttr: return S_ERROR( "Stored proxy has already a different VOMS attribute %s than requested %s" % ( vomsAttr, attrs[0] ) ) else: result = self.__storeVOMSProxy( userDN, userGroup, vomsAttr, chain ) if not result[ 'OK' ]: return result secsLeft = result[ 'Value' ] if requiredLifeTime and requiredLifeTime <= secsLeft: return S_OK( ( chain, secsLeft ) ) return S_ERROR( "Stored proxy has already a different VOMS attribute and is not long lived enough" ) retVal = vomsMgr.setVOMSAttributes( chain , vomsAttr, vo = vomsVO ) if not retVal[ 'OK' ]: return S_ERROR( "Cannot append voms extension: %s" % retVal[ 'Message' ] ) chain = retVal[ 'Value' ] # We have got the VOMS proxy, store it into the cache result = self.__storeVOMSProxy( userDN, userGroup, vomsAttr, chain ) if not result[ 'OK' ]: return result secsLeft = result[ 'Value' ] return S_OK( ( chain, secsLeft ) ) def __storeVOMSProxy( self, userDN, userGroup, vomsAttr, chain ): retVal = self._getConnection() if not retVal[ 'OK' ]: return retVal connObj = retVal[ 'Value' ] retVal1 = VOMS().getVOMSProxyInfo( chain, 'actimeleft' ) retVal2 = VOMS().getVOMSProxyInfo( chain, 'timeleft' ) if not retVal1[ 'OK' ]: return retVal1 if not retVal2[ 'OK' ]: return retVal2 try: vomsSecsLeft1 = int( retVal1[ 'Value' ].strip() ) vomsSecsLeft2 = int( retVal2[ 
'Value' ].strip() ) vomsSecsLeft = min( vomsSecsLeft1, vomsSecsLeft2 ) except Exception as e: return S_ERROR( "Can't parse VOMS time left: %s" % str( e ) ) secsLeft = min( vomsSecsLeft, chain.getRemainingSecs()[ 'Value' ] ) pemData = chain.dumpAllToString()[ 'Value' ] result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: userName = "" else: userName = result[ 'Value' ] try: sUserName = self._escapeString( userName )[ 'Value' ] sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] sVomsAttr = self._escapeString( vomsAttr )[ 'Value' ] sPemData = self._escapeString( pemData )[ 'Value' ] except KeyError: return S_ERROR( "Could not escape some data" ) cmd = "REPLACE INTO `ProxyDB_VOMSProxies` ( UserName, UserDN, UserGroup, VOMSAttr, Pem, ExpirationTime ) VALUES " cmd += "( %s, %s, %s, %s, %s, TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() ) )" % ( sUserName, sUserDN, sUserGroup, sVomsAttr, sPemData, secsLeft ) result = self._update( cmd, conn = connObj ) if not result[ 'OK' ]: return result return S_OK( secsLeft ) def getRemainingTime( self, userDN, userGroup ): """ Returns the remaining time the proxy is valid """ try: userDN = self._escapeString( userDN )[ 'Value' ] userGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Invalid DN or group" ) cmd = "SELECT TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) FROM `ProxyDB_Proxies`" retVal = self._query( "%s WHERE UserDN = %s AND UserGroup = %s" % ( cmd, userDN, userGroup ) ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] if not data: return S_OK( 0 ) return S_OK( int( data[0][0] ) ) def getUsers( self, validSecondsLeft = 0, dnMask = False, groupMask = False, userMask = False ): """ Get all the distinct users from the Proxy Repository. 
Optionally, only users with valid proxies within the given validity period expressed in seconds """ cmd = "SELECT UserName, UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`" sqlCond = [] if validSecondsLeft: try: validSecondsLeft = int( validSecondsLeft ) except ValueError: return S_ERROR( "Seconds left has to be an integer" ) sqlCond.append( "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) > %d" % validSecondsLeft ) for field, mask in ( ( 'UserDN', dnMask ), ( 'UserGroup', groupMask ), ( 'UserName', userMask ) ): if not mask: continue if type( mask ) not in ( types.ListType, types.TupleType ): mask = [ mask ] mask = [ self._escapeString( entry )[ 'Value' ] for entry in mask ] sqlCond.append( "%s in ( %s )" % ( field, ", ".join( mask ) ) ) if sqlCond: cmd += " WHERE %s" % " AND ".join( sqlCond ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = [] for record in retVal[ 'Value' ]: data.append( { 'Name': record[0], 'DN' : record[1], 'group' : record[2], 'expirationtime' : record[3], 'persistent' : record[4] == 'True' } ) return S_OK( data ) def getCredentialsAboutToExpire( self, requiredSecondsLeft, onlyPersistent = True ): cmd = "SELECT UserDN, UserGroup, ExpirationTime, PersistentFlag FROM `ProxyDB_Proxies`" cmd += " WHERE TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) < %d and TIMESTAMPDIFF( SECOND, ExpirationTime, UTC_TIMESTAMP() ) > 0" % requiredSecondsLeft if onlyPersistent: cmd += " AND PersistentFlag = 'True'" return self._query( cmd ) def setPersistencyFlag( self, userDN, userGroup, persistent = True ): """ Set the proxy PersistentFlag to the flag value """ try: sUserDN = self._escapeString( userDN )[ 'Value' ] sUserGroup = self._escapeString( userGroup )[ 'Value' ] except KeyError: return S_ERROR( "Can't escape something" ) if persistent: sqlFlag = "True" else: sqlFlag = "False" retVal = self._query( "SELECT PersistentFlag FROM `ProxyDB_Proxies` WHERE UserDN=%s AND UserGroup=%s" % ( sUserDN, sUserGroup ) ) sqlInsert = True if retVal[ 'OK' ]: data = retVal[ 'Value' ] if len( data ) > 0: sqlInsert = False if data[0][0] == sqlFlag: return S_OK() if sqlInsert: #If it's not in the db and we're removing the persistency then do nothing if not persistent: return S_OK() cmd = "INSERT INTO `ProxyDB_Proxies` ( UserDN, UserGroup, Pem, ExpirationTime, PersistentFlag ) VALUES " cmd += "( %s, %s, '', UTC_TIMESTAMP(), 'True' )" % ( sUserDN, sUserGroup ) else: cmd = "UPDATE `ProxyDB_Proxies` SET PersistentFlag='%s' WHERE UserDN=%s AND UserGroup=%s" % ( sqlFlag, sUserDN, sUserGroup ) retVal = self._update( cmd ) if not retVal[ 'OK' ]: return retVal return S_OK() def getProxiesContent( self, selDict, sortList, start = 0, limit = 0 ): """ Function to get the contents of the db parameters are a filter to the db """ fields = ( "UserName", "UserDN", "UserGroup", "ExpirationTime", "PersistentFlag" ) cmd = "SELECT %s FROM `ProxyDB_Proxies`" % ", ".join( fields ) sqlWhere = [ "Pem is not NULL" ] for field in selDict: if field not in fields: continue fVal = selDict[field] if type( fVal ) in ( types.DictType, types.TupleType, types.ListType ): sqlWhere.append( "%s in (%s)" % ( field, ", ".join( [ self._escapeString( str( value ) )[ 'Value' ] for value in fVal ] ) ) ) else: sqlWhere.append( "%s = %s" % ( field, self._escapeString( str( fVal ) )[ 'Value' ] ) ) sqlOrder = [] if sortList: for sort in sortList: if len( sort ) == 1: sort = ( sort, "DESC" ) elif len( sort ) > 2: return S_ERROR( "Invalid sort %s" % sort ) if sort[0] not in fields: 
return S_ERROR( "Invalid sorting field %s" % sort[0] ) if sort[1].upper() not in ( "ASC", "DESC" ): return S_ERROR( "Invalid sorting order %s" % sort[1] ) sqlOrder.append( "%s %s" % ( sort[0], sort[1] ) ) if sqlWhere: cmd = "%s WHERE %s" % ( cmd, " AND ".join( sqlWhere ) ) if sqlOrder: cmd = "%s ORDER BY %s" % ( cmd, ", ".join( sqlOrder ) ) if limit: try: start = int( start ) limit = int( limit ) except ValueError: return S_ERROR( "start and limit have to be integers" ) cmd += " LIMIT %d,%d" % ( start, limit ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = [] for record in retVal[ 'Value' ]: record = list( record ) if record[4] == 'True': record[4] = True else: record[4] = False data.append( record ) totalRecords = len( data ) cmd = "SELECT COUNT( UserGroup ) FROM `ProxyDB_Proxies`" if sqlWhere: cmd = "%s WHERE %s" % ( cmd, " AND ".join( sqlWhere ) ) retVal = self._query( cmd ) if retVal[ 'OK' ]: totalRecords = retVal[ 'Value' ][0][0] return S_OK( { 'ParameterNames' : fields, 'Records' : data, 'TotalRecords' : totalRecords } ) def logAction( self, action, issuerDN, issuerGroup, targetDN, targetGroup ): """ Add an action to the log """ try: sAction = self._escapeString( action )[ 'Value' ] sIssuerDN = self._escapeString( issuerDN )[ 'Value' ] sIssuerGroup = self._escapeString( issuerGroup )[ 'Value' ] sTargetDN = self._escapeString( targetDN )[ 'Value' ] sTargetGroup = self._escapeString( targetGroup )[ 'Value' ] except KeyError: return S_ERROR( "Can't escape from death" ) cmd = "INSERT INTO `ProxyDB_Log` ( Action, IssuerDN, IssuerGroup, TargetDN, TargetGroup, Timestamp ) VALUES " cmd += "( %s, %s, %s, %s, %s, UTC_TIMESTAMP() )" % ( sAction, sIssuerDN, sIssuerGroup, sTargetDN, sTargetGroup ) retVal = self._update( cmd ) if not retVal[ 'OK' ]: self.log.error( "Can't add a proxy action log: ", retVal[ 'Message' ] ) def purgeLogs( self ): """ Purge expired requests from the db """ cmd = "DELETE FROM `ProxyDB_Log` WHERE TIMESTAMPDIFF( SECOND, Timestamp, UTC_TIMESTAMP() ) > 15552000" return self._update( cmd ) def getLogsContent( self, selDict, sortList, start = 0, limit = 0 ): """ Function to get the contents of the logs table parameters are a filter to the db """ fields = ( "Action", "IssuerDN", "IssuerGroup", "TargetDN", "TargetGroup", "Timestamp" ) cmd = "SELECT %s FROM `ProxyDB_Log`" % ", ".join( fields ) if selDict: qr = [] if 'beforeDate' in selDict: qr.append( "Timestamp < %s" % self._escapeString( selDict[ 'beforeDate' ] )[ 'Value' ] ) del( selDict[ 'beforeDate' ] ) if 'afterDate' in selDict: qr.append( "Timestamp > %s" % self._escapeString( selDict[ 'afterDate' ] )[ 'Value' ] ) del( selDict[ 'afterDate' ] ) for field in selDict: qr.append( "(%s)" % " OR ".join( [ "%s=%s" % ( field, self._escapeString( str( value ) )[ 'Value' ] ) for value in selDict[field] ] ) ) whereStr = " WHERE %s" % " AND ".join( qr ) cmd += whereStr else: whereStr = "" if sortList: cmd += " ORDER BY %s" % ", ".join( [ "%s %s" % ( sort[0], sort[1] ) for sort in sortList ] ) if limit: cmd += " LIMIT %d,%d" % ( start, limit ) retVal = self._query( cmd ) if not retVal[ 'OK' ]: return retVal data = retVal[ 'Value' ] totalRecords = len( data ) cmd = "SELECT COUNT( Timestamp ) FROM `ProxyDB_Log`" cmd += whereStr retVal = self._query( cmd ) if retVal[ 'OK' ]: totalRecords = retVal[ 'Value' ][0][0] return S_OK( { 'ParameterNames' : fields, 'Records' : data, 'TotalRecords' : totalRecords } ) def generateToken( self, requesterDN, requesterGroup, numUses = 1, lifeTime = 0, retries = 10 ): """ Generate 
and return a token and the number of uses for the token """ if not lifeTime: lifeTime = gConfig.getValue( "/DIRAC/VOPolicy/TokenLifeTime", self.__defaultTokenLifetime ) maxUses = gConfig.getValue( "/DIRAC/VOPolicy/TokenMaxUses", self.__defaultTokenMaxUses ) numUses = max( 1, min( numUses, maxUses ) ) m = hashlib.md5() rndData = "%s.%s.%s.%s" % ( time.time(), random.random(), numUses, lifeTime ) m.update( rndData ) token = m.hexdigest() fieldsSQL = ", ".join( ( "Token", "RequesterDN", "RequesterGroup", "ExpirationTime", "UsesLeft" ) ) valuesSQL = ", ".join( ( self._escapeString( token )['Value'], self._escapeString( requesterDN )['Value'], self._escapeString( requesterGroup )['Value'], "TIMESTAMPADD( SECOND, %d, UTC_TIMESTAMP() )" % int( lifeTime ), str( numUses ) ) ) insertSQL = "INSERT INTO `ProxyDB_Tokens` ( %s ) VALUES ( %s )" % ( fieldsSQL, valuesSQL ) result = self._update( insertSQL ) if result[ 'OK' ]: return S_OK( ( token, numUses ) ) if result[ 'Message' ].find( "uplicate entry" ) > -1: if retries: return self.generateToken( requesterDN, requesterGroup, numUses, lifeTime, retries - 1 ) return S_ERROR( "Max retries reached for token generation. Aborting" ) return result def purgeExpiredTokens( self ): delSQL = "DELETE FROM `ProxyDB_Tokens` WHERE ExpirationTime < UTC_TIMESTAMP() OR UsesLeft < 1" return self._update( delSQL ) def useToken( self, token, requesterDN, requesterGroup ): sqlCond = " AND ".join( ( "UsesLeft > 0", "Token=%s" % self._escapeString( token )['Value'], "RequesterDN=%s" % self._escapeString( requesterDN )['Value'], "RequesterGroup=%s" % self._escapeString( requesterGroup )['Value'], "ExpirationTime >= UTC_TIMESTAMP()" ) ) updateSQL = "UPDATE `ProxyDB_Tokens` SET UsesLeft = UsesLeft - 1 WHERE %s" % sqlCond result = self._update( updateSQL ) if not result[ 'OK' ]: return result return S_OK( result[ 'Value' ] > 0 ) def __cleanExpNotifs( self ): cmd = "DELETE FROM `ProxyDB_ExpNotifs` WHERE ExpirationTime < UTC_TIMESTAMP()" return self._update( cmd ) def sendExpirationNotifications( self ): result = self.__cleanExpNotifs() if not result[ 'OK' ]: return result cmd = "SELECT UserDN, UserGroup, LifeLimit FROM `ProxyDB_ExpNotifs`" result = self._query( cmd ) if not result[ 'OK' ]: return result notifDone = dict( [ ( ( row[0], row[1] ), row[2] ) for row in result[ 'Value' ] ] ) notifLimits = sorted( [ int( x ) for x in self.getCSOption( "NotificationTimes", ProxyDB.NOTIFICATION_TIMES ) ] ) sqlSel = "UserDN, UserGroup, TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime )" sqlCond = "TIMESTAMPDIFF( SECOND, UTC_TIMESTAMP(), ExpirationTime ) < %d" % max( notifLimits ) cmd = "SELECT %s FROM `ProxyDB_Proxies` WHERE %s" % ( sqlSel, sqlCond ) result = self._query( cmd ) if not result[ 'OK' ]: return result pilotProps = ( Properties.GENERIC_PILOT, Properties.PILOT ) data = result[ 'Value' ] sent = [] for row in data: userDN, group, lTime = row #If it's a pilot proxy, skip it if Registry.groupHasProperties( group, pilotProps ): continue #If it doesn't have the auto upload proxy option, skip it if not Registry.getGroupOption( group, "AutoUploadProxy", False ): continue notKey = ( userDN, group ) for notifLimit in notifLimits: if notifLimit < lTime: #Not yet in this notification limit continue if notKey in notifDone and notifDone[ notKey ] <= notifLimit: #Already notified for this notification limit break if not self._notifyProxyAboutToExpire( userDN, group, lTime, notifLimit ): #Cannot send notification, retry later break try: sUserDN = self._escapeString( userDN )[ 'Value' ] sGroup = self._escapeString( group )[
'Value' ] except KeyError: return S_ERROR( "OOPS" ) if notKey not in notifDone: values = "( %s, %s, %d, TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() ) )" % ( sUserDN, sGroup, notifLimit, lTime ) cmd = "INSERT INTO `ProxyDB_ExpNotifs` ( UserDN, UserGroup, LifeLimit, ExpirationTime ) VALUES %s" % values result = self._update( cmd ) if not result[ 'OK' ]: gLogger.error( "Could not mark notification as sent", result[ 'Message' ] ) else: values = "LifeLimit = %d, ExpirationTime = TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() )" % ( notifLimit, lTime ) cmd = "UPDATE `ProxyDB_ExpNotifs` SET %s WHERE UserDN = %s AND UserGroup = %s" % ( values, sUserDN, sGroup ) result = self._update( cmd ) if not result[ 'OK' ]: gLogger.error( "Could not mark notification as sent", result[ 'Message' ] ) sent.append( ( userDN, group, lTime ) ) notifDone[ notKey ] = notifLimit return S_OK( sent ) def _notifyProxyAboutToExpire( self, userDN, userGroup, lTime, notifLimit ): result = Registry.getUsernameForDN( userDN ) if not result[ 'OK' ]: return False userName = result[ 'Value' ] userEMail = Registry.getUserOption( userName, "Email", "" ) if not userEMail: gLogger.error( "Could not discover user email", userName ) return False daysLeft = int( lTime / 86400 ) msgSubject = "Your proxy uploaded to DIRAC will expire in %d days" % daysLeft msgBody = """\ Dear %s, The proxy you uploaded to DIRAC will expire in approximately %d days. The proxy information is: DN: %s Group: %s If you plan to keep using these credentials, please upload a newer proxy to DIRAC by executing: $ dirac-proxy-init -UP -g %s If you have been issued a different certificate, please make sure you have a proxy uploaded with that certificate. Cheers, DIRAC's Proxy Manager """ % ( userName, daysLeft, userDN, userGroup, userGroup ) result = self.__notifClient.sendMail( userEMail, msgSubject, msgBody, fromAddress = '*****@*****.**' ) if not result[ 'OK' ]: gLogger.error( "Could not send email", result[ 'Message' ] ) return False return True
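The ProxyDB class above is normally driven by the ProxyManager service; the following is a minimal standalone sketch of the lookup path only, assuming a configured DIRAC installation with database credentials for Framework/ProxyDB in the local configuration. The DN and group values are placeholders, not real credentials.

from DIRAC import gLogger, S_OK

def fetchStoredProxyExample():
  """Illustrative sketch: look up a stored proxy and report how long it remains valid."""
  proxyDB = ProxyDB( useMyProxy = False )        # DB credentials come from the local dirac.cfg
  userDN = "/DC=org/DC=example/CN=Some User"     # placeholder DN
  userGroup = "dirac_user"                       # placeholder group
  result = proxyDB.getProxy( userDN, userGroup, requiredLifeTime = 3600 )
  if not result[ 'OK' ]:
    gLogger.error( "Could not retrieve proxy", result[ 'Message' ] )
    return result
  chain, secsLeft = result[ 'Value' ]
  gLogger.notice( "Proxy for %s@%s valid for %s more seconds" % ( userDN, userGroup, secsLeft ) )
  return S_OK( chain )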
def __syncCSWithVOMS( self ): self.__adminMsgs = { 'Errors' : [], 'Info' : [] } #Get DIRAC VOMS Mapping self.log.info( "Getting DIRAC VOMS mapping" ) mappingSection = '/Registry/VOMS/Mapping' ret = gConfig.getOptionsDict( mappingSection ) if not ret['OK']: self.log.fatal( 'No VOMS to DIRAC Group Mapping Available' ) return ret vomsMapping = ret['Value'] self.log.info( "There are %s registered voms mappings in DIRAC" % len( vomsMapping ) ) #Get VOMS VO name self.log.info( "Getting VOMS VO name" ) result = self.vomsSrv.admGetVOName() if not result['OK']: self.log.fatal( 'Could not retrieve VOMS VO name' ) return result voNameInVOMS = result[ 'Value' ] self.log.info( "VOMS VO Name is %s" % voNameInVOMS ) #Get VOMS roles self.log.info( "Getting the list of registered roles in VOMS" ) result = self.vomsSrv.admListRoles() if not result['OK']: self.log.fatal( 'Could not retrieve registered roles in VOMS' ) return result rolesInVOMS = result[ 'Value' ] self.log.info( "There are %s registered roles in VOMS" % len( rolesInVOMS ) ) self.log.debug( str( rolesInVOMS ) ) rolesInVOMS.append( '' ) #Map VOMS roles vomsRoles = {} for role in rolesInVOMS: if role: role = "%s/%s" % ( voNameInVOMS, role ) else: role = voNameInVOMS groupsForRole = [] for group in vomsMapping: if vomsMapping[ group ] == role: groupsForRole.append( group ) if groupsForRole: vomsRoles[ role ] = { 'Groups' : groupsForRole, 'Users' : [] } self.log.info( "DIRAC valid VOMS roles are:\n\t", "\n\t ".join( vomsRoles.keys() ) ) #Get DIRAC users self.log.info( "Getting the list of registered users in DIRAC" ) csapi = CSAPI() ret = csapi.listUsers() if not ret['OK']: self.log.fatal( 'Could not retrieve current list of Users' ) return ret currentUsers = ret['Value'] ret = csapi.describeUsers( currentUsers ) if not ret['OK']: self.log.fatal( 'Could not retrieve current User description' ) return ret currentUsers = ret['Value'] self.__adminMsgs[ 'Info' ].append( "There are %s registered users in DIRAC" % len( currentUsers ) ) self.log.info( "There are %s registered users in DIRAC" % len( currentUsers ) ) #Get VOMS user entries self.log.info( "Getting the list of registered user entries in VOMS" ) result = self.vomsSrv.admListMembers() if not result['OK']: self.log.fatal( 'Could not retrieve registered user entries in VOMS' ) return result usersInVOMS = result[ 'Value' ] self.__adminMsgs[ 'Info' ].append( "There are %s registered user entries in VOMS" % len( usersInVOMS ) ) self.log.info( "There are %s registered user entries in VOMS" % len( usersInVOMS ) ) #Consolidate users by nickname usersData = {} newUserNames = [] knownUserNames = [] obsoleteUserNames = [] self.log.info( "Retrieving usernames..."
) usersInVOMS.sort() for iUPos in range( len( usersInVOMS ) ): userName = '' user = usersInVOMS[ iUPos ] for oldUser in currentUsers: if user[ 'DN' ].strip() in List.fromChar( currentUsers[oldUser][ 'DN' ] ): userName = oldUser if not userName: result = self.vomsSrv.attGetUserNickname( user[ 'DN' ], user[ 'CA' ] ) if result[ 'OK' ]: userName = result[ 'Value' ] else: self.__adminMsgs[ 'Errors' ].append( "Could not retrieve nickname for DN %s" % user[ 'DN' ] ) self.log.error( "Could not get nickname for DN %s" % user[ 'DN' ] ) userName = user[ 'mail' ][:user[ 'mail' ].find( '@' )] if not userName: self.log.error( "Empty nickname for DN %s" % user[ 'DN' ] ) self.__adminMsgs[ 'Errors' ].append( "Empty nickname for DN %s" % user[ 'DN' ] ) continue self.log.info( " (%02d%%) Found username %s : %s " % ( ( iUPos * 100 / len( usersInVOMS ) ), userName, user[ 'DN' ] ) ) if userName not in usersData: usersData[ userName ] = { 'DN': [], 'CA': [], 'Email': [], 'Groups' : ['user'] } for key in ( 'DN', 'CA', 'mail' ): value = user[ key ] if value: if key == "mail": List.appendUnique( usersData[ userName ][ 'Email' ], value ) else: usersData[ userName ][ key ].append( value.strip() ) if userName not in currentUsers: List.appendUnique( newUserNames, userName ) else: List.appendUnique( knownUserNames, userName ) self.log.info( "Finished retrieving usernames" ) if newUserNames: self.log.info( "There are %s new users" % len( newUserNames ) ) else: self.log.info( "There are no new users" ) #Get the list of users for each group result = csapi.listGroups() if not result[ 'OK' ]: self.log.error( "Could not get the list of groups in DIRAC", result[ 'Message' ] ) return result staticGroups = result[ 'Value' ] vomsGroups = [] self.log.info( "Mapping users in VOMS to groups" ) for vomsRole in vomsRoles: self.log.info( " Getting users for role %s" % vomsRole ) groupsForRole = vomsRoles[ vomsRole ][ 'Groups' ] vomsMap = vomsRole.split( "Role=" ) for g in groupsForRole: if g in staticGroups: staticGroups.pop( staticGroups.index( g ) ) else: vomsGroups.append( g ) if len( vomsMap ) == 1: # no Role users = usersInVOMS else: vomsGroup = "Role=".join( vomsMap[:-1] ) if vomsGroup[-1] == "/": vomsGroup = vomsGroup[:-1] vomsRole = "Role=%s" % vomsMap[-1] result = self.vomsSrv.admListUsersWithRole( vomsGroup, vomsRole ) if not result[ 'OK' ]: errorMsg = "Could not get list of users for VOMS %s" % ( vomsMapping[ group ] ) self.__adminMsgs[ 'Errors' ].append( errorMsg ) self.log.error( errorMsg, result[ 'Message' ] ) return result users = result['Value'] numUsersInGroup = 0 for vomsUser in users: for userName in usersData: if vomsUser[ 'DN' ] in usersData[ userName ][ 'DN' ]: numUsersInGroup += 1 usersData[ userName ][ 'Groups' ].extend( groupsForRole ) infoMsg = "There are %s users in group(s) %s for VOMS Role %s" % ( numUsersInGroup, ",".join( groupsForRole ), vomsRole ) self.__adminMsgs[ 'Info' ].append( infoMsg ) self.log.info( " %s" % infoMsg ) self.log.info( "Checking static groups" ) staticUsers = [] for group in staticGroups: self.log.info( " Checking static group %s" % group ) numUsersInGroup = 0 result = csapi.listUsers( group ) if not result[ 'OK' ]: self.log.error( "Could not get the list of users in DIRAC group %s" % group , result[ 'Message' ] ) return result for userName in result[ 'Value' ]: if userName in usersData: numUsersInGroup += 1 usersData[ userName ][ 'Groups' ].append( group ) else: if group not in vomsGroups and userName not in staticUsers: staticUsers.append( userName ) infoMsg = "There are %s users 
in group %s" % ( numUsersInGroup, group ) self.__adminMsgs[ 'Info' ].append( infoMsg ) self.log.info( " %s" % infoMsg ) if staticUsers: infoMsg = "There are %s static users: %s" % ( len( staticUsers ) , ', '.join( staticUsers ) ) self.__adminMsgs[ 'Info' ].append( infoMsg ) self.log.info( "%s" % infoMsg ) for user in currentUsers: if user not in usersData and user not in staticUsers: self.log.info( 'User %s is no longer valid' % user ) obsoleteUserNames.append( user ) #Do the CS Sync self.log.info( "Updating CS..." ) ret = csapi.downloadCSData() if not ret['OK']: self.log.fatal( 'Can not update from CS', ret['Message'] ) return ret usersWithMoreThanOneDN = {} for user in usersData: csUserData = dict( usersData[ user ] ) if len( csUserData[ 'DN' ] ) > 1: usersWithMoreThanOneDN[ user ] = csUserData[ 'DN' ] result = csapi.describeUsers( [ user ] ) if result[ 'OK' ]: if result[ 'Value' ]: prevUser = result[ 'Value' ][ user ] prevDNs = List.fromChar( prevUser[ 'DN' ] ) newDNs = csUserData[ 'DN' ] for DN in newDNs: if DN not in prevDNs: self.__adminMsgs[ 'Info' ].append( "User %s has new DN %s" % ( user, DN ) ) for DN in prevDNs: if DN not in newDNs: self.__adminMsgs[ 'Info' ].append( "User %s has lost a DN %s" % ( user, DN ) ) else: newDNs = csUserData[ 'DN' ] for DN in newDNs: self.__adminMsgs[ 'Info' ].append( "New user %s has new DN %s" % ( user, DN ) ) for k in ( 'DN', 'CA', 'Email' ): csUserData[ k ] = ", ".join( csUserData[ k ] ) result = csapi.modifyUser( user, csUserData, createIfNonExistant = True ) if not result[ 'OK' ]: self.__adminMsgs[ 'Errors' ].append( "Cannot modify user %s: %s" % ( user, result[ 'Message' ] ) ) self.log.error( "Cannot modify user %s" % user ) if usersWithMoreThanOneDN: self.__adminMsgs[ 'Info' ].append( "\nUsers with more than one DN:" ) for uwmtod in sorted( usersWithMoreThanOneDN ): self.__adminMsgs[ 'Info' ].append( " %s" % uwmtod ) self.__adminMsgs[ 'Info' ].append( " + DN list:" ) for DN in usersWithMoreThanOneDN[uwmtod]: self.__adminMsgs[ 'Info' ].append( " - %s" % DN ) if obsoleteUserNames: self.__adminMsgs[ 'Info' ].append( "\nObsolete users:" ) address = self.am_getOption( 'MailTo', '*****@*****.**' ) fromAddress = self.am_getOption( 'mailFrom', '*****@*****.**' ) subject = 'Obsolete LFC Users found' body = 'Delete entries into LFC: \n' for obsoleteUser in obsoleteUserNames: self.log.info( subject, ", ".join( obsoleteUserNames ) ) body += 'for ' + obsoleteUser + '\n' self.__adminMsgs[ 'Info' ].append( " %s" % obsoleteUser ) self.log.info( "Deleting %s users" % len( obsoleteUserNames ) ) NotificationClient().sendMail( address, 'UsersAndGroupsAgent: %s' % subject, body, fromAddress ) csapi.deleteUsers( obsoleteUserNames ) if newUserNames: self.__adminMsgs[ 'Info' ].append( "\nNew users:" ) for newUser in newUserNames: self.__adminMsgs[ 'Info' ].append( " %s" % newUser ) self.__adminMsgs[ 'Info' ].append( " + DN list:" ) for DN in usersData[newUser][ 'DN' ]: self.__adminMsgs[ 'Info' ].append( " - %s" % DN ) self.__adminMsgs[ 'Info' ].append( " + EMail: %s" % usersData[newUser][ 'Email' ] ) result = csapi.commitChanges() if not result[ 'OK' ]: self.log.error( "Could not commit configuration changes", result[ 'Message' ] ) return result self.log.info( "Configuration committed" ) #LFC Check if self.am_getOption( "LFCCheckEnabled", True ): result = self.checkLFCRegisteredUsers( usersData ) if not result[ 'OK' ]: return result return S_OK()
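A small sketch of the mapping inversion that the sync above starts from: it reads /Registry/VOMS/Mapping and groups DIRAC groups per VOMS role. This assumes a reachable Configuration Service; the option values shown in the comment are hypothetical.

from DIRAC import gConfig, gLogger

def dumpVomsRoleMapping():
  """Illustrative sketch: show which DIRAC groups are attached to each VOMS role."""
  ret = gConfig.getOptionsDict( '/Registry/VOMS/Mapping' )
  if not ret[ 'OK' ]:
    gLogger.error( 'No VOMS to DIRAC Group Mapping Available', ret[ 'Message' ] )
    return ret
  # e.g. { 'dirac_user' : '/vo.example.org', 'dirac_prod' : '/vo.example.org/Role=production' } (hypothetical values)
  vomsMapping = ret[ 'Value' ]
  rolesToGroups = {}
  for group, role in vomsMapping.items():
    rolesToGroups.setdefault( role, [] ).append( group )
  for role in sorted( rolesToGroups ):
    gLogger.notice( "%s -> %s" % ( role, ", ".join( rolesToGroups[ role ] ) ) )
  return ret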
class ErrorMessageMonitor(AgentModule): def initialize(self): self.systemLoggingDB = SystemLoggingDB() self.notification = NotificationClient() userList = self.am_getOption("Reviewer", []) self.log.debug("Users to be notified:", ', '.join(userList)) mailList = [] for user in userList: mail = getUserOption(user, 'Email', '') if not mail: self.log.warn("Could not get user's mail", user) else: mailList.append(mail) if not mailList: mailList = Operations().getValue('EMail/Logging', []) if not len(mailList): errString = "There are no valid users in the mailing list" varString = "[" + ','.join(userList) + "]" self.log.warn(errString, varString) self.log.info("List of mails to be notified", ','.join(mailList)) self._mailAddress = mailList self._subject = 'New error messages were entered in the SystemLoggingDB' return S_OK() def execute(self): """ The main agent execution method """ condDict = {'ReviewedMessage': 0} result = self.systemLoggingDB.getCounters('FixedTextMessages', ['ReviewedMessage'], condDict) if not result['OK']: return result if not result['Value']: self.log.info('No messages need review') return S_OK('No messages need review') returnFields = [ 'FixedTextID', 'FixedTextString', 'SystemName', 'SubSystemName' ] result = self.systemLoggingDB._queryDB( showFieldList=returnFields, groupColumn='FixedTextString,FixedTextID,SystemName', condDict=condDict) if not result['OK']: self.log.error('Failed to obtain the non reviewed Strings', result['Message']) return S_OK() messageList = result['Value'] if messageList == 'None' or not messageList: self.log.error('The DB query returned an empty result') return S_OK() mailBody = 'These new messages have arrived to the Logging Service\n' for message in messageList: mailBody = mailBody + "String: '" + message[1] + "'\tSystem: '" \ + message[2] + "'\tSubsystem: '" + message[3] + "'\n" if self._mailAddress: result = self.notification.sendMail(self._mailAddress, self._subject, mailBody) if not result['OK']: self.log.warn("The mail could not be sent", result['Message']) return S_OK() messageIDs = [message[0] for message in messageList] condDict = {'FixedTextID': messageIDs} result = self.systemLoggingDB.updateFields('FixedTextMessages', ['ReviewedMessage'], [1], condDict=condDict) if not result['OK']: self.log.error('Could not update message Status', result['Message']) return S_OK() self.log.verbose('Updated message Status for:', str(messageList)) self.log.info("The messages have been sent for review", "There are %s new descriptions" % len(messageList)) return S_OK("%s Messages have been sent for review" % len(messageList))
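The recipient resolution done in initialize() above can be exercised on its own; this sketch mirrors it, assuming the same Registry and Operations helpers are importable. The user names passed in are placeholders.

from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getUserOption
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations

def resolveReviewerMails(userList):
    """Illustrative sketch: collect reviewer email addresses, falling back to EMail/Logging."""
    mailList = []
    for user in userList:
        mail = getUserOption(user, 'Email', '')  # empty string when the user has no Email option in the CS
        if mail:
            mailList.append(mail)
    if not mailList:
        mailList = Operations().getValue('EMail/Logging', [])  # operations-level fallback, may also be empty
    return mailList

# resolveReviewerMails(['reviewer1', 'reviewer2'])  # placeholder user names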
gLogger.debug("Successfully updated %s catalog status to Active" % site) allowed.append(site) if not allowed: gLogger.error("Failed to allow any catalog mirrors") DIRAC.exit(-1) res = csAPI.commitChanges() if not res['OK']: gLogger.error("Failed to commit changes to CS", res['Message']) DIRAC.exit(-1) subject = '%d catalog instance(s) allowed for use' % len(allowed) addressPath = 'EMail/Production' address = Operations().getValue(addressPath, '') body = 'The catalog mirrors at the following sites were allowed' for site in allowed: body = "%s\n%s" % (body, site) if not address: gLogger.notice( "'%s' not defined in Operations, can not send Mail\n" % addressPath, body) DIRAC.exit(0) NotificationClient().sendMail(address, subject, body, getUserOption(userName, 'Email', '')) DIRAC.exit(0)
class MonitorAgents(AgentModule): """MonitorAgents class.""" def __init__(self, *args, **kwargs): """Initialize the agent, clients, default values.""" AgentModule.__init__(self, *args, **kwargs) self.name = 'MonitorAgents' self.setup = "Production" self.enabled = False self.restartAgents = False self.restartExecutors = False self.restartServices = False self.controlComponents = False self.commitURLs = False self.diracLocation = "/opt/dirac/pro" self.sysAdminClient = SystemAdministratorClient(socket.gethostname()) self.jobMonClient = JobMonitoringClient() self.nClient = NotificationClient() self.csAPI = None self.agents = dict() self.executors = dict() self.services = dict() self.errors = list() self.accounting = defaultdict(dict) self.addressTo = ["*****@*****.**"] self.addressFrom = "*****@*****.**" self.emailSubject = "MonitorAgents on %s" % socket.gethostname() def logError(self, errStr, varMsg=''): """Append errors to a list, which is sent in email notification.""" self.log.error(errStr, varMsg) self.errors.append(errStr + " " + varMsg) def beginExecution(self): """Reload the configurations before every cycle.""" self.setup = self.am_getOption("Setup", self.setup) self.enabled = self.am_getOption("EnableFlag", self.enabled) self.restartAgents = self.am_getOption("RestartAgents", self.restartAgents) self.restartExecutors = self.am_getOption("RestartExecutors", self.restartExecutors) self.restartServices = self.am_getOption("RestartServices", self.restartServices) self.diracLocation = os.environ.get("DIRAC", self.diracLocation) self.addressTo = self.am_getOption('MailTo', self.addressTo) self.addressFrom = self.am_getOption('MailFrom', self.addressFrom) self.controlComponents = self.am_getOption('ControlComponents', self.controlComponents) self.commitURLs = self.am_getOption('CommitURLs', self.commitURLs) self.csAPI = CSAPI() res = self.getRunningInstances(instanceType='Agents') if not res["OK"]: return S_ERROR("Failure to get running agents") self.agents = res["Value"] res = self.getRunningInstances(instanceType='Executors') if not res["OK"]: return S_ERROR("Failure to get running executors") self.executors = res["Value"] res = self.getRunningInstances(instanceType='Services') if not res["OK"]: return S_ERROR("Failure to get running services") self.services = res["Value"] self.accounting.clear() return S_OK() def sendNotification(self): """Send email notification about changes done in the last cycle.""" if not(self.errors or self.accounting): return S_OK() emailBody = "" rows = [] for instanceName, val in self.accounting.iteritems(): rows.append([[instanceName], [val.get('Treatment', 'No Treatment')], [str(val.get('LogAge', 'Not Relevant'))]]) if rows: columns = ["Instance", "Treatment", "Log File Age (Minutes)"] emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ') if self.errors: emailBody += "\n\nErrors:" emailBody += "\n".join(self.errors) self.log.notice("Sending Email:\n" + emailBody) for address in self.addressTo: res = self.nClient.sendMail(address, self.emailSubject, emailBody, self.addressFrom, localAttempt=False) if not res['OK']: self.log.error("Failure to send Email notification to ", address) continue self.errors = [] self.accounting.clear() return S_OK() def getRunningInstances(self, instanceType='Agents', runitStatus='Run'): """Return a dict of running agents, executors or services. 
Key is agent's name, value contains dict with PollingTime, PID, Port, Module, RunitStatus, LogFileLocation :param str instanceType: 'Agents', 'Executors', 'Services' :param str runitStatus: Return only those instances with given RunitStatus or 'All' :returns: Dictionary of running instances """ res = self.sysAdminClient.getOverallStatus() if not res["OK"]: self.logError("Failure to get %s from system administrator client" % instanceType, res["Message"]) return res val = res['Value'][instanceType] runningAgents = defaultdict(dict) for system, agents in val.iteritems(): for agentName, agentInfo in agents.iteritems(): if agentInfo['Setup'] and agentInfo['Installed']: if runitStatus != 'All' and agentInfo['RunitStatus'] != runitStatus: continue confPath = cfgPath('/Systems/' + system + '/' + self.setup + '/%s/' % instanceType + agentName) for option, default in (('PollingTime', HOUR), ('Port', None)): optPath = os.path.join(confPath, option) runningAgents[agentName][option] = gConfig.getValue(optPath, default) runningAgents[agentName]["LogFileLocation"] = \ os.path.join(self.diracLocation, 'runit', system, agentName, 'log', 'current') runningAgents[agentName]["PID"] = agentInfo["PID"] runningAgents[agentName]['Module'] = agentInfo['Module'] runningAgents[agentName]['RunitStatus'] = agentInfo['RunitStatus'] runningAgents[agentName]['System'] = system return S_OK(runningAgents) def on_terminate(self, agentName, process): """Execute callback when a process terminates gracefully.""" self.log.info("%s's process with ID: %s has been terminated successfully" % (agentName, process.pid)) def execute(self): """Execute checks for agents, executors, services.""" for instanceType in ('executor', 'agent', 'service'): for name, options in getattr(self, instanceType + 's').iteritems(): # call checkAgent, checkExecutor, checkService res = getattr(self, 'check' + instanceType.capitalize())(name, options) if not res['OK']: self.logError("Failure when checking %s" % instanceType, "%s, %s" % (name, res['Message'])) res = self.componentControl() if not res['OK']: if "Stopped does not exist" not in res['Message'] and \ "Running does not exist" not in res['Message']: self.logError("Failure to control components", res['Message']) if not self.errors: res = self.checkURLs() if not res['OK']: self.logError("Failure to check URLs", res['Message']) else: self.logError('Something was wrong before, not checking URLs this time') self.sendNotification() if self.errors: return S_ERROR("Error during this cycle, check log") return S_OK() @staticmethod def getLastAccessTime(logFileLocation): """Return the age of log file.""" lastAccessTime = 0 try: lastAccessTime = os.path.getmtime(logFileLocation) lastAccessTime = datetime.fromtimestamp(lastAccessTime) except OSError as e: return S_ERROR('Failed to access logfile %s: %r' % (logFileLocation, e)) now = datetime.now() age = now - lastAccessTime return S_OK(age) def restartInstance(self, pid, instanceName, enabled): """Kill a process which is then restarted automatically.""" if not (self.enabled and enabled): self.log.info("Restarting is disabled, please restart %s manually" % instanceName) self.accounting[instanceName]["Treatment"] = "Please restart it manually" return S_OK(NO_RESTART) try: agentProc = psutil.Process(int(pid)) processesToTerminate = agentProc.children(recursive=True) processesToTerminate.append(agentProc) for proc in processesToTerminate: proc.terminate() _gone, alive = psutil.wait_procs(processesToTerminate, timeout=5, callback=partial(self.on_terminate, 
instanceName)) for proc in alive: self.log.info("Forcefully killing process %s" % proc.pid) proc.kill() return S_OK() except psutil.Error as err: self.logError("Exception occurred in terminating processes", "%s" % err) return S_ERROR() def checkService(self, serviceName, options): """Ping the service, restart if the ping does not respond.""" url = self._getURL(serviceName, options) self.log.info("Pinging service", url) pingRes = Client().ping(url=url) if not pingRes['OK']: self.log.info('Failure pinging service: %s: %s' % (url, pingRes['Message'])) res = self.restartInstance(int(options['PID']), serviceName, self.restartServices) if not res["OK"]: return res elif res['OK'] and res['Value'] != NO_RESTART: self.accounting[serviceName]["Treatment"] = "Successfully Restarted" self.log.info("Agent %s has been successfully restarted" % serviceName) self.log.info("Service responded OK") return S_OK() def checkAgent(self, agentName, options): """Check the age of agent's log file, if it is too old then restart the agent.""" pollingTime, currentLogLocation, pid = options['PollingTime'], options['LogFileLocation'], options['PID'] self.log.info("Checking Agent: %s" % agentName) self.log.info("Polling Time: %s" % pollingTime) self.log.info("Current Log File location: %s" % currentLogLocation) res = self.getLastAccessTime(currentLogLocation) if not res["OK"]: return res age = res["Value"] self.log.info("Current log file for %s is %d minutes old" % (agentName, (age.seconds / MINUTES))) maxLogAge = max(pollingTime + HOUR, 2 * HOUR) if age.seconds < maxLogAge: return S_OK() self.log.info("Current log file is too old for Agent %s" % agentName) self.accounting[agentName]["LogAge"] = age.seconds / MINUTES res = self.restartInstance(int(pid), agentName, self.restartAgents) if not res["OK"]: return res elif res['OK'] and res['Value'] != NO_RESTART: self.accounting[agentName]["Treatment"] = "Successfully Restarted" self.log.info("Agent %s has been successfully restarted" % agentName) return S_OK() def checkExecutor(self, executor, options): """Check the age of executor log file, if too old check for jobs in checking status, then restart the executors.""" currentLogLocation = options['LogFileLocation'] pid = options['PID'] self.log.info("Checking executor: %s" % executor) self.log.info("Current Log File location: %s" % currentLogLocation) res = self.getLastAccessTime(currentLogLocation) if not res["OK"]: return res age = res["Value"] self.log.info("Current log file for %s is %d minutes old" % (executor, (age.seconds / MINUTES))) if age.seconds < 2 * HOUR: return S_OK() self.log.info("Current log file is too old for Executor %s" % executor) self.accounting[executor]["LogAge"] = age.seconds / MINUTES res = self.checkForCheckingJobs(executor) if not res['OK']: return res if res['OK'] and res['Value'] == NO_CHECKING_JOBS: self.accounting.pop(executor, None) return S_OK(NO_RESTART) res = self.restartInstance(int(pid), executor, self.restartExecutors) if not res["OK"]: return res elif res['OK'] and res['Value'] != NO_RESTART: self.accounting[executor]["Treatment"] = "Successfully Restarted" self.log.info("Executor %s has been successfully restarted" % executor) return S_OK() def checkForCheckingJobs(self, executorName): """Check if there are checking jobs with the **executorName** as current MinorStatus.""" attrDict = {'Status': 'Checking', 'MinorStatus': executorName} # returns list of jobs IDs resJobs = self.jobMonClient.getJobs(attrDict) if not resJobs['OK']: self.logError("Could not get jobs for this executor", 
"%s: %s" % (executorName, resJobs['Message'])) return resJobs if resJobs['Value']: self.log.info("Found %d jobs in 'Checking' status for %s" % (len(resJobs['Value']), executorName)) return S_OK(CHECKING_JOBS) self.log.info("Found no jobs in 'Checking' status for %s" % executorName) return S_OK(NO_CHECKING_JOBS) def componentControl(self): """Monitor and control component status as defined in the CS. Check for running and stopped components and ensure they have the proper status as defined in the CS Registry/Hosts/_HOST_/[Running|Stopped] sections :returns: :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_OK`, :func:`~DIRAC:DIRAC.Core.Utilities.ReturnValues.S_ERROR` """ # get the current status of the components resCurrent = self._getCurrentComponentStatus() if not resCurrent['OK']: return resCurrent currentStatus = resCurrent['Value'] resDefault = self._getDefaultComponentStatus() if not resDefault['OK']: return resDefault defaultStatus = resDefault['Value'] # ensure instances are in the right state shouldBe = {} shouldBe['Run'] = defaultStatus['Run'].intersection(currentStatus['Down']) shouldBe['Down'] = defaultStatus['Down'].intersection(currentStatus['Run']) shouldBe['Unknown'] = defaultStatus['All'].symmetric_difference(currentStatus['All']) self._ensureComponentRunning(shouldBe['Run']) self._ensureComponentDown(shouldBe['Down']) for instance in shouldBe['Unknown']: self.logError("Unknown instance", "%r, either uninstall or add to config" % instance) return S_OK() def _getCurrentComponentStatus(self): """Get current status for components.""" resOverall = self.sysAdminClient.getOverallStatus() if not resOverall['OK']: return resOverall currentStatus = {'Down': set(), 'Run': set(), 'All': set()} informationDict = resOverall['Value'] for systemsDict in informationDict.values(): for system, instancesDict in systemsDict.items(): for instanceName, instanceInfoDict in instancesDict.items(): identifier = '%s__%s' % (system, instanceName) runitStatus = instanceInfoDict.get('RunitStatus') if runitStatus in ('Run', 'Down'): currentStatus[runitStatus].add(identifier) currentStatus['All'] = currentStatus['Run'] | currentStatus['Down'] return S_OK(currentStatus) def _getDefaultComponentStatus(self): """Get the configured status of the components.""" host = socket.gethostname() defaultStatus = {'Down': set(), 'Run': set(), 'All': set()} resRunning = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Running')) resStopped = gConfig.getOptionsDict(os.path.join('/Registry/Hosts/', host, 'Stopped')) if not resRunning['OK']: return resRunning if not resStopped['OK']: return resStopped defaultStatus['Run'] = set(resRunning['Value'].keys()) defaultStatus['Down'] = set(resStopped['Value'].keys()) defaultStatus['All'] = defaultStatus['Run'] | defaultStatus['Down'] if defaultStatus['Run'].intersection(defaultStatus['Down']): self.logError("Overlap in configuration", str(defaultStatus['Run'].intersection(defaultStatus['Down']))) return S_ERROR("Bad host configuration") return S_OK(defaultStatus) def _ensureComponentRunning(self, shouldBeRunning): """Ensure the correct components are running.""" for instance in shouldBeRunning: self.log.info("Starting instance %s" % instance) system, name = instance.split('__') if self.controlComponents: res = self.sysAdminClient.startComponent(system, name) if not res['OK']: self.logError("Failed to start component:", "%s: %s" % (instance, res['Message'])) else: self.accounting[instance]["Treatment"] = "Instance was down, started instance" else: 
self.accounting[instance]["Treatment"] = "Instance is down, should be started" def _ensureComponentDown(self, shouldBeDown): """Ensure the correct components are not running.""" for instance in shouldBeDown: self.log.info("Stopping instance %s" % instance) system, name = instance.split('__') if self.controlComponents: res = self.sysAdminClient.stopComponent(system, name) if not res['OK']: self.logError("Failed to stop component:", "%s: %s" % (instance, res['Message'])) else: self.accounting[instance]["Treatment"] = "Instance was running, stopped instance" else: self.accounting[instance]["Treatment"] = "Instance is running, should be stopped" def checkURLs(self): """Ensure that the running services have their URL in the Config.""" self.log.info("Checking URLs") # get services again, in case they were started/stop in controlComponents gConfig.forceRefresh(fromMaster=True) res = self.getRunningInstances(instanceType='Services', runitStatus='All') if not res["OK"]: return S_ERROR("Failure to get running services") self.services = res["Value"] for service, options in self.services.iteritems(): self.log.debug("Checking URL for %s with options %s" % (service, options)) # ignore SystemAdministrator, does not have URLs if 'SystemAdministrator' in service: continue self._checkServiceURL(service, options) if self.csAPI.csModified and self.commitURLs: self.log.info("Commiting changes to the CS") result = self.csAPI.commit() if not result['OK']: self.logError('Commit to CS failed', result['Message']) return S_ERROR("Failed to commit to CS") return S_OK() def _checkServiceURL(self, serviceName, options): """Ensure service URL is properly configured in the CS.""" url = self._getURL(serviceName, options) system = options['System'] module = options['Module'] self.log.info("Checking URLs for %s/%s" % (system, module)) urlsConfigPath = os.path.join('/Systems', system, self.setup, 'URLs', module) urls = gConfig.getValue(urlsConfigPath, []) self.log.debug("Found configured URLs for %s: %s" % (module, urls)) self.log.debug("This URL is %s" % url) runitStatus = options['RunitStatus'] wouldHave = 'Would have ' if not self.commitURLs else '' if runitStatus == 'Run' and url not in urls: urls.append(url) message = "%sAdded URL %s to URLs for %s/%s" % (wouldHave, url, system, module) self.log.info(message) self.accounting[serviceName + "/URL"]["Treatment"] = message self.csAPI.modifyValue(urlsConfigPath, ",".join(urls)) if runitStatus == 'Down' and url in urls: urls.remove(url) message = "%sRemoved URL %s from URLs for %s/%s" % (wouldHave, url, system, module) self.log.info(message) self.accounting[serviceName + "/URL"]["Treatment"] = message self.csAPI.modifyValue(urlsConfigPath, ",".join(urls)) @staticmethod def _getURL(serviceName, options): """Return URL for the service.""" system = options['System'] port = options['Port'] host = socket.gethostname() url = 'dips://%s:%s/%s/%s' % (host, port, system, serviceName) return url
def main(): global excludedHosts Script.registerSwitch( "e:", "exclude=", "Comma separated list of hosts to be excluded from the scanning process", setExcludedHosts) Script.parseCommandLine(ignoreErrors=False) componentType = '' # Get my setup mySetup = gConfig.getValue('DIRAC/Setup') # Retrieve information from all the hosts client = SystemAdministratorIntegrator(exclude=excludedHosts) resultAll = client.getOverallStatus() # Retrieve user installing the component result = getProxyInfo() if result['OK']: user = result['Value']['username'] else: DIRACexit(-1) if not user: user = '******' notificationClient = NotificationClient() for host in resultAll['Value']: if not resultAll['Value'][host]['OK']: # If the host cannot be contacted, exclude it and send message excludedHosts.append(host) result = notificationClient.sendMail( Operations().getValue('EMail/Production', []), 'Unreachable host', '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n' % host) if not result['OK']: gLogger.error( 'Can not send unreachable host notification mail: %s' % result['Message']) if not resultAll['OK']: gLogger.error(resultAll['Message']) DIRACexit(-1) resultHosts = client.getHostInfo() if not resultHosts['OK']: gLogger.error(resultHosts['Message']) DIRACexit(-1) resultInfo = client.getInfo() if not resultInfo['OK']: gLogger.error(resultInfo['Message']) DIRACexit(-1) resultMySQL = client.getMySQLStatus() if not resultMySQL['OK']: gLogger.error(resultMySQL['Message']) DIRACexit(-1) resultAllDB = client.getDatabases() if not resultAllDB['OK']: gLogger.error(resultAllDB['Message']) DIRACexit(-1) resultAvailableDB = client.getAvailableDatabases() if not resultAvailableDB['OK']: gLogger.error(resultAvailableDB['Message']) DIRACexit(-1) records = [] finalSet = list(set(resultAll['Value']) - set(excludedHosts)) for host in finalSet: hasMySQL = True result = resultAll['Value'][host] hostResult = resultHosts['Value'][host] infoResult = resultInfo['Value'][host] mySQLResult = resultMySQL['Value'][host] allDBResult = resultAllDB['Value'][host] availableDBResult = resultAvailableDB['Value'][host] if not result['OK']: gLogger.error('Host %s: %s' % (host, result['Message'])) continue if not hostResult['OK']: gLogger.error('Host %s: %s' % (host, hostResult['Message'])) continue if not infoResult['OK']: gLogger.error('Host %s: %s' % (host, infoResult['Message'])) continue if mySQLResult['OK']: if not allDBResult['OK']: gLogger.error('Host %s: %s' % (host, allDBResult['Message'])) continue if not availableDBResult['OK']: gLogger.error('Host %s: %s' % (host, availableDBResult['Message'])) continue else: hasMySQL = False setup = infoResult['Value']['Setup'] if setup != mySetup: continue cpu = hostResult['Value']['CPUModel'].strip() rDict = result['Value'] # Components other than databases for compType in rDict: if componentType and componentType != compType: continue for system in rDict[compType]: components = sorted(rDict[compType][system]) for component in components: record = {'Installation': {}, 'Component': {}, 'Host': {}} if rDict[compType][system][component]['Installed'] and \ component != 'ComponentMonitoring': runitStatus = \ str(rDict[compType][system][component]['RunitStatus']) if runitStatus != 'Unknown': module = \ str(rDict[compType][system][component]['Module']) record['Component']['System'] = system record['Component']['Module'] = module # Transform 'Services' into 'service', 'Agents' into 'agent' ... 
record['Component']['Type'] = compType.lower()[:-1] record['Host']['HostName'] = host record['Host']['CPU'] = cpu record['Installation']['Instance'] = component record['Installation'][ 'InstallationTime'] = datetime.utcnow() record['Installation']['InstalledBy'] = user records.append(record) # Databases csClient = CSAPI() cfg = csClient.getCurrentCFG()['Value'] if hasMySQL: allDB = allDBResult['Value'] availableDB = availableDBResult['Value'] for db in allDB: # Check for DIRAC only databases if db in availableDB and db != 'InstalledComponentsDB': # Check for 'installed' databases isSection = cfg.isSection( 'Systems/' + availableDB[db]['System'] + '/' + cfg.getOption('DIRAC/Setups/' + setup + '/' + availableDB[db]['System']) + '/Databases/' + db + '/') if isSection: record = { 'Installation': {}, 'Component': {}, 'Host': {} } record['Component']['System'] = availableDB[db][ 'System'] record['Component']['Module'] = db record['Component']['Type'] = 'DB' record['Host']['HostName'] = host record['Host']['CPU'] = cpu record['Installation']['Instance'] = db record['Installation'][ 'InstallationTime'] = datetime.utcnow() record['Installation']['InstalledBy'] = user records.append(record) monitoringClient = ComponentMonitoringClient() # Add the installations to the database for record in records: result = MonitoringUtilities.monitorInstallation( record['Component']['Type'], record['Component']['System'], record['Installation']['Instance'], record['Component']['Module'], record['Host']['CPU'], record['Host']['HostName']) if not result['OK']: gLogger.error(result['Message'])
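Each entry appended to records above is a plain dict with three sub-dicts, Component, Host and Installation, which the script then passes field by field to MonitoringUtilities.monitorInstallation. A hedged sketch of one such record built by hand; all the values here are invented:

from datetime import datetime

# Illustrative record, mirroring the structure assembled in the loops above.
record = {
  'Component': {'System': 'WorkloadManagement',
                'Module': 'JobCleaningAgent',
                'Type': 'agent'},   # 'Agents' -> 'agent', 'Services' -> 'service'
  'Host': {'HostName': 'dirac.example.org',
           'CPU': 'Intel(R) Xeon(R) CPU'},
  'Installation': {'Instance': 'JobCleaningAgent',
                   'InstallationTime': datetime.utcnow(),
                   'InstalledBy': 'diracuser'},
}

# The script above unpacks it positionally:
# MonitoringUtilities.monitorInstallation(record['Component']['Type'],
#                                         record['Component']['System'],
#                                         record['Installation']['Instance'],
#                                         record['Component']['Module'],
#                                         record['Host']['CPU'],
#                                         record['Host']['HostName'])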
body = "".join( body ) try: headers = dict( ( i.strip() , j.strip()) for i , j in ( item.split( ':' ) for item in head.split( '\\n' ) ) ) except: gLogger.error( "Failed to convert string: %s to email headers" % head ) DIRAC.exit( 3 ) if not "To" in headers: gLogger.error( "Failed to get 'To:' field from headers %s" % head ) DIRAC.exit( 4 ) to = headers[ "To" ] origin = socket.gethostname() if "From" in headers: origin = headers[ "From" ] subject = "" if "Subject" in headers: subject = headers[ "Subject" ] ntc = NotificationClient() result = ntc.sendMail( to , subject , body , origin , localAttempt = False ) if not result[ "OK" ]: gLogger.error( result[ "Message" ] ) DIRAC.exit( 5 ) DIRAC.exit( 0 )
def _lookForCE(self): knownces = self.am_getOption('BannedCEs', []) result = gConfig.getSections('/Resources/Sites') if not result['OK']: return grids = result['Value'] for grid in grids: result = gConfig.getSections('/Resources/Sites/%s' % grid) if not result['OK']: return sites = result['Value'] for site in sites: opt = gConfig.getOptionsDict('/Resources/Sites/%s/%s' % (grid, site))['Value'] ces = List.fromChar(opt.get('CE', '')) knownces += ces response = ldapCEState('', vo=self.vo) if not response['OK']: self.log.error("Error during BDII request", response['Message']) response = self._checkAlternativeBDIISite(ldapCEState, '', self.vo) return response newces = {} for queue in response['Value']: try: queuename = queue['GlueCEUniqueID'] except: continue cename = queuename.split(":")[0] if not cename in knownces: newces[cename] = None self.log.debug("newce", cename) body = "" possibleNewSites = [] for ce in newces.iterkeys(): response = ldapCluster(ce) if not response['OK']: self.log.warn("Error during BDII request", response['Message']) response = self._checkAlternativeBDIISite(ldapCluster, ce) continue clusters = response['Value'] if len(clusters) != 1: self.log.warn("Error in cluster leng", " CE %s Leng %d" % (ce, len(clusters))) if len(clusters) == 0: continue cluster = clusters[0] fkey = cluster.get('GlueForeignKey', []) if type(fkey) == type(''): fkey = [fkey] nameBDII = None for entry in fkey: if entry.count('GlueSiteUniqueID'): nameBDII = entry.split('=')[1] break if not nameBDII: continue cestring = "CE: %s, GOCDB Name: %s" % (ce, nameBDII) self.log.info(cestring) response = ldapCE(ce) if not response['OK']: self.log.warn("Error during BDII request", response['Message']) response = self._checkAlternativeBDIISite(ldapCE, ce) continue ceinfos = response['Value'] if len(ceinfos): ceinfo = ceinfos[0] systemName = ceinfo.get('GlueHostOperatingSystemName', 'Unknown') systemVersion = ceinfo.get('GlueHostOperatingSystemVersion', 'Unknown') systemRelease = ceinfo.get('GlueHostOperatingSystemRelease', 'Unknown') else: systemName = "Unknown" systemVersion = "Unknown" systemRelease = "Unknown" osstring = "SystemName: %s, SystemVersion: %s, SystemRelease: %s" % ( systemName, systemVersion, systemRelease) self.log.info(osstring) response = ldapCEState(ce, vo=self.vo) if not response['OK']: self.log.warn("Error during BDII request", response['Message']) response = self._checkAlternativeBDIISite( ldapCEState, ce, self.vo) continue newcestring = "\n\n%s\n%s" % (cestring, osstring) usefull = False cestates = response['Value'] for cestate in cestates: queuename = cestate.get('GlueCEUniqueID', 'UnknownName') queuestatus = cestate.get('GlueCEStateStatus', 'UnknownStatus') queuestring = "%s %s" % (queuename, queuestatus) self.log.info(queuestring) newcestring += "\n%s" % queuestring if queuestatus.count('Production'): usefull = True if usefull: body += newcestring possibleNewSites.append( 'dirac-admin-add-site DIRACSiteName %s %s' % (nameBDII, ce)) if body: body = "We are glade to inform You about new CE(s) possibly suitable for %s:\n" % self.vo + body body += "\n\nTo suppress information about CE add its name to BannedCEs list." for possibleNewSite in possibleNewSites: body = "%s\n%s" % (body, possibleNewSite) self.log.info(body) if self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False) return S_OK()
def __infoFromCE( self ): sitesSection = cfgPath( 'Resources', 'Sites' ) result = gConfig.getSections( sitesSection ) if not result['OK']: return grids = result['Value'] changed = False body = "" for grid in grids: gridSection = cfgPath( sitesSection, grid ) result = gConfig.getSections( gridSection ) if not result['OK']: return sites = result['Value'] for site in sites: siteSection = cfgPath( gridSection, site ) opt = gConfig.getOptionsDict( siteSection )['Value'] name = opt.get( 'Name', '' ) if name: coor = opt.get( 'Coordinates', 'Unknown' ) mail = opt.get( 'Mail', 'Unknown' ) result = ldapSite( name ) if not result['OK']: self.log.warn( "BDII site %s: %s" % ( name, result['Message'] ) ) result = self.__checkAlternativeBDIISite( ldapSite, name ) if result['OK']: bdiiSites = result['Value'] if len( bdiiSites ) == 0: self.log.warn( name, "Error in BDII: leng = 0" ) else: if not len( bdiiSites ) == 1: self.log.warn( name, "Warning in BDII: leng = %d" % len( bdiiSites ) ) bdiiSite = bdiiSites[0] try: longitude = bdiiSite['GlueSiteLongitude'] latitude = bdiiSite['GlueSiteLatitude'] newcoor = "%s:%s" % ( longitude, latitude ) except: self.log.warn( "Error in BDII coordinates" ) newcoor = "Unknown" try: newmail = bdiiSite['GlueSiteSysAdminContact'].split( ":" )[-1].strip() except: self.log.warn( "Error in BDII mail" ) newmail = "Unknown" self.log.debug( "%s %s %s" % ( name, newcoor, newmail ) ) if newcoor != coor: self.log.info( "%s" % ( name ), "%s -> %s" % ( coor, newcoor ) ) if coor == 'Unknown': self.csAPI.setOption( cfgPath( siteSection, 'Coordinates' ), newcoor ) else: self.csAPI.modifyValue( cfgPath( siteSection, 'Coordinates' ), newcoor ) changed = True if newmail != mail: self.log.info( "%s" % ( name ), "%s -> %s" % ( mail, newmail ) ) if mail == 'Unknown': self.csAPI.setOption( cfgPath( siteSection, 'Mail' ), newmail ) else: self.csAPI.modifyValue( cfgPath( siteSection, 'Mail' ), newmail ) changed = True ceList = List.fromChar( opt.get( 'CE', '' ) ) if not ceList: self.log.warn( site, 'Empty site list' ) continue # result = gConfig.getSections( cfgPath( siteSection,'CEs' ) # if not result['OK']: # self.log.debug( "Section CEs:", result['Message'] ) for ce in ceList: ceSection = cfgPath( siteSection, 'CEs', ce ) result = gConfig.getOptionsDict( ceSection ) if not result['OK']: self.log.debug( "Section CE", result['Message'] ) wnTmpDir = 'Unknown' arch = 'Unknown' os = 'Unknown' si00 = 'Unknown' pilot = 'Unknown' ceType = 'Unknown' else: ceopt = result['Value'] wnTmpDir = ceopt.get( 'wnTmpDir', 'Unknown' ) arch = ceopt.get( 'architecture', 'Unknown' ) os = ceopt.get( 'OS', 'Unknown' ) si00 = ceopt.get( 'SI00', 'Unknown' ) pilot = ceopt.get( 'Pilot', 'Unknown' ) ceType = ceopt.get( 'CEType', 'Unknown' ) result = ldapCE( ce ) if not result['OK']: self.log.warn( 'Error in BDII for %s' % ce, result['Message'] ) result = self.__checkAlternativeBDIISite( ldapCE, ce ) continue try: bdiiCE = result['Value'][0] except: self.log.warn( 'Error in BDII for %s' % ce, result ) bdiiCE = None if bdiiCE: try: newWNTmpDir = bdiiCE['GlueSubClusterWNTmpDir'] except: newWNTmpDir = 'Unknown' if wnTmpDir != newWNTmpDir and newWNTmpDir != 'Unknown': section = cfgPath( ceSection, 'wnTmpDir' ) self.log.info( section, " -> ".join( ( wnTmpDir, newWNTmpDir ) ) ) if wnTmpDir == 'Unknown': self.csAPI.setOption( section, newWNTmpDir ) else: self.csAPI.modifyValue( section, newWNTmpDir ) changed = True try: newArch = bdiiCE['GlueHostArchitecturePlatformType'] except: newArch = 'Unknown' if arch != newArch and newArch 
!= 'Unknown': section = cfgPath( ceSection, 'architecture' ) self.log.info( section, " -> ".join( ( arch, newArch ) ) ) if arch == 'Unknown': self.csAPI.setOption( section, newArch ) else: self.csAPI.modifyValue( section, newArch ) changed = True try: newOS = '_'.join( ( bdiiCE['GlueHostOperatingSystemName'], bdiiCE['GlueHostOperatingSystemVersion'], bdiiCE['GlueHostOperatingSystemRelease'] ) ) except: newOS = 'Unknown' if os != newOS and newOS != 'Unknown': section = cfgPath( ceSection, 'OS' ) self.log.info( section, " -> ".join( ( os, newOS ) ) ) if os == 'Unknown': self.csAPI.setOption( section, newOS ) else: self.csAPI.modifyValue( section, newOS ) changed = True body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newOS, ce, site ) try: newSI00 = bdiiCE['GlueHostBenchmarkSI00'] except: newSI00 = 'Unknown' if si00 != newSI00 and newSI00 != 'Unknown': section = cfgPath( ceSection, 'SI00' ) self.log.info( section, " -> ".join( ( si00, newSI00 ) ) ) if si00 == 'Unknown': self.csAPI.setOption( section, newSI00 ) else: self.csAPI.modifyValue( section, newSI00 ) changed = True try: rte = bdiiCE['GlueHostApplicationSoftwareRunTimeEnvironment'] for vo in self.voName: if vo.lower() == 'lhcb': if 'VO-lhcb-pilot' in rte: newPilot = 'True' else: newPilot = 'False' else: newPilot = 'Unknown' except: newPilot = 'Unknown' if pilot != newPilot and newPilot != 'Unknown': section = cfgPath( ceSection, 'Pilot' ) self.log.info( section, " -> ".join( ( pilot, newPilot ) ) ) if pilot == 'Unknown': self.csAPI.setOption( section, newPilot ) else: self.csAPI.modifyValue( section, newPilot ) changed = True newVO = '' for vo in self.voName: result = ldapCEState( ce, vo ) #getBDIICEVOView if not result['OK']: self.log.warn( 'Error in BDII for queue %s' % ce, result['Message'] ) result = self.__checkAlternativeBDIISite( ldapCEState, ce, vo ) continue try: queues = result['Value'] except: self.log.warn( 'Error in BDII for queue %s' % ce, result['Massage'] ) continue newCEType = 'Unknown' for queue in queues: try: queueType = queue['GlueCEImplementationName'] except: queueType = 'Unknown' if newCEType == 'Unknown': newCEType = queueType else: if queueType != newCEType: self.log.warn( 'Error in BDII for CE %s ' % ce, 'different CE types %s %s' % ( newCEType, queueType ) ) if newCEType=='ARC-CE': newCEType = 'ARC' if ceType != newCEType and newCEType != 'Unknown': section = cfgPath( ceSection, 'CEType' ) self.log.info( section, " -> ".join( ( ceType, newCEType ) ) ) if ceType == 'Unknown': self.csAPI.setOption( section, newCEType ) else: self.csAPI.modifyValue( section, newCEType ) changed = True for queue in queues: try: queueName = queue['GlueCEUniqueID'].split( '/' )[-1] except: self.log.warn( 'Error in queueName ', queue ) continue try: newMaxCPUTime = queue['GlueCEPolicyMaxCPUTime'] except: newMaxCPUTime = None newSI00 = None try: caps = queue['GlueCECapability'] if type( caps ) == type( '' ): caps = [caps] for cap in caps: if cap.count( 'CPUScalingReferenceSI00' ): newSI00 = cap.split( '=' )[-1] except: newSI00 = None queueSection = cfgPath( ceSection, 'Queues', queueName ) result = gConfig.getOptionsDict( queueSection ) if not result['OK']: self.log.warn( "Section Queues", result['Message'] ) maxCPUTime = 'Unknown' si00 = 'Unknown' allowedVOs = [''] else: queueOpt = result['Value'] maxCPUTime = queueOpt.get( 'maxCPUTime', 'Unknown' ) si00 = queueOpt.get( 'SI00', 'Unknown' ) if newVO == '': # Remember previous iteration, if none - read from conf allowedVOs = queueOpt.get( 'VO', '' ).split( "," ) else: 
# Else use newVO, as it can contain changes, which aren't in conf yet allowedVOs = newVO.split( "," ) if newMaxCPUTime and ( maxCPUTime != newMaxCPUTime ): section = cfgPath( queueSection, 'maxCPUTime' ) self.log.info( section, " -> ".join( ( maxCPUTime, newMaxCPUTime ) ) ) if maxCPUTime == 'Unknown': self.csAPI.setOption( section, newMaxCPUTime ) else: self.csAPI.modifyValue( section, newMaxCPUTime ) changed = True if newSI00 and ( si00 != newSI00 ): section = cfgPath( queueSection, 'SI00' ) self.log.info( section, " -> ".join( ( si00, newSI00 ) ) ) if si00 == 'Unknown': self.csAPI.setOption( section, newSI00 ) else: self.csAPI.modifyValue( section, newSI00 ) changed = True modifyVO = True # Flag saying if we need VO option to change newVO = '' if allowedVOs != ['']: for allowedVO in allowedVOs: allowedVO = allowedVO.strip() # Get rid of spaces newVO += allowedVO if allowedVO == vo: # Current VO has been already in list newVO = '' modifyVO = False # Don't change anything break # Skip next 'if', proceed to next VO newVO += ', ' if modifyVO: section = cfgPath( queueSection, 'VO' ) newVO += vo self.log.info( section, " -> ".join( ( '%s' % allowedVOs, newVO ) ) ) if allowedVOs == ['']: self.csAPI.setOption( section, newVO ) else: self.csAPI.modifyValue( section, newVO ) changed = True if changed: self.log.info( body ) if body and self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False ) return self.csAPI.commit() else: self.log.info( "No changes found" ) return S_OK()
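Almost every CS update in the function above follows the same pattern: if the current value is the placeholder 'Unknown' the option is created with setOption, otherwise it is changed with modifyValue, and a changed flag is raised so the commit and the mail only happen when something actually moved. A small hedged helper capturing that pattern; the helper itself is not part of DIRAC:

def set_or_modify(csAPI, section, oldValue, newValue, log=None):
  """Create or update a CS option; return True if a change was made (sketch)."""
  if newValue in (None, 'Unknown') or newValue == oldValue:
    return False
  if log:
    log.info(section, " -> ".join((str(oldValue), str(newValue))))
  if oldValue == 'Unknown':
    csAPI.setOption(section, newValue)
  else:
    csAPI.modifyValue(section, newValue)
  return True

# usage sketch inside the CE loop above:
# changed |= set_or_modify(self.csAPI, cfgPath(ceSection, 'SI00'), si00, newSI00, self.log)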
def __lookForNewCEs(self): """ Look up BDII for CEs not yet present in the DIRAC CS """ bannedCEs = self.am_getOption('BannedCEs', []) result = getCEsFromCS() if not result['OK']: return result knownCEs = set(result['Value']) knownCEs = knownCEs.union(set(bannedCEs)) for vo in self.voName: result = self.__getBdiiCEInfo(vo) if not result['OK']: continue bdiiInfo = result['Value'] result = getGridCEs(vo, bdiiInfo=bdiiInfo, ceBlackList=knownCEs) if not result['OK']: self.log.error('Failed to get unused CEs', result['Message']) siteDict = result['Value'] body = '' for site in siteDict: newCEs = set(siteDict[site].keys()) if not newCEs: continue ceString = '' for ce in newCEs: queueString = '' ceInfo = bdiiInfo[site]['CEs'][ce] ceString = "CE: %s, GOCDB Site Name: %s" % (ce, site) systemTuple = siteDict[site][ce]['System'] osString = "%s_%s_%s" % (systemTuple) newCEString = "\n%s\n%s\n" % (ceString, osString) for queue in ceInfo['Queues']: queueStatus = ceInfo['Queues'][queue].get( 'GlueCEStateStatus', 'UnknownStatus') if 'production' in queueStatus.lower(): ceType = ceInfo['Queues'][queue].get( 'GlueCEImplementationName', '') queueString += " %s %s %s\n" % ( queue, queueStatus, ceType) if queueString: ceString = newCEString ceString += "Queues:\n" ceString += queueString if ceString: body += ceString if body: body = "\nWe are glad to inform You about new CE(s) possibly suitable for %s:\n" % vo + body body += "\n\nTo suppress information about CE add its name to BannedCEs list.\n" body += "Add new Sites/CEs for vo %s with the command:\n" % vo body += "dirac-admin-add-resources --vo %s --ce\n" % vo self.log.info(body) if self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail(self.addressTo, self.subject, body, self.addressFrom, localAttempt=False) if not result['OK']: self.log.error( 'Can not send new site notification mail', result['Message']) return S_OK()
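The notification body assembled above relies on one slightly unusual idiom: 'System' is stored as a 3-tuple and expanded directly by the % operator in "%s_%s_%s" % (systemTuple), since the parentheses around a single name do not create a nested tuple. A short sketch of the per-CE block construction; all values here are invented:

systemTuple = ('CentOS', '7', 'Core')   # (name, version, release), as kept in siteDict
ceString = "CE: %s, GOCDB Site Name: %s" % ('ce01.example.org', 'EXAMPLE-SITE')
osString = "%s_%s_%s" % systemTuple     # the tuple fills the three %s: 'CentOS_7_Core'
queueString = "  %s %s %s\n" % ('long', 'Production', 'CREAM-CE')

block = "\n%s\n%s\nQueues:\n%s" % (ceString, osString, queueString)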
class TokenAgent(AgentModule): ############################################################################# def initialize(self): """ TokenAgent initialization """ try: self.rsDB = ResourceStatusDB() self.nc = NotificationClient() return S_OK() except Exception: errorStr = "TokenAgent initialization" gLogger.exception(errorStr) return S_ERROR(errorStr) ############################################################################# def execute(self): """ The main TokenAgent execution method. Checks for tokens owned by users that are expiring, and notifies those users. Calls rsDB.setToken() to set 'RS_SVC' as owner for those tokens that expired. """ try: #reAssign the token to RS_SVC for g in ('Site', 'StorageElement'): tokensExpired = self.rsDB.getTokens(g, None, datetime.datetime.utcnow()) for token in tokensExpired: self.rsDB.setToken(g, token[0], 'RS_SVC', datetime.datetime(9999, 12, 31, 23, 59, 59)) #notify token owners in2Hours = datetime.datetime.utcnow() + datetime.timedelta(hours = 2) for g in ('Site', 'StorageElement'): tokensExpiring = self.rsDB.getTokens(g, None, in2Hours) for token in tokensExpiring: name = token[0] user = token[1] if user == 'RS_SVC': continue expiration = token[2] mailMessage = "The token for %s %s " %(g, name) mailMessage = mailMessage + "will expire on %s\n\n" %expiration mailMessage = mailMessage + "You can renew it with command 'dirac-rss-renew-token'.\n" mailMessage = mailMessage + "If you don't take any action, RSS will take control of the resource." self.nc.sendMail(getMailForUser(user)['Value'][0], 'Token for %s is expiring' %name, mailMessage) return S_OK() except Exception: errorStr = "TokenAgent execution" gLogger.exception(errorStr) return S_ERROR(errorStr)
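The agent above works with two time boundaries: tokens whose expiration is already in the past are handed back to RS_SVC, and tokens expiring within the next two hours trigger a reminder mail to their owner. A minimal sketch of that selection with plain datetimes, assuming token tuples follow the (name, owner, expiration) layout used above:

import datetime

def split_tokens(tokens, now=None):
  """Split (name, owner, expiration) tuples into expired tokens and tokens
  expiring within the next two hours, skipping RS_SVC-owned ones (sketch)."""
  now = now or datetime.datetime.utcnow()
  soon = now + datetime.timedelta(hours=2)
  expired = [t for t in tokens if t[2] <= now]
  expiring = [t for t in tokens if now < t[2] <= soon and t[1] != 'RS_SVC']
  return expired, expiring

# usage sketch
tokens = [('CERN-PROD', 'jdoe', datetime.datetime.utcnow() + datetime.timedelta(minutes=30))]
expired, expiring = split_tokens(tokens)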
from DIRAC import gLogger, exit as DIRACexit
from DIRAC.FrameworkSystem.Client.NotificationClient import NotificationClient

if not cli.to or not cli.fr or not cli.subject:
  gLogger.error("Missing argument")
  DIRACexit(2)
if not cli.body and not cli.filename:
  gLogger.error("Missing body")
  DIRACexit(2)
if cli.filename:
  cli.body = "".join(open(cli.filename, "r").readlines())
ntc = NotificationClient()
gLogger.verbose("Sending:", " ".join([cli.to, cli.subject, cli.body, cli.fr]))
print "sendMail(%s,%s,%s,%s,%s)" % (cli.to, cli.subject, cli.body, cli.fr, False)
if cli.debugMail:
  gLogger.info("Debug mode: sending to the debug address first.")
  result = ntc.sendMail(cli.debugMail, cli.subject, cli.body, cli.fr, localAttempt=False)
  if not result["OK"]:
    gLogger.error(result["Message"])
    gLogger.error("We bravely carry on...")
result = ntc.sendMail(cli.to,
class FileStatusTransformationAgent(AgentModule): """ FileStatusTransformationAgent """ def __init__(self, *args, **kwargs): AgentModule.__init__(self, *args, **kwargs) self.name = 'FileStatusTransformationAgent' self.enabled = False self.shifterProxy = 'DataManager' self.transformationTypes = ["Replication"] self.transformationStatuses = ["Active"] self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"] self.addressTo = ["*****@*****.**"] self.addressFrom = "*****@*****.**" self.emailSubject = "FileStatusTransformationAgent" self.accounting = defaultdict(list) self.errors = [] self.fcClient = FileCatalogClient() self.tClient = TransformationClient() self.reqClient = ReqClient() self.nClient = NotificationClient() def checkFileStatusFuncExists(self, status): """ returns True/False if a function to check transformation files with a given status exists or not """ checkFileStatusFuncName = "check_%s_files" % (status.lower()) if not (hasattr(self, checkFileStatusFuncName) and callable(getattr(self, checkFileStatusFuncName))): self.log.warn("Unable to process transformation files with status ", status) return False return True def beginExecution(self): """ Reload the configurations before every cycle """ self.enabled = self.am_getOption('EnableFlag', False) self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager') self.transformationTypes = self.am_getOption('TransformationTypes', ["Replication"]) self.transformationStatuses = self.am_getOption('TransformationStatuses', ["Active"]) self.transformationFileStatuses = self.am_getOption( 'TransformationFileStatuses', ["Assigned", "Problematic", "Processed", "Unused"]) self.addressTo = self.am_getOption('MailTo', ["*****@*****.**"]) self.addressFrom = self.am_getOption('MailFrom', "*****@*****.**") self.transformationFileStatuses = filter(self.checkFileStatusFuncExists, self.transformationFileStatuses) self.accounting.clear() return S_OK() def sendNotification(self, transID, transType=None, sourceSEs=None, targetSEs=None): """ sends email notification about accounting information of a transformation """ if not(self.errors or self.accounting): return S_OK() emailBody = "Transformation ID: %s\n" % transID if transType: emailBody += "Transformation Type: %s\n" % transType if sourceSEs: emailBody += "Source SE: %s\n" % (" ".join(str(source) for source in sourceSEs)) if targetSEs: emailBody += "Target SE: %s\n\n" % (" ".join(str(target) for target in targetSEs)) rows = [] for action, transFiles in self.accounting.iteritems(): emailBody += "Total number of files with action %s: %s\n" % (action, len(transFiles)) for transFile in transFiles: rows.append([[transFile['LFN']], [str(transFile['AvailableOnSource'])], [str(transFile['AvailableOnTarget'])], [transFile['Status']], [action]]) if rows: columns = ["LFN", "Source", "Target", "Old Status", "Action"] emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ') if self.errors: emailBody += "\n\nErrors:" emailBody += "\n".join(self.errors) self.log.notice(emailBody) subject = "%s: %s" % (self.emailSubject, transID) for address in self.addressTo: res = self.nClient.sendMail(address, subject, emailBody, self.addressFrom, localAttempt=False) if not res['OK']: self.log.error("Failure to send Email notification to ", address) continue self.errors = [] self.accounting.clear() return S_OK() def logError(self, errStr, varMsg=''): self.log.error(errStr, varMsg) self.errors.append(errStr + varMsg) def execute(self): """ main execution loop of 
Agent """ res = self.getTransformations() if not res['OK']: self.log.error('Failure to get transformations', res['Message']) return S_ERROR("Failure to get transformations") transformations = res['Value'] if not transformations: self.log.notice('No transformations found with Status %s and Type %s ' % (self.transformationStatuses, self.transformationTypes)) return S_OK() self.log.notice('Will treat %d transformations' % len(transformations)) self.log.notice('Transformations: %s' % ",".join([str(transformation['TransformationID']) for transformation in transformations])) for trans in transformations: transID = trans['TransformationID'] if 'SourceSE' not in trans or not trans['SourceSE']: self.logError("SourceSE not set for transformation, skip processing, transID: ", "%s" % transID) self.sendNotification(transID) continue if 'TargetSE' not in trans or not trans['TargetSE']: self.logError("TargetSE not set for transformation, skip processing, transID: ", "%s" % transID) self.sendNotification(transID, sourceSEs=trans['SourceSE']) continue if 'DataTransType' not in trans: self.logError("Transformation Type not set for transformation, skip processing, transID: ", "%s" % transID) self.sendNotification(transID, sourceSEs=trans['SourceSE'], targetSEs=trans['TargetSE']) continue res = self.processTransformation(transID, trans['SourceSE'], trans['TargetSE'], trans['DataTransType']) if not res['OK']: self.log.error('Failure to process transformation with ID:', transID) continue return S_OK() def getTransformations(self, transID=None): """ returns transformations of a given type and status """ res = None if transID: res = self.tClient.getTransformations( condDict={'TransformationID': transID, 'Status': self.transformationStatuses, 'Type': self.transformationTypes}) else: res = self.tClient.getTransformations( condDict={'Status': self.transformationStatuses, 'Type': self.transformationTypes}) if not res['OK']: return res result = res['Value'] for trans in result: res = self.tClient.getTransformationParameters(trans['TransformationID'], ['SourceSE', 'TargetSE']) if not res['OK']: self.log.error('Failure to get SourceSE and TargetSE parameters for Transformation ID:', trans['TransformationID']) continue trans['SourceSE'] = eval(res['Value']['SourceSE']) trans['TargetSE'] = eval(res['Value']['TargetSE']) res = self.getDataTransformationType(trans['TransformationID']) if not res['OK']: self.log.error('Failure to determine Data Transformation Type', "%s: %s" % (trans['TransformationID'], res['Message'])) continue trans['DataTransType'] = res['Value'] return S_OK(result) def getRequestStatus(self, transID, taskIDs): """ returns request statuses for a given list of task IDs """ res = self.tClient.getTransformationTasks(condDict={'TransformationID': transID, 'TaskID': taskIDs}) if not res['OK']: self.log.error('Failure to get Transformation Tasks for Transformation ID:', transID) return res result = res['Value'] requestStatus = {} for task in result: requestStatus[task['TaskID']] = {'RequestStatus': task['ExternalStatus'], 'RequestID': long(task['ExternalID'])} return S_OK(requestStatus) def getDataTransformationType(self, transID): """ returns transformation types Replication/Moving/Unknown for a given transformation """ res = self.tClient.getTransformationParameters(transID, 'Body') if not res['OK']: return res # if body is empty then we assume that it is a replication transformation if not res['Value']: return S_OK(REPLICATION_TRANS) replication = False rmReplica = False try: body = json.loads(res['Value']) 
for operation in body: if 'ReplicateAndRegister' in operation: replication = True if 'RemoveReplica' in operation: rmReplica = True except ValueError: if 'ReplicateAndRegister' in res['Value']: replication = True if 'RemoveReplica' in res['Value']: rmReplica = True if rmReplica and replication: return S_OK(MOVING_TRANS) if replication: return S_OK(REPLICATION_TRANS) return S_ERROR("Unknown Transformation Type '%r'" % res['Value']) def setFileStatus(self, transID, transFiles, status): """ sets transformation file status """ lfns = [transFile['LFN'] for transFile in transFiles] lfnStatuses = {lfn: status for lfn in lfns} if lfnStatuses: if self.enabled: res = self.tClient.setFileStatusForTransformation(transID, newLFNsStatus=lfnStatuses, force=True) if not res['OK']: self.logError('Failed to set statuses for LFNs ', "%s" % res['Message']) return res for transFile in transFiles: self.accounting[status].append({'LFN': transFile['LFN'], 'Status': transFile['Status'], 'AvailableOnSource': transFile['AvailableOnSource'], 'AvailableOnTarget': transFile['AvailableOnTarget']}) return S_OK() def selectFailedRequests(self, transFile): """ returns True if transformation file has a failed request otherwise returns False """ res = self.getRequestStatus(transFile['TransformationID'], transFile['TaskID']) if not res['OK']: self.log.error('Failure to get Request Status for Assigned File') return False result = res['Value'] if result[transFile['TaskID']]['RequestStatus'] == 'Failed': return True return False def retryStrategyForFiles(self, transID, transFiles): """ returns retryStrategy Reset Request if a request is found in RMS, otherwise returns set file status to unused""" taskIDs = [transFile['TaskID'] for transFile in transFiles] res = self.getRequestStatus(transID, taskIDs) if not res['OK']: return res result = res['Value'] retryStrategy = defaultdict(dict) for taskID in taskIDs: if taskID is None: self.log.error("Task ID is None", "Transformation: %s\n Files: %r " % (transID, transFiles)) retryStrategy[None]['Strategy'] = SET_UNUSED continue res = self.reqClient.getRequest(requestID=result[taskID]['RequestID']) if not res['OK']: self.log.notice('Request %s does not exist setting file status to unused' % result[taskID]['RequestID']) retryStrategy[taskID]['Strategy'] = SET_UNUSED else: retryStrategy[taskID]['Strategy'] = SET_UNUSED # RESET_REQUEST retryStrategy[taskID]['RequestID'] = result[taskID]['RequestID'] return S_OK(retryStrategy) def check_assigned_files(self, actions, transFiles, transType): """ treatment for transformation files with assigned status """ for transFile in transFiles: if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']: if transType == REPLICATION_TRANS: actions[SET_PROCESSED].append(transFile) if transType == MOVING_TRANS: actions[RETRY].append(transFile) elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']: actions[RETRY].append(transFile) elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']: actions[SET_PROCESSED].append(transFile) else: # not on src and target actions[SET_DELETED].append(transFile) def check_unused_files(self, actions, transFiles, transType): """ treatment for transformation files with unused status """ for transFile in transFiles: if not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']: actions[SET_PROCESSED].append(transFile) if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']: actions[SET_DELETED].append(transFile) def check_processed_files(self, actions, 
transFiles, transType): """ treatment for transformation files with processed status """ for transFile in transFiles: if transFile['AvailableOnSource'] and transFile['AvailableOnTarget'] and transType == MOVING_TRANS: actions[RETRY].append(transFile) if transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']: actions[RETRY].append(transFile) if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']: actions[SET_DELETED].append(transFile) def check_problematic_files(self, actions, transFiles, transType): """ treatment for transformation files with problematic status """ for transFile in transFiles: if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']: if transType == REPLICATION_TRANS: actions[SET_PROCESSED].append(transFile) if transType == MOVING_TRANS: actions[RETRY].append(transFile) elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']: actions[RETRY].append(transFile) elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']: actions[SET_PROCESSED].append(transFile) else: # not available on source and target actions[SET_DELETED].append(transFile) def retryFiles(self, transID, transFiles): """ resubmits request or sets file status to unused based on the retry strategy of transformation file """ setFilesUnused = [] setFilesAssigned = [] res = self.retryStrategyForFiles(transID, transFiles) if not res['OK']: self.logError('Failure to determine retry strategy (unused / reset request) for files ', "%s" % res['Message']) return res retryStrategy = res['Value'] for transFile in transFiles: if retryStrategy[transFile['TaskID']]['Strategy'] != RESET_REQUEST: setFilesUnused.append(transFile) continue requestID = retryStrategy[transFile['TaskID']]['RequestID'] if self.enabled: res = self.reqClient.resetFailedRequest(requestID, allR=True) if not res['OK']: self.logError('Failed to reset request ', 'ReqID: %s Error: %s' % (requestID, res['Message'])) continue if res['Value'] == "Not reset": self.logError('Failed to reset request ', 'ReqID: %s is non-recoverable' % requestID) continue setFilesAssigned.append(transFile) res = self.tClient.setTaskStatus(transID, transFile['TaskID'], 'Waiting') if not res['OK']: self.logError('Failure to set Waiting status for Task ID: ', "%s %s" % (transFile['TaskID'], res['Message'])) continue self.accounting[RESET_REQUEST].append({'LFN': transFile['LFN'], 'Status': transFile['Status'], 'AvailableOnSource': transFile['AvailableOnSource'], 'AvailableOnTarget': transFile['AvailableOnTarget']}) if setFilesUnused: self.setFileStatus(transID, setFilesUnused, 'Unused') if setFilesAssigned: self.setFileStatus(transID, setFilesAssigned, 'Assigned') return S_OK() def applyActions(self, transID, actions): """ sets new file statuses and resets requests """ for action, transFiles in actions.iteritems(): if action == SET_PROCESSED and transFiles: self.setFileStatus(transID, transFiles, 'Processed') if action == SET_DELETED and transFiles: self.setFileStatus(transID, transFiles, 'Deleted') if action == RETRY and transFiles: # if there is a request in RMS then reset request otherwise set file status unused self.retryFiles(transID, transFiles) def existsInFC(self, storageElements, lfns): """ checks if files have replicas registered in File Catalog for all given storageElements """ res = self.fcClient.getReplicas(lfns) if not res['OK']: return res result = {} result['Successful'] = {} result['Failed'] = {} setOfSEs = set(storageElements) for lfn, msg in res['Value']['Failed'].iteritems(): if msg 
== 'No such file or directory': result['Successful'][lfn] = False else: result['Failed'][lfn] = msg # check if all replicas are registered in FC filesFoundInFC = res['Value']['Successful'] for lfn, replicas in filesFoundInFC.iteritems(): result['Successful'][lfn] = setOfSEs.issubset(replicas.keys()) return S_OK(result) def existsOnSE(self, storageElements, lfns): """ checks if the given files exist physically on a list of storage elements""" result = {} result['Failed'] = {} result['Successful'] = {} if not lfns: return S_OK(result) voName = lfns[0].split('/')[1] for se in storageElements: res = StorageElement(se, vo=voName).exists(lfns) if not res['OK']: return res for lfn, status in res['Value']['Successful'].iteritems(): if lfn not in result['Successful']: result['Successful'][lfn] = status if not status: result['Successful'][lfn] = False result['Failed'][se] = res['Value']['Failed'] return S_OK(result) def exists(self, storageElements, lfns): """ checks if files exists on both file catalog and storage elements """ fcRes = self.existsInFC(storageElements, lfns) if not fcRes['OK']: self.logError('Failure to determine if files exists in File Catalog ', "%s" % fcRes['Message']) return fcRes if fcRes['Value']['Failed']: self.logError("Failed FileCatalog Response ", "%s" % fcRes['Value']['Failed']) # check if files found in file catalog also exist on SE checkLFNsOnStorage = [lfn for lfn in fcRes['Value']['Successful'] if fcRes['Value']['Successful'][lfn]] # no files were found in FC, return the result instead of verifying them on SE if not checkLFNsOnStorage: return fcRes seRes = self.existsOnSE(storageElements, checkLFNsOnStorage) if not seRes['OK']: self.logError('Failure to determine if files exist on SE ', "%s" % seRes['Message']) return seRes for se in storageElements: if seRes['Value']['Failed'][se]: self.logError('Failed to determine if files exist on SE ', "%s %s" % (se, seRes['Value']['Failed'][se])) return S_ERROR() fcResult = fcRes['Value']['Successful'] seResult = seRes['Value']['Successful'] for lfn in fcResult: if fcResult[lfn] and not seResult[lfn]: fcRes['Value']['Successful'][lfn] = False return fcRes def processTransformation(self, transID, sourceSE, targetSEs, transType): """ process transformation for a given transformation ID """ actions = {} actions[SET_PROCESSED] = [] actions[RETRY] = [] actions[SET_DELETED] = [] for status in self.transformationFileStatuses: res = self.tClient.getTransformationFiles(condDict={'TransformationID': transID, 'Status': status}) if not res['OK']: errStr = 'Failure to get Transformation Files, Status: %s Transformation ID: %s Message: %s' % (status, transID, res['Message']) self.logError(errStr) continue transFiles = res['Value'] if not transFiles: self.log.notice("No Transformation Files found with status %s for Transformation ID %d" % (status, transID)) continue self.log.notice("Processing Transformation Files with status %s for TransformationID %d " % (status, transID)) if status == 'Assigned': transFiles = filter(self.selectFailedRequests, transFiles) lfns = [transFile['LFN'] for transFile in transFiles] if not lfns: continue res = self.exists(sourceSE, lfns) if not res['OK']: continue resultSourceSe = res['Value']['Successful'] res = self.exists(targetSEs, lfns) if not res['OK']: continue resultTargetSEs = res['Value']['Successful'] for transFile in transFiles: lfn = transFile['LFN'] transFile['AvailableOnSource'] = resultSourceSe[lfn] transFile['AvailableOnTarget'] = resultTargetSEs[lfn] checkFilesFuncName = "check_%s_files" % 
status.lower() checkFiles = getattr(self, checkFilesFuncName) checkFiles(actions, transFiles, transType) self.applyActions(transID, actions) self.sendNotification(transID, transType, sourceSE, targetSEs) return S_OK()
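processTransformation drives everything through a single actions dict keyed by SET_PROCESSED, RETRY and SET_DELETED, and selects the per-status handler dynamically with getattr on the check_<status>_files naming convention. A stripped-down sketch of that dispatch, with string constants and a single simplified handler standing in for the real ones; the data is illustrative:

from collections import defaultdict

SET_PROCESSED, RETRY, SET_DELETED = 'SET_PROCESSED', 'RETRY', 'SET_DELETED'

class Dispatcher(object):
  """Sketch of the check_<status>_files dispatch used above."""

  def check_assigned_files(self, actions, transFiles, transType):
    # same shape as the real handler: sort files into action buckets
    for transFile in transFiles:
      if transFile['AvailableOnTarget']:
        actions[SET_PROCESSED].append(transFile)
      else:
        actions[RETRY].append(transFile)

  def handle(self, status, transFiles, transType):
    actions = defaultdict(list)
    checkFiles = getattr(self, 'check_%s_files' % status.lower())
    checkFiles(actions, transFiles, transType)
    return actions

# usage sketch
files = [{'LFN': '/vo/file1', 'AvailableOnSource': True, 'AvailableOnTarget': False}]
print(Dispatcher().handle('Assigned', files, 'Replication'))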
def __lookForCE( self ): knownces = self.am_getOption( 'BannedCEs', [] ) result = gConfig.getSections( '/Resources/Sites' ) if not result['OK']: return grids = result['Value'] for grid in grids: result = gConfig.getSections( '/Resources/Sites/%s' % grid ) if not result['OK']: return sites = result['Value'] for site in sites: opt = gConfig.getOptionsDict( '/Resources/Sites/%s/%s' % ( grid, site ) )['Value'] ces = List.fromChar( opt.get( 'CE', '' ) ) knownces += ces response = ldapCEState( '', vo = self.voName ) if not response['OK']: self.log.error( "Error during BDII request", response['Message'] ) response = self.__checkAlternativeBDIISite( ldapCEState, '', self.voName ) return response newces = {} for queue in response['Value']: try: queuename = queue['GlueCEUniqueID'] except: continue cename = queuename.split( ":" )[0] if not cename in knownces: newces[cename] = None self.log.debug( "newce", cename ) body = "" possibleNewSites = [] for ce in newces.iterkeys(): response = ldapCluster( ce ) if not response['OK']: self.log.warn( "Error during BDII request", response['Message'] ) response = self.__checkAlternativeBDIISite( ldapCluster, ce ) continue clusters = response['Value'] if len( clusters ) != 1: self.log.warn( "Error in cluster length", " CE %s Length %d" % ( ce, len( clusters ) ) ) if len( clusters ) == 0: continue cluster = clusters[0] fkey = cluster.get( 'GlueForeignKey', [] ) if type( fkey ) == type( '' ): fkey = [fkey] nameBDII = None for entry in fkey: if entry.count( 'GlueSiteUniqueID' ): nameBDII = entry.split( '=' )[1] break if not nameBDII: continue cestring = "CE: %s, GOCDB Name: %s" % ( ce, nameBDII ) self.log.info( cestring ) response = ldapCE( ce ) if not response['OK']: self.log.warn( "Error during BDII request", response['Message'] ) response = self.__checkAlternativeBDIISite( ldapCE, ce ) continue ceinfos = response['Value'] if len( ceinfos ): ceinfo = ceinfos[0] systemName = ceinfo.get( 'GlueHostOperatingSystemName', 'Unknown' ) systemVersion = ceinfo.get( 'GlueHostOperatingSystemVersion', 'Unknown' ) systemRelease = ceinfo.get( 'GlueHostOperatingSystemRelease', 'Unknown' ) else: systemName = "Unknown" systemVersion = "Unknown" systemRelease = "Unknown" osstring = "SystemName: %s, SystemVersion: %s, SystemRelease: %s" % ( systemName, systemVersion, systemRelease ) self.log.info( osstring ) response = ldapCEState( ce, vo = self.voName ) if not response['OK']: self.log.warn( "Error during BDII request", response['Message'] ) response = self.__checkAlternativeBDIISite( ldapCEState, ce, self.voName ) continue newcestring = "\n\n%s\n%s" % ( cestring, osstring ) usefull = False cestates = response['Value'] for cestate in cestates: queuename = cestate.get( 'GlueCEUniqueID', 'UnknownName' ) queuestatus = cestate.get( 'GlueCEStateStatus', 'UnknownStatus' ) queuestring = "%s %s" % ( queuename, queuestatus ) self.log.info( queuestring ) newcestring += "\n%s" % queuestring if queuestatus.count( 'Production' ): usefull = True if usefull: body += newcestring possibleNewSites.append( 'dirac-admin-add-site DIRACSiteName %s %s' % ( nameBDII, ce ) ) if body: body = "We are glad to inform You about new CE(s) possibly suitable for %s:\n" % self.voName + body body += "\n\nTo suppress information about CE add its name to BannedCEs list." 
for possibleNewSite in possibleNewSites: body = "%s\n%s" % ( body, possibleNewSite ) self.log.info( body ) if self.addressTo and self.addressFrom: notification = NotificationClient() result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False ) return S_OK()
def run(self):
  """
  Do actions required to notify users.

  Mandatory keyword arguments:
  - Granularity

  Optional keyword arguments:
  - SiteType
  - ServiceType
  - ResourceType
  """
  # Initializing variables
  nc = NotificationClient()
  # raise alarms, right now makes a simple notification
  if 'Granularity' not in self.kw['Params'].keys():
    raise ValueError("You have to provide an argument Granularity = <desired_granularity>")
  if self.new_status['Action']:
    message = "%s %s is perceived as" % (self.granularity, self.name)
    message = message + " %s. Reason: %s." % (self.new_status['Status'], self.new_status['Reason'])
    users_to_notify = self._getUsersToNotify()
    for notif in users_to_notify:
      for user in notif['Users']:
        if 'Web' in notif['Notifications']:
          gLogger.info("Sending web notification to user %s" % user)
          nc.addNotificationForUser(user, message)
        if 'Mail' in notif['Notifications']:
          gLogger.info("Sending mail notification to user %s" % user)
          was = self.rsClient.getElementHistory(
              self.granularity, elementName=self.name, statusType=self.status_type,
              meta={"order": "DESC", 'limit': 1, "columns": ['Status', 'Reason', 'DateEffective']})
          if not was['OK']:
            gLogger.error(was['Message'])
            continue
          was = was['Value'][0]
          mailMessage = """ ---TESTING---
--------------------------------------------------------------------------------
RSS changed the status of the following resource:

Granularity:\t%s
Name:\t\t%s
New status:\t%s
Reason:\t\t%s
Was:\t\t%s (%s) since %s
Setup:\t\t%s

If you think RSS took the wrong decision, please set the status manually:

Use: dirac-rss-set-status -g <granularity> -n <element_name> -s <desired_status> [-t status_type]
(if you omit the optional last part of the command, all status types are matched.)

This notification has been sent according to those parameters:
%s
""" % (self.granularity, self.name, self.new_status['Status'], self.new_status['Reason'],
       was[0], was[1], was[2], CS.getSetup(), str(users_to_notify))
          # Actually send the mail!
          resUser = self.rmClient.getUserRegistryCache(user)
          if not resUser['OK']:
            gLogger.error(resUser['Message'])
            continue
          resUser = resUser['Value'][0][2]
          nc.sendMail(resUser,
                      '[RSS][%s][%s] %s -> %s' % (self.granularity, self.name,
                                                  self.new_status['Status'], was[0]),
                      mailMessage)

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
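The notification list walked above is a list of dicts, each carrying a 'Users' list and a 'Notifications' list naming the channels ('Web', 'Mail') to use. A small sketch that flattens such a structure into (user, channel) pairs before dispatching; the data is illustrative:

def notification_targets(users_to_notify):
  """Yield (user, channel) pairs from the structure walked above (sketch)."""
  for entry in users_to_notify:
    for user in entry['Users']:
      for channel in entry['Notifications']:
        yield user, channel

# usage sketch
plan = [{'Users': ['jdoe', 'asmith'], 'Notifications': ['Web', 'Mail']}]
for user, channel in notification_targets(plan):
  print('%s via %s' % (user, channel))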
body = "".join(body.strip()) try: headers = dict((i.strip(), j.strip()) for i, j in (item.split(':') for item in head.split('\\n'))) except: gLogger.error("Failed to convert string: %s to email headers" % head) DIRACexit(4) if not "To" in headers: gLogger.error("Failed to get 'To:' field from headers %s" % head) DIRACexit(5) to = headers["To"] origin = "%s@%s" % (os.getenv("LOGNAME", "dirac"), socket.getfqdn()) if "From" in headers: origin = headers["From"] subject = "Sent from %s" % socket.getfqdn() if "Subject" in headers: subject = headers["Subject"] ntc = NotificationClient() print "sendMail(%s,%s,%s,%s,%s)" % (to, subject, body, origin, False) result = ntc.sendMail(to, subject, body, origin, localAttempt=False) if not result["OK"]: gLogger.error(result["Message"]) DIRACexit(6) DIRACexit(0)
def sendMail(self, address, subject, body, fromAddress=None, localAttempt=True):
  """ Send mail to specified address with body. """
  notification = NotificationClient()
  return notification.sendMail(address, subject, body, fromAddress, localAttempt)
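Like every call in these snippets, the wrapper above returns a DIRAC result dictionary rather than raising, so callers branch on 'OK' and then read either 'Value' or 'Message'. A minimal illustration of that convention using S_OK/S_ERROR directly; the helper and its numbers are invented:

from DIRAC import S_OK, S_ERROR

def half(number):
  """Return half of an even number, using the DIRAC return convention (sketch)."""
  if number % 2:
    return S_ERROR('%d is not even' % number)
  return S_OK(number / 2)

res = half(10)
if res['OK']:
  value = res['Value']          # 5
else:
  errorMessage = res['Message']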
if not res['OK']: gLogger.error( "The provided site (%s) does not have an associated catalog." % site) continue res = csAPI.setOption("%s/%s/Status" % (storageCFGBase, site), "Active") if not res['OK']: gLogger.error("Failed to update %s catalog status to Active" % site) else: gLogger.debug("Successfully updated %s catalog status to Active" % site) allowed.append(site) if not allowed: gLogger.error("Failed to allow any catalog mirrors") DIRAC.exit(-1) res = csAPI.commitChanges() if not res['OK']: gLogger.error("Failed to commit changes to CS", res['Message']) DIRAC.exit(-1) subject = '%d catalog instance(s) allowed for use' % len(allowed) address = gConfig.getValue('/Operations/EMail/Production', '*****@*****.**') body = 'The catalog mirrors at the following sites were allowed' for site in allowed: body = "%s\n%s" % (body, site) NotificationClient().sendMail(address, subject, body, '*****@*****.**' % userName) DIRAC.exit(0)
def __infoFromCE( self ):

  sitesSection = cfgPath( 'Resources', 'Sites' )
  result = gConfig.getSections( sitesSection )
  if not result['OK']:
    return result
  grids = result['Value']

  changed = False
  body = ""

  for grid in grids:
    gridSection = cfgPath( sitesSection, grid )
    result = gConfig.getSections( gridSection )
    if not result['OK']:
      return result
    sites = result['Value']

    for site in sites:
      siteSection = cfgPath( gridSection, site )
      opt = gConfig.getOptionsDict( siteSection )['Value']
      name = opt.get( 'Name', '' )
      if name:
        coor = opt.get( 'Coordinates', 'Unknown' )
        mail = opt.get( 'Mail', 'Unknown' )
        result = ldapSite( name )
        if not result['OK']:
          self.log.warn( "BDII site %s: %s" % ( name, result['Message'] ) )
          result = self.__checkAlternativeBDIISite( ldapSite, name )
        if result['OK']:
          bdiisites = result['Value']
          if len( bdiisites ) == 0:
            self.log.warn( name, "Error in bdii: leng = 0" )
          else:
            if not len( bdiisites ) == 1:
              self.log.warn( name, "Warning in bdii: leng = %d" % len( bdiisites ) )
            bdiisite = bdiisites[0]
            try:
              longitude = bdiisite['GlueSiteLongitude']
              latitude = bdiisite['GlueSiteLatitude']
              newcoor = "%s:%s" % ( longitude, latitude )
            except:
              self.log.warn( "Error in bdii coor" )
              newcoor = "Unknown"
            try:
              newmail = bdiisite['GlueSiteSysAdminContact'].split( ":" )[-1].strip()
            except:
              self.log.warn( "Error in bdii mail" )
              newmail = "Unknown"
            self.log.debug( "%s %s %s" % ( name, newcoor, newmail ) )
            if newcoor != coor:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( coor, newcoor ) )
              if coor == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Coordinates' ), newcoor )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Coordinates' ), newcoor )
              changed = True
            if newmail != mail:
              self.log.info( "%s" % ( name ), "%s -> %s" % ( mail, newmail ) )
              if mail == 'Unknown':
                self.csAPI.setOption( cfgPath( siteSection, 'Mail' ), newmail )
              else:
                self.csAPI.modifyValue( cfgPath( siteSection, 'Mail' ), newmail )
              changed = True

      celist = List.fromChar( opt.get( 'CE', '' ) )
      if not celist:
        self.log.warn( site, 'Empty site list' )
        continue

      # result = gConfig.getSections( cfgPath( siteSection, 'CEs' ) )
      # if not result['OK']:
      #   self.log.debug( "Section CEs:", result['Message'] )

      for ce in celist:
        ceSection = cfgPath( siteSection, 'CEs', ce )
        result = gConfig.getOptionsDict( ceSection )
        if not result['OK']:
          self.log.debug( "Section CE", result['Message'] )
          wnTmpDir = 'Unknown'
          arch = 'Unknown'
          os = 'Unknown'
          si00 = 'Unknown'
          pilot = 'Unknown'
          cetype = 'Unknown'
        else:
          ceopt = result['Value']
          wnTmpDir = ceopt.get( 'wnTmpDir', 'Unknown' )
          arch = ceopt.get( 'architecture', 'Unknown' )
          os = ceopt.get( 'OS', 'Unknown' )
          si00 = ceopt.get( 'SI00', 'Unknown' )
          pilot = ceopt.get( 'Pilot', 'Unknown' )
          cetype = ceopt.get( 'CEType', 'Unknown' )

        result = ldapCE( ce )
        if not result['OK']:
          self.log.warn( 'Error in bdii for %s' % ce, result['Message'] )
          result = self.__checkAlternativeBDIISite( ldapCE, ce )
          continue
        try:
          bdiice = result['Value'][0]
        except:
          self.log.warn( 'Error in bdii for %s' % ce, result )
          bdiice = None
        if bdiice:
          try:
            newwnTmpDir = bdiice['GlueSubClusterWNTmpDir']
          except:
            newwnTmpDir = 'Unknown'
          if wnTmpDir != newwnTmpDir and newwnTmpDir != 'Unknown':
            section = cfgPath( ceSection, 'wnTmpDir' )
            self.log.info( section, " -> ".join( ( wnTmpDir, newwnTmpDir ) ) )
            if wnTmpDir == 'Unknown':
              self.csAPI.setOption( section, newwnTmpDir )
            else:
              self.csAPI.modifyValue( section, newwnTmpDir )
            changed = True

          try:
            newarch = bdiice['GlueHostArchitecturePlatformType']
          except:
            newarch = 'Unknown'
          if arch != newarch and newarch != 'Unknown':
            section = cfgPath( ceSection, 'architecture' )
            self.log.info( section, " -> ".join( ( arch, newarch ) ) )
            if arch == 'Unknown':
              self.csAPI.setOption( section, newarch )
            else:
              self.csAPI.modifyValue( section, newarch )
            changed = True

          try:
            newos = '_'.join( ( bdiice['GlueHostOperatingSystemName'],
                                bdiice['GlueHostOperatingSystemVersion'],
                                bdiice['GlueHostOperatingSystemRelease'] ) )
          except:
            newos = 'Unknown'
          if os != newos and newos != 'Unknown':
            section = cfgPath( ceSection, 'OS' )
            self.log.info( section, " -> ".join( ( os, newos ) ) )
            if os == 'Unknown':
              self.csAPI.setOption( section, newos )
            else:
              self.csAPI.modifyValue( section, newos )
            changed = True
            body = body + "OS was changed %s -> %s for %s at %s\n" % ( os, newos, ce, site )

          try:
            newsi00 = bdiice['GlueHostBenchmarkSI00']
          except:
            newsi00 = 'Unknown'
          if si00 != newsi00 and newsi00 != 'Unknown':
            section = cfgPath( ceSection, 'SI00' )
            self.log.info( section, " -> ".join( ( si00, newsi00 ) ) )
            if si00 == 'Unknown':
              self.csAPI.setOption( section, newsi00 )
            else:
              self.csAPI.modifyValue( section, newsi00 )
            changed = True

          try:
            rte = bdiice['GlueHostApplicationSoftwareRunTimeEnvironment']
            if self.voName.lower() == 'lhcb':
              if 'VO-lhcb-pilot' in rte:
                newpilot = 'True'
              else:
                newpilot = 'False'
            else:
              newpilot = 'Unknown'
          except:
            newpilot = 'Unknown'
          if pilot != newpilot and newpilot != 'Unknown':
            section = cfgPath( ceSection, 'Pilot' )
            self.log.info( section, " -> ".join( ( pilot, newpilot ) ) )
            if pilot == 'Unknown':
              self.csAPI.setOption( section, newpilot )
            else:
              self.csAPI.modifyValue( section, newpilot )
            changed = True

        result = ldapCEState( ce, vo = self.voName )  # getBDIICEVOView
        if not result['OK']:
          self.log.warn( 'Error in bdii for queue %s' % ce, result['Message'] )
          result = self.__checkAlternativeBDIISite( ldapCEState, ce, self.voName )
          continue
        try:
          queues = result['Value']
        except:
          self.log.warn( 'Error in bdii for queue %s' % ce, result['Message'] )
          continue

        newcetype = 'Unknown'
        for queue in queues:
          try:
            queuetype = queue['GlueCEImplementationName']
          except:
            queuetype = 'Unknown'
          if newcetype == 'Unknown':
            newcetype = queuetype
          else:
            if queuetype != newcetype:
              self.log.warn( 'Error in bdii for ce %s ' % ce,
                             'different cetypes %s %s' % ( newcetype, queuetype ) )
        if newcetype == 'ARC-CE':
          newcetype = 'ARC'

        if cetype != newcetype and newcetype != 'Unknown':
          section = cfgPath( ceSection, 'CEType' )
          self.log.info( section, " -> ".join( ( cetype, newcetype ) ) )
          if cetype == 'Unknown':
            self.csAPI.setOption( section, newcetype )
          else:
            self.csAPI.modifyValue( section, newcetype )
          changed = True

        for queue in queues:
          try:
            queueName = queue['GlueCEUniqueID'].split( '/' )[-1]
          except:
            self.log.warn( 'error in queuename ', queue )
            continue

          try:
            newmaxCPUTime = queue['GlueCEPolicyMaxCPUTime']
          except:
            newmaxCPUTime = None

          newsi00 = None
          try:
            caps = queue['GlueCECapability']
            if type( caps ) == type( '' ):
              caps = [caps]
            for cap in caps:
              if cap.count( 'CPUScalingReferenceSI00' ):
                newsi00 = cap.split( '=' )[-1]
          except:
            newsi00 = None

          queueSection = cfgPath( ceSection, 'Queues', queueName )
          result = gConfig.getOptionsDict( queueSection )
          if not result['OK']:
            self.log.warn( "Section Queues", result['Message'] )
            maxCPUTime = 'Unknown'
            si00 = 'Unknown'
          else:
            queueopt = result['Value']
            maxCPUTime = queueopt.get( 'maxCPUTime', 'Unknown' )
            si00 = queueopt.get( 'SI00', 'Unknown' )

          if newmaxCPUTime and ( maxCPUTime != newmaxCPUTime ):
            section = cfgPath( queueSection, 'maxCPUTime' )
            self.log.info( section, " -> ".join( ( maxCPUTime, newmaxCPUTime ) ) )
            if maxCPUTime == 'Unknown':
              self.csAPI.setOption( section, newmaxCPUTime )
            else:
              self.csAPI.modifyValue( section, newmaxCPUTime )
            changed = True

          if newsi00 and ( si00 != newsi00 ):
            section = cfgPath( queueSection, 'SI00' )
            self.log.info( section, " -> ".join( ( si00, newsi00 ) ) )
            if si00 == 'Unknown':
              self.csAPI.setOption( section, newsi00 )
            else:
              self.csAPI.modifyValue( section, newsi00 )
            changed = True

  if changed:
    self.log.info( body )
    if body and self.addressTo and self.addressFrom:
      notification = NotificationClient()
      result = notification.sendMail( self.addressTo, self.subject, body, self.addressFrom, localAttempt = False )
    return self.csAPI.commit()
  else:
    self.log.info( "No changes found" )
    return S_OK()
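# Aside (not part of the agent): the method above repeats the same small decision
# for every CS option it touches - call setOption() when the stored value is still
# 'Unknown' (the option does not exist yet), otherwise call modifyValue(). A
# minimal helper sketch of that pattern; the function name and signature are mine,
# only the CSAPI calls come from the code above.
def updateCSValue(csAPI, section, oldValue, newValue):
  """Record newValue at 'section' in the CS; return True if a change was made."""
  if newValue == 'Unknown' or newValue == oldValue:
    return False
  if oldValue == 'Unknown':
    csAPI.setOption(section, newValue)    # option not defined yet
  else:
    csAPI.modifyValue(section, newValue)  # overwrite the existing option
  return True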
def sendMail(self, sendDict=None, title=None, body=None, fromAddress=None):
    """
    Send an email using sendDict: { e-mail : name } as the address book.
    title and body are the e-mail's Subject and Body.
    fromAddress is the e-mail address on behalf of whom the message is sent.
    Returns a success/failure JSON structure.
    """
    if not sendDict:
        result = "sendDict argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not title:
        result = "title argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not body:
        result = "body argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}
    if not fromAddress:
        result = "fromAddress argument is missing"
        gLogger.debug(result)
        return {"success": "false", "error": result}

    sentSuccess = list()
    sentFailed = list()
    gLogger.debug("Initializing Notification client")
    ntc = NotificationClient(lambda x, timeout: RPCClient(x, timeout=timeout, static=True))

    for email, name in sendDict.iteritems():
        result = ntc.sendMail(email, title, body, fromAddress, False)
        if not result["OK"]:
            error = name + ": " + result["Message"]
            sentFailed.append(error)
            gLogger.error("Sent failure: ", error)
        else:
            gLogger.info("Successfully sent to %s" % name)
            sentSuccess.append(name)

    success = ", ".join(sentSuccess)
    failure = "\n".join(sentFailed)

    if len(success) > 0 and len(failure) > 0:
        result = "Successfully sent e-mail to: "
        result = result + success + "\n\nFailed to send e-mail to:\n" + failure
        gLogger.debug(result)
        return {"success": "true", "result": result}
    elif len(success) > 0 and len(failure) < 1:
        result = "Successfully sent e-mail to: %s" % success
        gLogger.debug(result)
        return {"success": "true", "result": result}
    elif len(success) < 1 and len(failure) > 0:
        result = "Failed to send e-mail to:\n%s" % failure
        gLogger.debug(result)
        return {"success": "false", "error": result}
    else:
        result = "No messages were sent due to a technical failure"
        gLogger.debug(result)
        return {"success": "false", "error": result}
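# Illustrative call of the handler above; the handler instance, addresses and
# message texts are placeholders, only the argument names and the returned
# JSON-like structure come from the method itself.
sendDict = {"ops@example.org": "Operations team",
            "admin@example.org": "Site admin"}
response = handler.sendMail(sendDict=sendDict,
                            title="CS update",
                            body="The catalog mirrors were re-enabled.",
                            fromAddress="noreply@example.org")
if response["success"] == "true":
    print(response["result"])   # names of the recipients that were reached
else:
    print(response["error"])    # per-recipient failure messages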
mySetup = gConfig.getValue( 'DIRAC/Setup' )

# Retrieve information from all the hosts
client = SystemAdministratorIntegrator( exclude = excludedHosts )
resultAll = client.getOverallStatus()

# Retrieve user installing the component
result = getProxyInfo()
if result[ 'OK' ]:
  user = result[ 'Value' ][ 'username' ]
else:
  DIRACexit( -1 )
if not user:
  user = '******'

notificationClient = NotificationClient()
for host in resultAll[ 'Value' ]:
  if not resultAll[ 'Value' ][ host ][ 'OK' ]:
    # If the host cannot be contacted, exclude it and send message
    excludedHosts.append( host )

    result = notificationClient.sendMail( Operations().getValue( 'EMail/Production', [] ),
                                          'Unreachable host',
                                          '\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n' % host )
    if not result[ 'OK' ]:
      gLogger.error( 'Can not send unreachable host notification mail: %s' % result[ 'Message' ] )

if not resultAll[ 'OK' ]:
  gLogger.error( resultAll[ 'Message' ] )
  DIRACexit( -1 )
resultHosts = client.getHostInfo()
if not resultHosts[ 'OK' ]:
  gLogger.error( resultHosts[ 'Message' ] )
class ErrorMessageMonitor( AgentModule ):

  def initialize( self ):

    self.systemLoggingDB = SystemLoggingDB()
    self.notification = NotificationClient()

    userList = self.am_getOption( "Reviewer", [] )
    self.log.debug( "Users to be notified:", ', '.join( userList ) )

    mailList = []
    for user in userList:
      mail = getUserOption( user, 'Email', '' )
      if not mail:
        self.log.warn( "Could not get user's mail", user )
      else:
        mailList.append( mail )

    if not mailList:
      mailList = Operations().getValue( 'EMail/Logging', [] )

    if not len( mailList ):
      errString = "There are no valid users in the mailing list"
      varString = "[" + ','.join( userList ) + "]"
      self.log.warn( errString, varString )

    self.log.info( "List of mails to be notified", ','.join( mailList ) )
    self._mailAddress = mailList
    self._subject = 'New error messages were entered in the SystemLoggingDB'
    return S_OK()

  def execute( self ):
    """ The main agent execution method
    """
    condDict = {'ReviewedMessage': 0}
    result = self.systemLoggingDB.getCounters( 'FixedTextMessages', ['ReviewedMessage'], condDict )
    if not result['OK']:
      return result

    if not result['Value']:
      self.log.info( 'No messages need review' )
      return S_OK( 'No messages need review' )

    returnFields = ['FixedTextID', 'FixedTextString', 'SystemName', 'SubSystemName']
    result = self.systemLoggingDB._queryDB( showFieldList = returnFields,
                                            groupColumn = 'FixedTextString',
                                            condDict = condDict )
    if not result['OK']:
      self.log.error( 'Failed to obtain the non reviewed Strings', result['Message'] )
      return S_OK()

    messageList = result['Value']
    if messageList == 'None' or not messageList:
      self.log.error( 'The DB query returned an empty result' )
      return S_OK()

    mailBody = 'These new messages have arrived to the Logging Service\n'
    for message in messageList:
      mailBody = mailBody + "String: '" + message[1] + "'\tSystem: '" \
                 + message[2] + "'\tSubsystem: '" + message[3] + "'\n"

    if self._mailAddress:
      result = self.notification.sendMail( self._mailAddress, self._subject, mailBody )
      if not result['OK']:
        self.log.warn( "The mail could not be sent" )
        return S_OK()

    messageIDs = [message[0] for message in messageList]
    condDict = {'FixedTextID': messageIDs}
    result = self.systemLoggingDB.updateFields( 'FixedTextMessages', ['ReviewedMessage'], [1],
                                                condDict = condDict )
    if not result['OK']:
      self.log.error( 'Could not update message Status', result['Message'] )
      return S_OK()

    self.log.verbose( 'Updated message Status for:', str( messageList ) )
    self.log.info( "The messages have been sent for review",
                   "There are %s new descriptions" % len( messageList ) )
    return S_OK( "%s Messages have been sent for review" % len( messageList ) )
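# Aside (not part of the agent): initialize() above resolves the notification
# addresses in two steps - the e-mail of every configured Reviewer, then the
# Operations option EMail/Logging as a fallback. A standalone sketch of that
# resolution; the function name is mine, the helpers are the ones the agent uses.
from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getUserOption
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations

def resolveReviewerMails( userList ):
  """Return the reviewers' e-mail addresses, or the Operations fallback list."""
  mails = [getUserOption( user, 'Email', '' ) for user in userList]
  mails = [mail for mail in mails if mail]
  return mails or Operations().getValue( 'EMail/Logging', [] )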
class TopErrorMessagesReporter(AgentModule):
    """ initialization - you need to specify a user or an email
    """

    def initialize(self):

        self.systemLoggingDB = SystemLoggingDB()
        self.agentName = self.am_getModuleParam('fullName')
        self.notification = NotificationClient()

        mailList = self.am_getOption("MailList", [])
        userList = self.am_getOption("Reviewer", [])
        self.log.debug("Users to be notified:", ', '.join(userList))
        for user in userList:
            mail = getUserOption(user, 'Email', '')
            if not mail:
                self.log.warn("Could not get user's mail", user)
            else:
                mailList.append(mail)
        if not mailList:
            mailList = Operations().getValue('EMail/Logging', [])
        if not mailList:
            errString = "There are no valid users in the list of email where to send the report"
            errString += "\nPlease specify some in Operations/<default>/EMail/Logging"
            varString = "[" + ','.join(userList) + "]"
            self.log.error(errString, varString)
            return S_ERROR(errString + varString)
        self.log.info("List of mails to be notified", ','.join(mailList))
        self._mailAddress = mailList

        self._threshold = int(self.am_getOption('Threshold', 10))
        self.__days = self.am_getOption('QueryPeriod', 7)
        self._period = int(self.__days) * day
        self._limit = int(self.am_getOption('NumberOfErrors', 10))

        string = "The %i most common errors in the SystemLoggingDB" % self._limit
        self._subject = string + " for the last %s days" % self.__days
        return S_OK()

    def execute(self):
        """ The main agent execution method
        """
        limitDate = date() - self._period

        tableList = ["MessageRepository", "FixedTextMessages", "Systems", "SubSystems"]
        columnsList = ["SystemName", "SubSystemName", "count(*) as entries", "FixedTextString"]
        cmd = "SELECT " + ', '.join(columnsList) + " FROM " \
              + " NATURAL JOIN ".join(tableList) \
              + " WHERE MessageTime > '%s'" % limitDate \
              + " AND LogLevel in ('ERROR','FATAL','EXCEPT')" \
              + " GROUP BY FixedTextID,SystemName,SubSystemName HAVING entries > %s" % self._threshold \
              + " ORDER BY entries DESC LIMIT %i;" % self._limit

        result = self.systemLoggingDB._query(cmd)
        if not result['OK']:
            return result

        messageList = result['Value']
        if messageList == 'None' or messageList == ():
            self.log.warn('The DB query returned an empty result')
            return S_OK()

        mailBody = '\n'
        for message in messageList:
            mailBody = mailBody + "Count: " + str(message[2]) + "\tError: '" \
                       + message[3] + "'\tSystem: '" + message[0] \
                       + "'\tSubsystem: '" + message[1] + "'\n"

        mailBody = mailBody + "\n\n-------------------------------------------------------\n" \
                   + "Please do not reply to this mail. It was automatically\n" \
                   + "generated by a Dirac Agent.\n"

        result = self.systemLoggingDB._getDataFromAgentTable(self.agentName)
        self.log.debug(result)
        if not result['OK']:
            errorString = "Could not get the date when the last mail was sent"
            self.log.error(errorString)
            return S_ERROR(errorString)
        else:
            if result['Value']:
                self.log.debug("date value: %s" % fromString(result['Value'][0][0][1:-1]))
                lastMailSentDate = fromString(result['Value'][0][0][1:-1])
            else:
                lastMailSentDate = limitDate - 1 * day
                result = self.systemLoggingDB._insertDataIntoAgentTable(self.agentName, lastMailSentDate)
                if not result['OK']:
                    errorString = "Could not insert data into the DB"
                    self.log.error(errorString, result['Message'])
                    return S_ERROR(errorString + ": " + result['Message'])

        self.log.debug("limitDate: %s\t" % limitDate + "lastMailSentDate: %s\n" % lastMailSentDate)
        if lastMailSentDate > limitDate:
            self.log.info("The previous report was sent less " + " than %s days ago" % self.__days)
            return S_OK()

        dateSent = toString(date())
        self.log.info("The list with the top errors has been sent")

        result = self.systemLoggingDB._insertDataIntoAgentTable(self.agentName, dateSent)
        if not result['OK']:
            errorString = "Could not insert data into the DB"
            self.log.error(errorString, result['Message'])
            return S_ERROR(errorString + ": " + result['Message'])

        result = self.notification.sendMail(self._mailAddress, self._subject, mailBody)
        if not result['OK']:
            self.log.warn("The notification could not be sent")
            return S_OK()

        return S_OK("The list with the top errors has been sent")
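# Worked illustration (assumption: the agent runs with its default options,
# Threshold=10, QueryPeriod=7 and NumberOfErrors=10, and the date()/day helpers
# it relies on come from DIRAC.Core.Utilities.Time, as in other DIRAC agents).
# This is roughly the reporting window and subject it would build:
from DIRAC.Core.Utilities.Time import date, day

threshold, queryPeriod, limit = 10, 7, 10
limitDate = date() - queryPeriod * day   # messages older than this are ignored
subject = "The %i most common errors in the SystemLoggingDB for the last %s days" % (limit, queryPeriod)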