def initialize(self):
    """Load the options used by this component from the configuration.

    Reads the ``SiteJobLimits`` and ``CheckPilotVersion`` options through
    ``getCSOption``, the current setup and VO, and the pilot version stored
    under ``/Operations/<vo>/<setup>/Versions/PilotVersion``.
    """
    self.siteJobLimits = self.getCSOption("SiteJobLimits", False)
    self.checkPilotVersion = self.getCSOption("CheckPilotVersion", True)
    self.setup = gConfig.getValue("/DIRAC/Setup", "")
    self.vo = getVO()
    # The version path depends on both the VO and the setup read just above.
    versionPath = "/Operations/%s/%s/Versions/PilotVersion" % (self.vo, self.setup)
    self.pilotVersion = gConfig.getValue(versionPath, "")
def __init__(self, useCertificates = False):
    """ Constructor of the RequestClient class

    Reads the local, central and VO-box RequestDB URLs from the
    configuration and stores a randomized list of VO-box URLs that does not
    contain the local URL.

    :param useCertificates: accepted for interface compatibility; it is not
                            used in this constructor
    """
    self.localUrl = gConfig.getValue('/Systems/DataManagement/Development/Services/RequestDB/localURL')
    self.centralUrl = gConfig.getValue('/Systems/DataManagement/Development/Services/RequestDB/centralURL')
    voBoxUrls = gConfig.getValue('/Systems/DataManagement/Development/Services/RequestDB/voBoxURLs')
    # list.remove() works in place and returns None, so its result must not
    # be assigned. It also raises ValueError when the item is absent, hence
    # the membership check.
    # NOTE(review): assumes randomize() returns a list - confirm.
    shuffledUrls = randomize(voBoxUrls)
    if self.localUrl in shuffledUrls:
        shuffledUrls.remove(self.localUrl)
    self.voBoxUrls = shuffledUrls
def initializeSecurityLoggingHandler( serviceInfo ):
    """Prepare the security log store for the service.

    Resolves the data directory from the ``DataLocation`` option of the
    service section (relative paths are resolved against
    ``/LocalSite/InstancePath``), makes sure it exists and is writable, then
    installs a ``SecurityFileLog`` on it as the global log store.

    :param serviceInfo: dictionary providing the 'serviceSectionPath' key
    :return: S_OK() on success, S_ERROR if the location is not writable
    """
    global gSecurityFileLog
    serviceCS = serviceInfo[ 'serviceSectionPath' ]
    dataPath = gConfig.getValue( "%s/DataLocation" % serviceCS, "data/securityLog" )
    dataPath = dataPath.strip()
    # startswith() also copes with an empty option value, where indexing
    # dataPath[0] would raise IndexError.
    if not dataPath.startswith( "/" ):
        dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
    gLogger.info( "Data will be written into %s" % dataPath )
    try:
        os.makedirs( dataPath )
    except OSError:
        # Best effort: the directory may already exist. Real problems are
        # caught by the write test below.
        pass
    try:
        testFile = "%s/seclog.jarl.test" % dataPath
        fd = open( testFile, "w" )
        fd.close()
        os.unlink( testFile )
    except IOError:
        gLogger.fatal( "Can't write to %s" % dataPath )
        return S_ERROR( "Data location is not writable" )
    #Define globals
    gSecurityFileLog = SecurityFileLog( dataPath )
    SecurityLogClient().setLogStore( gSecurityFileLog )
    return S_OK()
def _updateSiteList( self, sitesData ):
    """Fill ``sitesData`` with coordinates, name and tier of configured sites.

    For every grid in ``self.gridsToMap`` the sites under
    ``/Resources/Sites/<grid>`` are read. Sites without two valid
    coordinates or without a name are skipped with a warning.

    :param sitesData: dictionary updated in place; each accepted site maps
                      to ``{ 'longlat', 'name', 'tier' }``
    """
    ceSection = "/Resources/Sites"
    for grid in self.gridsToMap:
        gridSection = "%s/%s" % ( ceSection, grid )
        result = gConfig.getSections( gridSection )
        if not result[ 'OK' ]:
            gLogger.error( "Cannot get a list of sites for grid", "%s :%s" % ( grid, result[ 'Message' ] ) )
            continue
        for site in result[ 'Value' ]:
            coords = gConfig.getValue( "%s/%s/Coordinates" % ( gridSection, site ), "" )
            try:
                # Round every coordinate to four decimals.
                coords = [ float( "%.4f" % float( c.strip() ) ) for c in coords.split( ":" ) if c.strip() ]
            except Exception as excp:
                # Report the parsing problem through the logger instead of a
                # stray debug print to stdout.
                gLogger.warn( "Site %s has coordinates incorrectly defined: %s" % ( site, coords ), str( excp ) )
                continue
            if not coords or len( coords ) != 2:
                gLogger.warn( "Site %s has coordinates incorrectly defined: %s" % ( site, coords ) )
                continue
            name = gConfig.getValue( "%s/%s/Name" % ( gridSection, site ), "" )
            if not name:
                gLogger.warn( "Site %s no name defined" % site )
                continue
            tier = gConfig.getValue( "%s/%s/MoUTierLevel" % ( gridSection, site ), "" )
            if not tier or tier.lower() == "none":
                # Sites without an explicit tier level default to 2.
                tier = 2
            siteData = { 'longlat' : coords, 'name' : name, 'tier' : tier }
            sitesData[ site ] = siteData
def export_sendSMS( self, userName, body, fromAddress ):
    """ Send an SMS with supplied body to the specified DIRAC user using the Mail utility via an SMS switch.

    :param userName: user whose ``/Registry/Users/<user>/Mobile`` is used
    :param body: text of the message
    :param fromAddress: sender address; the string 'None' means "not set"
    :return: the result of the mail sending, or S_ERROR when the mobile
             number or the SMS switch is not configured
    """
    gLogger.verbose( 'Received signal to send the following SMS to %s:\n%s' % ( userName, body ) )
    mobile = gConfig.getValue( '/Registry/Users/%s/Mobile' % userName, '' )
    if not mobile:
        return S_ERROR( 'No registered mobile number for %s' % userName )
    csSection = PathFinder.getServiceSection( 'Framework/Notification' )
    smsSwitch = gConfig.getValue( '%s/SMSSwitch' % csSection, '' )
    if not smsSwitch:
        return S_ERROR( 'No SMS switch is defined in CS path %s/SMSSwitch' % csSection )
    # The SMS is delivered as a mail addressed to <mobile>@<switch>.
    address = '%s@%s' % ( mobile, smsSwitch )
    subject = 'DIRAC SMS'
    m = Mail()
    m._subject = subject
    m._message = body
    m._mailAddress = address
    if fromAddress != 'None':
        m._fromAddress = fromAddress
    result = m._send()
    if not result['OK']:
        gLogger.warn( 'Could not send SMS to %s with the following message:\n%s' % ( userName, result['Message'] ) )
    else:
        gLogger.info( 'SMS sent successfully to %s ' % ( userName ) )
        gLogger.debug( result['Value'] )
    return result
def configureFromSection( self, mySection ):
    """ reload from CS

    Refreshes the mail addresses from ``mySection`` and registers every
    requested RunningPod that is not yet known in ``self.runningPods``.

    :param mySection: configuration section to read the options from
    :return: the failed ``getRunningPodDict`` result if a pod definition
             cannot be retrieved; otherwise nothing is returned
    """
    self.log.debug( 'Configuring from %s' % mySection )
    self.errorMailAddress = gConfig.getValue( mySection + '/ErrorMailAddress' , self.errorMailAddress )
    self.alarmMailAddress = gConfig.getValue( mySection + '/AlarmMailAddress' , self.alarmMailAddress )
    self.mailFromAddress = gConfig.getValue( mySection + '/MailFromAddress' , self.mailFromAddress )
    # following will do something only when call from reload including SubmitPool as mySection
    requestedRunningPods = gConfig.getValue( mySection + '/RunningPods', self.runningPods.keys() )
    requiredOptions = ['Image', 'MaxInstances', 'CPUPerInstance', 'Priority', 'CloudEndpoints',
                       'Requirements', 'CampaignStartDate', 'CampaignEndDate']
    for runningPodName in requestedRunningPods:
        self.log.verbose( 'Trying to configure RunningPod:', runningPodName )
        if runningPodName in self.runningPods:
            continue
        runningPodDict = virtualMachineDB.getRunningPodDict( runningPodName )
        if not runningPodDict['OK']:
            self.log.error('Error in RunningPodDict: %s' % runningPodDict['Message'])
            return runningPodDict
        self.log.verbose( 'Trying to configure RunningPodDict:', runningPodDict )
        runningPodDict = runningPodDict[ 'Value' ]
        missingOptions = [ option for option in requiredOptions if option not in runningPodDict ]
        for option in missingOptions:
            self.log.error( 'Missing option in "%s" RunningPod definition:' % runningPodName, option )
        if missingOptions:
            # Skip the whole pod: an incomplete definition would otherwise
            # fail with KeyError in the assignments below.
            continue
        # NOTE(review): 'CampaignStartDate' is required above but is not
        # copied into self.runningPods - confirm whether that is intended.
        self.runningPods[runningPodName] = {
            'Image' : runningPodDict['Image'],
            'Requirements' : runningPodDict['Requirements'],
            'MaxInstances' : int( runningPodDict['MaxInstances'] ),
            'CPUPerInstance' : int( runningPodDict['CPUPerInstance'] ),
            'Priority' : int( runningPodDict['Priority'] ),
            'CloudEndpoints' : runningPodDict['CloudEndpoints'],
            'CampaignEndDate' : runningPodDict['CampaignEndDate'],
        }
def generateToken( self, requesterDN, requesterGroup, numUses = 1, lifeTime = 0, retries = 10 ):
    """ Generate and return a token and the number of uses for the token

    :param requesterDN: DN stored with the token
    :param requesterGroup: group stored with the token
    :param numUses: requested number of uses, clamped to
                    [1, /DIRAC/VOPolicy/TokenMaxUses]
    :param lifeTime: token lifetime in seconds; a false value selects
                     /DIRAC/VOPolicy/TokenLifeTime
    :param retries: how many more times to retry on a duplicate token
    :return: S_OK( ( token, numUses ) ) or the failed update result
    """
    if not lifeTime:
        lifeTime = gConfig.getValue( "/DIRAC/VOPolicy/TokenLifeTime", self.__defaultTokenLifetime )
    maxUses = gConfig.getValue( "/DIRAC/VOPolicy/TokenMaxUses", self.__defaultTokenMaxUses )
    numUses = max( 1, min( numUses, maxUses ) )
    m = md5.md5()
    rndData = "%s.%s.%s.%s" % ( time.time(), random.random(), numUses, lifeTime )
    m.update( rndData )
    token = m.hexdigest()
    fieldsSQL = ", ".join( ( "Token", "RequesterDN", "RequesterGroup", "ExpirationTime", "UsesLeft" ) )
    valuesSQL = ", ".join( ( self._escapeString( token )['Value'],
                             self._escapeString( requesterDN )['Value'],
                             self._escapeString( requesterGroup )['Value'],
                             "TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() )" % lifeTime,
                             str( numUses ) ) )
    insertSQL = "INSERT INTO `ProxyDB_Tokens` ( %s ) VALUES ( %s )" % ( fieldsSQL, valuesSQL )
    result = self._update( insertSQL )
    if result[ 'OK' ]:
        return S_OK( ( token, numUses ) )
    if result[ 'Message' ].find( "uplicate entry" ) > -1:
        if retries:
            # Pass the requester DN and group again: omitting them shifted
            # numUses/lifeTime/retries into the wrong parameters.
            return self.generateToken( requesterDN, requesterGroup, numUses, lifeTime, retries - 1 )
        return S_ERROR( "Max retries reached for token generation. Aborting" )
    return result
def _ex_initialize( cls, exeName, loadName ):
    """Set up the class-level executor state and call ``cls.initialize()``.

    Stores the executor properties and default options on the class,
    creates the sub-logger and runs the ``initialize`` hook.

    :param exeName: full executor name, also used to build the control and
                    work directories
    :param loadName: name the executor was loaded under
    :return: S_ERROR if ``initialize`` raises; nothing is returned otherwise
    """
    cls.__properties = {
        'fullName' : exeName,
        'loadName' : loadName,
        'section' : PathFinder.getExecutorSection( exeName ),
        'loadSection' : PathFinder.getExecutorSection( loadName ),
        'messagesProcessed' : 0,
        'reconnects' : 0,
        'setup' : gConfig.getValue( "/DIRAC/Setup", "Unknown" ),
    }
    cls.__basePath = gConfig.getValue( '/LocalSite/InstancePath', rootPath )
    nameParts = exeName.split( "/" )
    cls.__defaults = {
        'MonitoringEnabled' : True,
        'Enabled' : True,
        'ControlDirectory' : os.path.join( cls.__basePath, 'control', *nameParts ),
        'WorkDirectory' : os.path.join( cls.__basePath, 'work', *nameParts ),
        'ReconnectRetries' : 10,
        'ReconnectSleep' : 5,
    }
    cls.__properties[ 'shifterProxy' ] = ''
    # The shifter credentials file lives inside the work directory.
    cls.__properties[ 'shifterProxyLocation' ] = os.path.join( cls.__defaults[ 'WorkDirectory' ], '.shifterCred' )
    cls.__mindName = False
    cls.__mindExtraArgs = False
    cls.__freezeTime = 0
    cls.__fastTrackEnabled = True
    cls.log = gLogger.getSubLogger( exeName, child = False )
    try:
        result = cls.initialize()
    except Exception as excp:
        gLogger.exception( "Exception while initializing %s" % loadName )
        return S_ERROR( "Exception while initializing: %s" % str( excp ) )
def __getUserDetails( self ):
    """ Get details on user account

    Combines the remote DIRAC credentials with the iRods settings found
    under ``<serviceSectionPath>/UserCredentials/<user>``.

    :return: S_OK( dict ) with the DIRACUser, DIRACGroup, DIRACHome,
             iRodsUser, iRodsGroup, iRodsHome and iRodsPassword keys, or
             S_ERROR when the user name or group is not available
    """
    # Fall back to an empty mapping so that missing credentials produce the
    # S_ERROR below rather than a NameError on unbound locals.
    credentials = self.getRemoteCredentials() or {}
    diracUser = credentials.get( "username" )
    diracGroup = credentials.get( "group" )
    if not ( diracUser and diracGroup ):
        return S_ERROR( 'Failed to get DIRAC user name and/or group' )
    vo = getVOForGroup( diracGroup )
    diracHome = ''
    if vo:
        diracHome = '/%s/user/%s/%s' % ( vo, diracUser[0], diracUser )
    cfgPath = self.serviceInfoDict[ 'serviceSectionPath' ]
    gLogger.debug( "cfgPath: %s" % cfgPath )
    # The iRods user defaults to the DIRAC user name; the rest default to ''.
    irodsUser = gConfig.getValue( "%s/UserCredentials/%s/iRodsUser" % ( cfgPath , diracUser ) , diracUser )
    irodsHome = gConfig.getValue( "%s/UserCredentials/%s/iRodsHome" % ( cfgPath , diracUser ) , '' )
    irodsGroup = gConfig.getValue( "%s/UserCredentials/%s/iRodsGroup" % ( cfgPath , diracUser ) , '' )
    irodsPassword = gConfig.getValue( "%s/UserCredentials/%s/iRodsPassword" % ( cfgPath , diracUser ) , '' )
    resultDict = {}
    resultDict['DIRACUser'] = diracUser
    resultDict['DIRACGroup'] = diracGroup
    resultDict['DIRACHome'] = diracHome
    resultDict['iRodsUser'] = irodsUser
    resultDict['iRodsGroup'] = irodsGroup
    resultDict['iRodsHome'] = irodsHome
    resultDict['iRodsPassword'] = irodsPassword
    return S_OK( resultDict )
def initializeReportGeneratorHandler( serviceInfo ):
    """Prepare the report generator service.

    Creates the read-only accounting DB object, resolves the graphs
    directory from the ``DataLocation`` option (relative paths are resolved
    against ``/LocalSite/InstancePath``), checks it is writable and
    registers the monitoring activities.

    :param serviceInfo: service information (not used in this function)
    :return: S_OK() on success, S_ERROR if the location is not writable
    """
    global gAccountingDB
    gAccountingDB = AccountingDB( readOnly = True )
    #Get data location
    reportSection = PathFinder.getServiceSection( "Accounting/ReportGenerator" )
    dataPath = gConfig.getValue( "%s/DataLocation" % reportSection, "data/accountingGraphs" )
    dataPath = dataPath.strip()
    # startswith() also copes with an empty option value, where indexing
    # dataPath[0] would raise IndexError.
    if not dataPath.startswith( "/" ):
        dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
    gLogger.info( "Data will be written into %s" % dataPath )
    try:
        os.makedirs( dataPath )
    except OSError:
        # Best effort: the directory may already exist. Real problems are
        # caught by the write test below.
        pass
    try:
        testFile = "%s/acc.jarl.test" % dataPath
        fd = open( testFile, "w" )
        fd.close()
        os.unlink( testFile )
    except IOError:
        gLogger.fatal( "Can't write to %s" % dataPath )
        return S_ERROR( "Data location is not writable" )
    gDataCache.setGraphsLocation( dataPath )
    gMonitor.registerActivity( "plotsDrawn", "Drawn plot images", "Accounting reports", "plots", gMonitor.OP_SUM )
    gMonitor.registerActivity( "reportsRequested", "Generated reports", "Accounting reports", "reports", gMonitor.OP_SUM )
    return S_OK()
def initializeStorageElementProxyHandler(serviceInfo):
    """Initialise the storage element proxy service.

    Reads the base path from the service section and creates it if needed.
    When ``HttpAccess`` is enabled, the HTTP cache directory is created and
    an ``HttpThread`` is instantiated on the configured port.

    :param serviceInfo: dictionary providing the 'serviceSectionPath' key
    :return: S_OK() on success, S_ERROR if no base path is configured
    """
    global base_path, httpFlag, httpPort, httpPath
    cfgPath = serviceInfo['serviceSectionPath']
    base_path = gConfig.getValue( "%s/BasePath" % cfgPath, base_path )
    if not base_path:
        gLogger.error( 'Failed to get the base path' )
        return S_ERROR( 'Failed to get the base path' )
    gLogger.info('The base path obtained is %s. Checking its existence...' % base_path)
    if not os.path.exists(base_path):
        gLogger.info('%s did not exist. Creating....' % base_path)
        os.makedirs(base_path)
    httpFlag = gConfig.getValue( "%s/HttpAccess" % cfgPath, False )
    if httpFlag:
        # The cache defaults to <base_path>/httpCache unless HttpCache is set.
        httpPath = '%s/httpCache' % base_path
        httpPath = gConfig.getValue( "%s/HttpCache" % cfgPath, httpPath )
        if not os.path.exists( httpPath ):
            gLogger.info('Creating HTTP cache directory %s' % (httpPath) )
            os.makedirs( httpPath )
        httpPort = gConfig.getValue( "%s/HttpPort" % cfgPath, 9180 )
        gLogger.info('Creating HTTP server thread, port:%d, path:%s' % (httpPort,httpPath) )
        httpThread = HttpThread( httpPort,httpPath )
    return S_OK()
def addUserToEgroup(clip):
    """Add user to e-group

    Uses the ``/Security/egroupAdmin`` and ``/Security/egroupPass`` options
    to open a suds client and adds the user described by ``clip`` to the
    'ilc-dirac' e-group. Failures are logged and the function returns
    without raising.

    :param clip: parameters object; ``clip.external`` selects the external
                 member branch
    """
    login = gConfig.getValue("/Security/egroupAdmin","").strip('"')
    pwd = gConfig.getValue("/Security/egroupPass","").strip('"')
    # NOTE(review): the WSDL URL is empty here - confirm the intended value.
    url = ''
    if not ( login and pwd ):
        gLogger.warn("Missing configuration parameters: username or password for WSDL interactions")
        gLogger.warn("Add options: -o /Security/egroupAdmin=<cernusername> -o /Security/egroupPass=<password>")
        gLogger.error("User registration in e-group must be done manually")
        return
    try:
        client = Client(url=url, username=login, password=pwd)
        #gLogger.notice(client)
    except suds.transport.TransportError as exc:
        gLogger.error("Failed to get the WSDL client:%s" %exc)
        gLogger.error("User registration in e-group must be done manually")
        return
    except Exception:
        # Catch Exception, not everything, so that SystemExit and
        # KeyboardInterrupt still propagate.
        gLogger.error("Something unexpected happened with the suds client, aborting")
        return
    if clip.external:
        sudsUser = client.factory.create("ns0:MemberType")
        sudsUser['Type'] = 'External'
        # NOTE(review): this chained assignment stores the member list itself
        # in 'Email'; the e-mail value looks to be missing - confirm.
        sudsUser['Email'] = userl = [sudsUser]
    else:
        user = getUserInfoFromPhonebook(client, clip)
        userl = [user]
    res = client.service.AddEgroupMembers('ilc-dirac',False, userl)
    if hasattr(res, 'warnings'):
        gLogger.notice(res.warnings)
def getSiteProtocols(self, site, printOutput=False):
    """Allows to check the defined protocols for each site SE.

    :param site: site name; the part before the first '.' selects the grid
                 section under /Resources/Sites
    :param printOutput: when true, print a summary table to stdout
    :return: S_OK( { se : protocols list } ) for the SEs having an SRM2
             protocol section, or S_ERROR / the failed CS result
    """
    result = self.__checkSiteIsValid(site)
    if not result["OK"]:
        return result
    siteSection = "/Resources/Sites/%s/%s/SE" % (site.split(".")[0], site)
    siteSEs = gConfig.getValue(siteSection, [])
    if not siteSEs:
        return S_ERROR("No SEs found for site %s in section %s" % (site, siteSection))
    defaultProtocols = gConfig.getValue("/Resources/StorageElements/DefaultProtocols", [])
    # The comma matters: without it the two literals are concatenated and the
    # message becomes the join separator.
    self.log.verbose("Default list of protocols are", ", ".join(defaultProtocols))
    seInfo = {}
    siteSEs.sort()
    for se in siteSEs:
        sections = gConfig.getSections("/Resources/StorageElements/%s/" % (se))
        if not sections["OK"]:
            return sections
        for section in sections["Value"]:
            if gConfig.getValue("/Resources/StorageElements/%s/%s/ProtocolName" % (se, section), "") == "SRM2":
                path = "/Resources/StorageElements/%s/%s/ProtocolsList" % (se, section)
                seProtocols = gConfig.getValue(path, [])
                if not seProtocols:
                    seProtocols = defaultProtocols
                seInfo[se] = seProtocols
    if printOutput:
        # Single-argument print calls behave the same as the print statement.
        print("\nSummary of protocols for StorageElements at site %s" % site)
        print("\nStorageElement".ljust(30) + "ProtocolsList".ljust(30) + "\n")
        for se, protocols in seInfo.items():
            print(se.ljust(30) + ", ".join(protocols).ljust(30))
    return S_OK(seInfo)
def __getProtocols( self ):
    """ returns list of protocols to use at a given site

    :warn: priority is given to a protocols list defined in the CS

    :param self: self reference
    :return: S_OK( protocols list ), the failed getSections result, or
             S_ERROR when neither an SE list nor defaults are defined
    """
    # NOTE(review): the storage name operand was missing from the source;
    # self.name is assumed here - confirm.
    sections = gConfig.getSections( '/Resources/StorageElements/%s/' % ( self.name ) )
    self.log.debug( "GFAL2_SRM2Storage.__getProtocols: Trying to get protocols for storage %s." % self.name )
    if not sections['OK']:
        return sections
    protocolsList = []
    for section in sections['Value']:
        path = '/Resources/StorageElements/%s/%s/ProtocolName' % ( self.name, section )
        if gConfig.getValue( path, '' ) == self.protocol:
            protPath = '/Resources/StorageElements/%s/%s/ProtocolsList' % ( self.name, section )
            siteProtocols = gConfig.getValue( protPath, [] )
            if siteProtocols:
                self.log.debug( 'GFAL2_SRM2Storage.__getProtocols: Found SE protocols list to override defaults:',
                                ', '.join( siteProtocols ) )
                protocolsList = siteProtocols
    if not protocolsList:
        self.log.debug( "GFAL2_SRM2Storage.__getProtocols: No protocols provided, using the default protocols." )
        protocolsList = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', [] )
        self.log.debug( 'GFAL2_SRM2Storage.__getProtocols: protocolList = %s' % protocolsList )
    # if there is even no default protocol
    if not protocolsList:
        return S_ERROR( "GFAL2_SRM2Storage.__getProtocols: No local protocols defined and no defaults found." )
    return S_OK( protocolsList )
def initializeStorageElementProxyHandler( serviceInfo ):
    """ handler initialisation

    Reads the base path from the service section and creates it if needed.
    When ``HttpAccess`` is enabled, the HTTP cache directory is created and
    an ``HttpThread`` is instantiated on the configured port.

    :param serviceInfo: dictionary providing the 'serviceSectionPath' key
    :return: S_OK() on success, S_ERROR if no base path is configured
    """
    global BASE_PATH, HTTP_FLAG, HTTP_PORT, HTTP_PATH
    cfgPath = serviceInfo['serviceSectionPath']
    BASE_PATH = gConfig.getValue( "%s/BasePath" % cfgPath, BASE_PATH )
    if not BASE_PATH:
        gLogger.error( 'Failed to get the base path' )
        return S_ERROR( 'Failed to get the base path' )
    gLogger.info('The base path obtained is %s. Checking its existence...' % BASE_PATH)
    if not os.path.exists(BASE_PATH):
        gLogger.info('%s did not exist. Creating....' % BASE_PATH)
        os.makedirs(BASE_PATH)
    HTTP_FLAG = gConfig.getValue( "%s/HttpAccess" % cfgPath, False )
    if HTTP_FLAG:
        # The cache defaults to <BASE_PATH>/httpCache unless HttpCache is set.
        HTTP_PATH = '%s/httpCache' % BASE_PATH
        HTTP_PATH = gConfig.getValue( "%s/HttpCache" % cfgPath, HTTP_PATH )
        if not os.path.exists( HTTP_PATH ):
            gLogger.info('Creating HTTP cache directory %s' % (HTTP_PATH) )
            os.makedirs( HTTP_PATH )
        HTTP_PORT = gConfig.getValue( "%s/HttpPort" % cfgPath, 9180 )
        gLogger.info('Creating HTTP server thread, port:%d, path:%s' % ( HTTP_PORT, HTTP_PATH ) )
        httpThread = HttpThread( HTTP_PORT, HTTP_PATH )
    return S_OK()
def __getProtocols(self):
    """ returns list of protocols to use at a given site

    :warn: priority is given to a protocols list defined in the CS

    :param self: self reference
    :return: S_OK( protocols list ), the failed getSections result, or
             S_ERROR when neither an SE list nor defaults are defined
    """
    # NOTE(review): the storage name operand was missing from the source;
    # self.name is assumed here - confirm.
    sections = gConfig.getSections("/Resources/StorageElements/%s/" % (self.name))
    self.log.debug("GFAL2_SRM2Storage.__getProtocols: Trying to get protocols for storage %s." % self.name)
    if not sections["OK"]:
        return sections
    protocolsList = []
    for section in sections["Value"]:
        path = "/Resources/StorageElements/%s/%s/PluginName" % (self.name, section)
        if gConfig.getValue(path, "") == self.pluginName:
            protPath = "/Resources/StorageElements/%s/%s/ProtocolsList" % (self.name, section)
            siteProtocols = gConfig.getValue(protPath, [])
            if siteProtocols:
                self.log.debug(
                    "GFAL2_SRM2Storage.__getProtocols: Found SE protocols list to override defaults:",
                    ", ".join(siteProtocols),
                )
                protocolsList = siteProtocols
    if not protocolsList:
        self.log.debug("GFAL2_SRM2Storage.__getProtocols: No protocols provided, using the default protocols.")
        protocolsList = gConfig.getValue("/Resources/StorageElements/DefaultProtocols", [])
        self.log.debug("GFAL2_SRM2Storage.__getProtocols: protocolList = %s" % protocolsList)
    # if there is even no default protocol
    if not protocolsList:
        return S_ERROR("GFAL2_SRM2Storage.__getProtocols: No local protocols defined and no defaults found.")
    return S_OK(protocolsList)
def __getCEName( self ):
    """ Try to get the CE name

    Sets ``self.pp.ceName`` and, when available, ``self.pp.queueName``
    according to the pilot flavour. For the LCG/gLite/OSG flavours the
    process exits with status 1 if no CE information is found.
    """
    # FIXME: this should not be part of the standard configuration (flavours discriminations should stay out)
    if self.pp.flavour in ['LCG', 'gLite', 'OSG']:
        retCode, CEName = self.executeAndGetOutput( 'glite-brokerinfo getCE', self.pp.installEnv )
        if not retCode:
            self.pp.ceName = CEName.split( ':' )[0]
            if len( CEName.split( '/' ) ) > 1:
                self.pp.queueName = CEName.split( '/' )[1]
        elif 'OSG_JOB_CONTACT' in os.environ:
            # OSG_JOB_CONTACT String specifying the endpoint to use within the job submission
            # for reaching the site
            CE = os.environ['OSG_JOB_CONTACT']
            self.pp.ceName = CE.split( '/' )[0]
            if len( CE.split( '/' ) ) > 1:
                self.pp.queueName = CE.split( '/' )[1]
                # configureOpts.append( '-N "%s"' % cliParams.ceName )
        else:
            # is it already present?
            from DIRAC import gConfig
            ceName = gConfig.getValue( 'LocalSite/GridCE', '' )
            ceQueue = gConfig.getValue( 'LocalSite/CEQueue', '' )
            if ceName and ceQueue:
                self.pp.ceName = ceName
                self.pp.queueName = ceQueue
            else:
                self.log.error( "Can't find ceName nor queue... have to fail!" )
                sys.exit( 1 )
    elif self.pp.flavour == "CREAM":
        # dict.has_key() no longer exists in Python 3; the 'in' operator is
        # the equivalent membership test.
        if 'CE_ID' in os.environ:
            self.pp.ceName = os.environ['CE_ID'].split( ':' )[0]
            if os.environ['CE_ID'].count( "/" ):
                self.pp.queueName = os.environ['CE_ID'].split( '/' )[1]
def __init__( self, server = False, serverCert = False, serverKey = False, voName = False, timeout = False ):
    """Store the security settings, falling back to configured defaults.

    :param server: server name; defaults to /DIRAC/VOPolicy/MyProxyServer
    :param serverCert: certificate location; defaults to the host
                       certificate, then to the servercert.pem under
                       DIRAC.rootPath
    :param serverKey: key location; same fallback chain with serverkey.pem
    :param voName: VO name; defaults to ``getVO( "unknown" )``
    :param timeout: command timeout; defaults to 30
    """
    self._secCmdTimeout = timeout if timeout else 30
    self._secServer = server if server else gConfig.getValue( "/DIRAC/VOPolicy/MyProxyServer", "" )
    self._secVO = voName if voName else getVO( "unknown" )
    ckLoc = Locations.getHostCertificateAndKeyLocation()
    # Explicit argument first, then the host certificate/key pair, then the
    # default location under DIRAC.rootPath.
    if serverCert:
        self._secCertLoc = serverCert
    elif ckLoc:
        self._secCertLoc = ckLoc[0]
    else:
        self._secCertLoc = "%s/etc/grid-security/servercert.pem" % DIRAC.rootPath
    if serverKey:
        self._secKeyLoc = serverKey
    elif ckLoc:
        self._secKeyLoc = ckLoc[1]
    else:
        self._secKeyLoc = "%s/etc/grid-security/serverkey.pem" % DIRAC.rootPath
    trustedHost = gConfig.getValue( "/DIRAC/VOPolicy/MyProxyTrustedHost", "True" )
    self._secRunningFromTrustedHost = trustedHost.lower() in ( "y", "yes", "true" )
    self._secMaxProxyHours = gConfig.getValue( "/DIRAC/VOPolicy/MyProxyMaxDelegationTime", 168 )
def __irodsClient( self , user = None ):
    """Open an iRods connection and log in with the configured password.

    :param user: None to use the remote credentials' user name, "r" or "w"
                 to use the user configured under ``<cfgPath>/read`` or
                 ``<cfgPath>/write``
    :return: ``( conn, errMsg )`` on success, ``( False, message )`` when no
             user is available or the login fails
    """
    global IRODS_USER
    password = None
    cfgPath = self.serviceInfoDict[ 'serviceSectionPath' ]
    gLogger.debug( "cfgPath: %s" % cfgPath )
    if not user:
        credentials = self.getRemoteCredentials()
        if credentials and ( "username" in credentials ):
            IRODS_USER = credentials[ "username" ]
            ## TODO: should get user password somehow
    elif user == "r":
        IRODS_USER = gConfig.getValue( "%s/read" % cfgPath , IRODS_USER )
    elif user == "w":
        IRODS_USER = gConfig.getValue( "%s/write" % cfgPath , IRODS_USER )
    if not IRODS_USER:
        return False , "Failed to get iRods user"
    gLogger.debug( "iRods user: %s" % IRODS_USER )
    # The password option is named after the user inside the service section.
    passwordPath = "%s/%s" % ( cfgPath , IRODS_USER )
    password = gConfig.getValue( passwordPath , password )
    conn , errMsg = rcConnect( IRODS_HOST , IRODS_PORT , IRODS_USER , IRODS_ZONE )
    status = clientLoginWithPassword( conn , password )
    if not status == 0:
        return False , "Failed to authenticate user '%s'" % IRODS_USER
    return conn , errMsg
def getMailDict(self , names=None):
    """ Convert list of usernames to dict like { e-mail : full name }

    Argument is a list. Return value is a dict

    Users without a registered e-mail are skipped with an error message.
    A missing full name is replaced by the user name.

    :param names: list of user names; None or an empty list gives {}
    :return: dict mapping e-mail address to full name
    """
    resultDict = dict()
    if not names:
        return resultDict
    for user in names:
        email = gConfig.getValue("/Registry/Users/%s/Email" % user , "")
        gLogger.debug("/Registry/Users/%s/Email - '%s'" % (user , email))
        # Keep the stripped value: the typo 'emil' discarded it, so a
        # whitespace-only address passed the emptiness check below.
        email = email.strip()
        if not email:
            gLogger.error("Can't find value for option /Registry/Users/%s/Email" % user)
            continue
        fname = gConfig.getValue("/Registry/Users/%s/FullName" % user , "")
        gLogger.debug("/Registry/Users/%s/FullName - '%s'" % (user , fname))
        fname = fname.strip()
        if not fname:
            fname = user
            gLogger.debug("FullName is absent, name to be used: %s" % fname)
        resultDict[ email ] = fname
    return resultDict
def export_getValues( self ):
    """Sort the configured 'val' entries by their 'good' flag.

    For every entry listed in ``/Resources/NewResources/toUse`` the
    ``sub<entry>/val`` value goes to 'Successful' when ``sub<entry>/good``
    is true and to 'Failed' otherwise. The frame information of the
    current call is printed first.

    :return: S_OK( { 'Successful' : [...], 'Failed' : [...] } )
    """
    frameInfo = inspect.getouterframes( inspect.currentframe() )[0]
    ( frame, filename, line_number, function_name, lines, index ) = frameInfo
    print( frame, filename, line_number, function_name, lines, index )
    successful = []
    failed = []
    for num in gConfig.getValue( '/Resources/NewResources/toUse', [] ):
        path = '/Resources/NewResources/sub' + num + '/'
        # Read the flag first, then the value, and file it in the right list.
        isGood = gConfig.getValue( path + 'good', False )
        value = gConfig.getValue( path + 'val', 'unknown' )
        target = successful if isGood else failed
        target.append( value )
    return S_OK( { 'Successful': successful, 'Failed' : failed } )
def _getConfigStorageOptions( self, storageName ):
    """ Collect the CS options of a StorageElement together with its status.

    :param storageName: name of the storage element section under
                        ``self.rootConfigPath``
    :return: S_OK( options dict ) or S_ERROR when the options or the status
             cannot be obtained
    """
    storageConfigPath = cfgPath( self.rootConfigPath, storageName )
    res = gConfig.getOptions( storageConfigPath )
    if not res['OK']:
        errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
        gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
        return S_ERROR( errStr )
    # The access flags are skipped here; the status values are merged in
    # from the resource status at the end.
    accessOptions = [ 'ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']
    optionsDict = {}
    for option in res['Value']:
        if option in accessOptions:
            continue
        optionConfigPath = cfgPath( storageConfigPath, option )
        # 'VO' is read as a list, every other option as a string.
        defaultValue = [] if option in [ 'VO' ] else ''
        optionsDict[option] = gConfig.getValue( optionConfigPath, defaultValue )
    res = self.resourceStatus.getStorageElementStatus( storageName )
    if not res[ 'OK' ]:
        errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
        gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
        return S_ERROR( errStr )
    # Merge the { 'statusType1' : 'status1', ... } dictionary of this
    # storage element into the options.
    optionsDict.update( res[ 'Value' ][ storageName ] )
    return S_OK( optionsDict )
def execute(self):
    """The PilotAgent execution method.

    Refreshes the agent options, handles the pilots not updated recently
    and asks the WMS administrator to clear old pilots.

    :return: S_OK(), or the failed result of the DB connection request
    """
    self.pilotStalledDays = self.am_getOption('PilotStalledDays', 3)
    self.gridEnv = self.am_getOption('GridEnv')
    if not self.gridEnv:
        # No specific option found, try a general one
        setup = gConfig.getValue('/DIRAC/Setup', '')
        if setup:
            instance = gConfig.getValue('/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
            if instance:
                self.gridEnv = gConfig.getValue('/Systems/WorkloadManagement/%s/GridEnv' % instance, '')
    result = self.pilotDB._getConnection()
    if not result['OK']:
        return result
    connection = result['Value']
    # Now handle pilots not updated in the last N days (most likely the Broker is no
    # longer available) and declare them Deleted.
    try:
        result = self.handleOldPilots(connection)
    finally:
        # Release the connection even when handleOldPilots raises.
        connection.close()
    result = self.WMSAdministrator.clearPilots(self.clearPilotsDelay, self.clearAbortedDelay)
    if not result['OK']:
        self.log.warn('Failed to clear old pilots in the PilotAgentsDB')
    return S_OK()
def execute( self ):
    """ Run one monitoring cycle.

    Nothing is processed when monitoring is not enabled for the current
    setup or when this host is listed as being in maintenance; otherwise
    the overall status of services and agents is processed.

    :return: always S_OK()
    """
    component = "Framework/LemonAgent"
    monitoredSetups = gConfig.getValue('/Operations/lhcb/Lemon/MonitoredSetups', ['LHCb-Production'])
    self.monitoringEnabled = self.setup in monitoredSetups
    if not self.monitoringEnabled:
        self._log(component, self.NON_CRITICAL, self.OK,
                  "Monitoring not enabled for this setup: " + self.setup +". Exiting.")
        return S_OK()
    hostsInMaintenance = gConfig.getValue('/Operations/lhcb/Lemon/HostsInMaintenance',[])
    if gethostname() in hostsInMaintenance:
        self._log(component, self.NON_CRITICAL, self.OK, "I am in maintenance mode, exiting.")
        return S_OK()
    result = self.admClient.getOverallStatus()
    if not result or not result['OK']:
        self._log(component, self.CRITICAL, self.FAILURE, "Can not obtain result!!")
        return S_OK()
    statusDict = result[ 'Value' ]
    services = statusDict[ 'Services' ]
    agents = statusDict[ 'Agents' ]
    self._processResults(services)
    self._processResults(agents)
    return S_OK()
def configure( self, csSection, submitPool ):
    """ Here goes common configuration for all PilotDirectors

    Loads the options from ``csSection``, reads the pilot version and
    installation from ``Operations/<VO>/<setup>/Versions`` and logs the
    resulting configuration.

    :param csSection: configuration section of the director
    :param submitPool: submit pool passed on to ``reloadConfiguration``
    """
    self.configureFromSection( csSection )
    self.reloadConfiguration( csSection, submitPool )
    setup = gConfig.getValue( '/DIRAC/Setup', '' )
    section = cfgPath( 'Operations', self.virtualOrganization, setup, 'Versions' )
    self.installVersion = gConfig.getValue( cfgPath( section, 'PilotVersion' ), self.installVersion )
    self.installInstallation = gConfig.getValue( cfgPath( section, 'PilotInstallation' ), self.installInstallation )
    # NOTE(review): the logger call prefixes were missing from the source;
    # self.log.info is assumed - confirm the intended log level.
    self.log.info( '===============================================' )
    self.log.info( 'Configuration:' )
    self.log.info( '' )
    self.log.info( ' Install script: ', self.install )
    self.log.info( ' Pilot script: ', self.pilot )
    self.log.info( ' Install Ver: ', self.installVersion )
    if self.installInstallation:
        self.log.info( ' Installation: ', self.installInstallation )
    if self.extraPilotOptions:
        self.log.info( ' Exta Options: ', ' '.join( self.extraPilotOptions ) )
    self.log.info( ' ListMatch: ', self.enableListMatch )
    self.log.info( ' Private %: ', self.privatePilotFraction * 100 )
    if self.enableListMatch:
        self.log.info( ' ListMatch Delay:', self.listMatchDelay )
        # NOTE(review): assumed to belong to the ListMatch branch - confirm.
        self.listMatchCache.purgeExpired()
def initializePlottingHandler( serviceInfo ):
    """Prepare the plotting service.

    Resolves the plots directory from the ``DataLocation`` option of the
    ``Framework/Plotting`` section (relative paths are resolved against
    ``/LocalSite/InstancePath``), checks it is writable, hands it to the
    plot cache and registers the monitoring activity.

    :param serviceInfo: service information (not used in this function)
    :return: S_OK() on success, S_ERROR if the location is not writable
    """
    #Get data location
    plottingSection = PathFinder.getServiceSection( "Framework/Plotting" )
    dataPath = gConfig.getValue( "%s/DataLocation" % plottingSection, "data/graphs" )
    dataPath = dataPath.strip()
    # startswith() also copes with an empty option value, where indexing
    # dataPath[0] would raise IndexError.
    if not dataPath.startswith( "/" ):
        dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
    gLogger.info( "Data will be written into %s" % dataPath )
    try:
        os.makedirs( dataPath )
    except OSError:
        # Best effort: the directory may already exist. Real problems are
        # caught by the write test below.
        pass
    try:
        testFile = "%s/plot__.test" % dataPath
        fd = open( testFile, "w" )
        fd.close()
        os.unlink( testFile )
    except IOError:
        gLogger.fatal( "Can't write to %s" % dataPath )
        return S_ERROR( "Data location is not writable" )
    gPlotCache.setPlotsLocation( dataPath )
    gMonitor.registerActivity( "plotsDrawn", "Drawn plot images", "Plotting requests", "plots", gMonitor.OP_SUM )
    return S_OK()
def getSiteProtocols( self, site, printOutput = False ):
    """Allows to check the defined protocols for each site SE.

    :param site: site name; the part before the first '.' selects the grid
                 section under /Resources/Sites
    :param printOutput: when true, print a summary table to stdout
    :return: S_OK( { se : protocols list } ) for the SEs having an SRM2
             protocol section, or S_ERROR / the failed CS result
    """
    result = self.__checkSiteIsValid( site )
    if not result['OK']:
        return result
    siteSection = '/Resources/Sites/%s/%s/SE' % ( site.split( '.' )[0], site )
    siteSEs = gConfig.getValue( siteSection, [] )
    if not siteSEs:
        return S_ERROR( 'No SEs found for site %s in section %s' % ( site, siteSection ) )
    defaultProtocols = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', [] )
    # The comma matters: without it the two literals are concatenated and the
    # message becomes the join separator.
    self.log.verbose( 'Default list of protocols are', ', '.join( defaultProtocols ) )
    seInfo = {}
    siteSEs.sort()
    for se in siteSEs:
        sections = gConfig.getSections( '/Resources/StorageElements/%s/' % ( se ) )
        if not sections['OK']:
            return sections
        for section in sections['Value']:
            if gConfig.getValue( '/Resources/StorageElements/%s/%s/ProtocolName' % ( se, section ), '' ) == 'SRM2':
                path = '/Resources/StorageElements/%s/%s/ProtocolsList' % ( se, section )
                seProtocols = gConfig.getValue( path, [] )
                if not seProtocols:
                    seProtocols = defaultProtocols
                seInfo[se] = seProtocols
    if printOutput:
        # Single-argument print calls behave the same as the print statement.
        print( '\nSummary of protocols for StorageElements at site %s' % site )
        print( '\nStorageElement'.ljust( 30 ) + 'ProtocolsList'.ljust( 30 ) + '\n' )
        for se, protocols in seInfo.items():
            print( se.ljust( 30 ) + ', '.join( protocols ).ljust( 30 ) )
    return S_OK( seInfo )
def __init__(self):
    """Set the default state of the module.

    Calls the parent constructor, then sets the logger, the job/request
    placeholders, the log locations and limits read from the configuration,
    and the behaviour flags.
    """
    super(UploadLogFile, self).__init__()
    self.version = __RCSID__
    self.log = gLogger.getSubLogger("UploadLogFile")

    # Job and workflow context, not known yet at construction time.
    self.PRODUCTION_ID = None
    self.JOB_ID = None
    self.jobID = ""
    self.workflow_commons = None
    self.request = None

    # Log file locations.
    self.logFilePath = ""
    self.logLFNPath = ""
    self.logdir = ""
    self.logExtensions = []

    # Values read from the configuration.
    self.logSE = self.ops.getValue("/LogStorage/LogSE", "LogSE")
    self.root = gConfig.getValue("/LocalSite/Root", os.getcwd())
    self.logSizeLimit = self.ops.getValue("/LogFiles/SizeLimit", 20 * 1024 * 1024)
    self.failoverSEs = gConfig.getValue("/Resources/StorageElementGroups/Tier1-Failover", [])
    self.diracLogo = self.ops.getValue( "/SAM/LogoURL", "" )

    self.rm = ReplicaManager()
    self.experiment = "CLIC"
    self.enable = True
    # flag to put log files to failover by default
    self.failoverTest = False
def _getAccessParams( self, element ):
    ''' Look up the access host and port for the given CE.

    Scans every site under Resources/Sites for a CE list containing
    ``element``.

    :param element: CE name
    :return: S_OK( ( host, 22 ) ) when an SSHHost is configured for the CE,
             S_OK( ( element, 8443 ) ) when it is not, S_ERROR when the CE
             is not found, or the failed getSections result
    '''
    _basePath = 'Resources/Sites'
    domains = gConfig.getSections( _basePath )
    if not domains[ 'OK' ]:
        return domains
    for domain in domains[ 'Value' ]:
        sites = gConfig.getSections( '%s/%s' % ( _basePath, domain ) )
        if not sites[ 'OK' ]:
            return sites
        for site in sites[ 'Value' ]:
            ceOption = gConfig.getValue( '%s/%s/%s/CE' % ( _basePath, domain, site ), '' )
            ces = [ ceName.strip() for ceName in ceOption.split(',') ]
            if element not in ces:
                continue
            host = gConfig.getValue('%s/%s/%s/CEs/%s/SSHHost' % ( _basePath, domain, site, element ))
            if not host:
                return S_OK((element, 8443))
            # Keep only the part of the host value before the first '/'.
            slashPos = host.find('/')
            if slashPos != -1:
                host = host[ 0 : slashPos ]
            return S_OK((host, 22))
    return S_ERROR('%s is not a vaild CE.' % element)
def initializeMonitoringHandler( serviceInfo ):
    """Prepare the monitoring service.

    Resolves the data directory from the ``DataLocation`` option of the
    ``Framework/Monitoring`` section (relative paths are resolved against
    ``/LocalSite/InstancePath``), checks it is writable, initialises the
    service interface and its DB, and registers the monitoring activities.

    :param serviceInfo: service information (not used in this function)
    :return: S_OK() on success, S_ERROR if the location is not writable or
             the DB engine cannot be started
    """
    #Check that the path is writable
    monitoringSection = PathFinder.getServiceSection( "Framework/Monitoring" )
    #Get data location
    dataPath = gConfig.getValue( "%s/DataLocation" % monitoringSection, "data/monitoring" )
    dataPath = dataPath.strip()
    # startswith() also copes with an empty option value, where indexing
    # dataPath[0] would raise IndexError.
    if not dataPath.startswith( "/" ):
        dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
    gLogger.info( "Data will be written into %s" % dataPath )
    try:
        os.makedirs( dataPath )
    except OSError:
        # Best effort: the directory may already exist. Real problems are
        # caught by the write test below.
        pass
    try:
        testFile = "%s/mon.jarl.test" % dataPath
        fd = open( testFile, "w" )
        fd.close()
        os.unlink( testFile )
    except IOError:
        gLogger.fatal( "Can't write to %s" % dataPath )
        return S_ERROR( "Data location is not writable" )
    #Define globals
    gServiceInterface.initialize( dataPath )
    if not gServiceInterface.initializeDB():
        return S_ERROR( "Can't start db engine" )
    gMonitor.registerActivity( "cachedplots", "Cached plot images", "Monitoring plots", "plots", gMonitor.OP_SUM )
    gMonitor.registerActivity( "drawnplots", "Drawn plot images", "Monitoring plots", "plots", gMonitor.OP_SUM )
    return S_OK()
def _updateSoftwarePy2(self, version, rootPath, diracOSVersion):
    """Update the local DIRAC software installation to version

    Runs ``dirac-install`` (downloading it to a temporary file when it is not
    on the PATH) with the options derived from /LocalInstallation.

    :param version: release to install
    :param rootPath: optional installation root, passed with ``-P``
    :param diracOSVersion: not used by this method
    :return: S_OK() on success, S_ERROR with the error lines of the installer
             output (or a generic message) otherwise
    """
    # Check that we have a sane local configuration
    result = gConfig.getOptionsDict("/LocalInstallation")
    if not result["OK"]:
        return S_ERROR("Invalid installation - missing /LocalInstallation section in the configuration")
    elif not result["Value"]:
        return S_ERROR("Invalid installation - empty /LocalInstallation section in the configuration")

    if rootPath and not os.path.exists(rootPath):
        return S_ERROR('Path "%s" does not exists' % rootPath)

    installer = None
    try:
        if not find_executable("dirac-install"):
            installer = tempfile.NamedTemporaryFile(suffix=".py", mode="wb")
            # NOTE(review): the download URL literal is empty in this copy of the file;
            # restore the dirac-install location before relying on this branch.
            with requests.get("", stream=True) as r:
                if not r.ok:
                    return S_ERROR("Failed to download dirac-install from management repo")
                for chunk in r.iter_content(chunk_size=1024**2):
                    installer.write(chunk)
            installer.flush()
            gLogger.info("Downloaded py2 installer to", installer.name)
            cmdList = ["python", installer.name, "-r", version, "-t", "server"]
        else:
            cmdList = ["dirac-install", "-r", version, "-t", "server"]

        if rootPath:
            cmdList.extend(["-P", rootPath])

        # Check if there are extensions. By default we do not install WebApp.
        # A new list is built so a missing (None) extension list is tolerated and
        # the list returned by getCSExtensions() is not modified in place.
        extensionList = [ext for ext in (getCSExtensions() or []) if ext != "WebApp"]

        webPortal = gConfig.getValue("/LocalInstallation/WebApp", False)
        if webPortal and "WebAppDIRAC" not in extensionList:
            extensionList.append("WebAppDIRAC")

        cmdList += ["-e", ",".join(extensionList)]

        project = gConfig.getValue("/LocalInstallation/Project")
        if project:
            cmdList += ["-l", project]

        targetPath = gConfig.getValue(
            "/LocalInstallation/TargetPath", gConfig.getValue("/LocalInstallation/RootPath", "")
        )
        if targetPath and os.path.exists(targetPath + "/etc/dirac.cfg"):
            cmdList.append(targetPath + "/etc/dirac.cfg")
        else:
            return S_ERROR("Local configuration not found")

        result = systemCall(240, cmdList)
    finally:
        # Closing removes the temporary installer, also on the early returns above
        if installer:
            installer.close()

    if not result["OK"]:
        return result
    status = result["Value"][0]
    if status == 0:
        return S_OK()

    # Get error messages
    error = [line.strip() for line in result["Value"][1].split("\n") if "error" in line.lower()]
    # join first, then fall back: joining the fallback string itself would
    # put a newline between each of its characters
    return S_ERROR("\n".join(error) or "Failed to update software to %s" % version)
def main():
    """Collect the installed components of all reachable hosts and register them.

    Queries every host through SystemAdministratorIntegrator, mails a
    notification for unreachable hosts, builds one record per installed
    component and per installed DIRAC database for hosts in the local setup,
    and hands each record to MonitoringUtilities.monitorInstallation.
    """
    global excludedHosts

    def orExit(res):
        """Return res if it is OK, otherwise log its message and exit with -1."""
        if not res["OK"]:
            gLogger.error(res["Message"])
            DIRACexit(-1)
        return res

    def newRecord(system, module, compType, host, cpu, instance, user):
        """Build one installation record."""
        return {
            "Installation": {
                "Instance": instance,
                "InstallationTime": datetime.utcnow(),
                "InstalledBy": user,
            },
            "Component": {"System": system, "Module": module, "Type": compType},
            "Host": {"HostName": host, "CPU": cpu},
        }

    Script.registerSwitch(
        "e:",
        "exclude=",
        "Comma separated list of hosts to be excluded from the scanning process",
        setExcludedHosts,
    )
    Script.parseCommandLine(ignoreErrors=False)

    componentType = ""

    # Get my setup
    mySetup = gConfig.getValue("DIRAC/Setup")

    # Retrieve information from all the hosts
    client = SystemAdministratorIntegrator(exclude=excludedHosts)
    resultAll = orExit(client.getOverallStatus())

    # Retrieve user installing the component
    proxyInfo = getProxyInfo()
    if not proxyInfo["OK"]:
        DIRACexit(-1)
    else:
        user = proxyInfo["Value"]["username"]
    if not user:
        user = "******"

    for host, hostStatus in resultAll["Value"].items():
        if hostStatus["OK"]:
            continue
        # If the host cannot be contacted, exclude it and send message
        excludedHosts.append(host)
        mailResult = NotificationClient().sendMail(
            Operations().getValue("EMail/Production", []),
            "Unreachable host",
            "\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n"
            % host,
        )
        if not mailResult["OK"]:
            gLogger.error("Can not send unreachable host notification mail: %s" % mailResult["Message"])

    resultHosts = orExit(client.getHostInfo())
    resultInfo = orExit(client.getInfo())
    resultMySQL = orExit(client.getMySQLStatus())
    resultAllDB = orExit(client.getDatabases())
    resultAvailableDB = orExit(client.getAvailableDatabases())

    records = []
    finalSet = list(set(resultAll["Value"]) - set(excludedHosts))
    for host in finalSet:
        hasMySQL = True
        result = resultAll["Value"][host]
        hostResult = resultHosts["Value"][host]
        infoResult = resultInfo["Value"][host]
        mySQLResult = resultMySQL["Value"][host]
        allDBResult = resultAllDB["Value"][host]
        availableDBResult = resultAvailableDB["Value"][host]

        # The first failing mandatory answer makes us skip the host
        failed = None
        for answer in (result, hostResult, infoResult):
            if not answer["OK"]:
                failed = answer
                break
        if failed is None:
            if not mySQLResult["OK"]:
                hasMySQL = False
            elif not allDBResult["OK"]:
                failed = allDBResult
            elif not availableDBResult["OK"]:
                failed = availableDBResult
        if failed is not None:
            gLogger.error("Host %s: %s" % (host, failed["Message"]))
            continue

        setup = infoResult["Value"]["Setup"]
        if setup != mySetup:
            continue

        cpu = hostResult["Value"]["CPUModel"].strip()
        rDict = result["Value"]

        # Components other than databases
        for compType in rDict:
            if componentType and componentType != compType:
                continue
            for system in rDict[compType]:
                for component in sorted(rDict[compType][system]):
                    compInfo = rDict[compType][system][component]
                    if not (compInfo["Installed"] and component != "ComponentMonitoring"):
                        continue
                    if str(compInfo["RunitStatus"]) == "Unknown":
                        continue
                    module = str(compInfo["Module"])
                    # Transform 'Services' into 'service', 'Agents' into 'agent' ...
                    records.append(newRecord(system, module, compType.lower()[:-1], host, cpu, component, user))

        # Databases
        csClient = CSAPI()
        cfg = csClient.getCurrentCFG()["Value"]

        if not hasMySQL:
            continue
        allDB = allDBResult["Value"]
        availableDB = availableDBResult["Value"]
        for db in allDB:
            # Check for DIRAC only databases
            if db not in availableDB or db == "InstalledComponentsDB":
                continue
            dbSystem = availableDB[db]["System"]
            # Check for 'installed' databases
            dbSection = (
                "Systems/"
                + dbSystem
                + "/"
                + cfg.getOption("DIRAC/Setups/" + setup + "/" + dbSystem)
                + "/Databases/"
                + db
                + "/"
            )
            if cfg.isSection(dbSection):
                records.append(newRecord(availableDB[db]["System"], db, "DB", host, cpu, db, user))

    monitoringClient = ComponentMonitoringClient()

    # Add the installations to the database
    for record in records:
        component = record["Component"]
        registered = MonitoringUtilities.monitorInstallation(
            component["Type"],
            component["System"],
            record["Installation"]["Instance"],
            component["Module"],
            record["Host"]["CPU"],
            record["Host"]["HostName"],
        )
        if not registered["OK"]:
            gLogger.error(registered["Message"])
def __getPilotOptions(self, queue, pilotsToSubmit):
    """Prepare pilot options

    :param queue: key of the queue in self.queueDict
    :param pilotsToSubmit: number of pilots the caller would like to submit
    :return: [pilotOptions, pilotsToSubmit] where pilotsToSubmit may have been
             changed according to the proxy token, or [None, None] on failure
    """
    queueDict = self.queueDict[queue]['ParametersDict']
    pilotOptions = []

    setup = gConfig.getValue("/DIRAC/Setup", "unknown")
    if setup == 'unknown':
        self.log.error('Setup is not defined in the configuration')
        return [None, None]
    pilotOptions.append('-S %s' % setup)
    opsHelper = Operations.Operations(group=self.pilotGroup, setup=setup)

    # Installation defined?
    installationName = opsHelper.getValue("Pilot/Installation", "")
    if installationName:
        pilotOptions.append('-V %s' % installationName)

    # Project defined?
    projectName = opsHelper.getValue("Pilot/Project", "")
    if projectName:
        pilotOptions.append('-l %s' % projectName)
    else:
        self.log.info('DIRAC project will be installed by pilots')

    # Request a release
    diracVersion = opsHelper.getValue("Pilot/Version", [])
    if not diracVersion:
        self.log.error('Pilot/Version is not defined in the configuration')
        return [None, None]
    # diracVersion is a list of accepted releases. Just take the first one
    pilotOptions.append('-r %s' % diracVersion[0])

    ownerDN = self.pilotDN
    ownerGroup = self.pilotGroup
    # Request token for maximum pilot efficiency
    result = gProxyManager.requestToken(ownerDN, ownerGroup, pilotsToSubmit * self.maxJobsInFillMode)
    if not result['OK']:
        self.log.error('Invalid proxy token request', result['Message'])
        return [None, None]
    (token, numberOfUses) = result['Value']
    pilotOptions.append('-o /Security/ProxyToken=%s' % token)

    # Use Filling mode
    jobsPerPilot = min(numberOfUses, self.maxJobsInFillMode)
    if jobsPerPilot <= 0:
        # Nothing can be executed with this token; also avoids dividing by zero below
        self.log.error('Proxy token does not allow any job execution')
        return [None, None]
    pilotOptions.append('-M %s' % jobsPerPilot)

    # Since each pilot will execute min( numberOfUses, self.maxJobsInFillMode )
    # with numberOfUses tokens we can submit at most:
    #    numberOfUses / min( numberOfUses, self.maxJobsInFillMode )
    # pilots. Floor division keeps the result an integer on Python 3 as well.
    newPilotsToSubmit = numberOfUses // jobsPerPilot
    if newPilotsToSubmit != pilotsToSubmit:
        self.log.info('Number of pilots to submit is changed to %d after getting the proxy token' % newPilotsToSubmit)
        pilotsToSubmit = newPilotsToSubmit

    # Debug
    if self.pilotLogLevel.lower() == 'debug':
        pilotOptions.append('-d')

    # CS Servers
    csServers = gConfig.getValue("/DIRAC/Configuration/Servers", [])
    pilotOptions.append('-C %s' % ",".join(csServers))

    # DIRAC Extensions
    extensionsList = CSGlobals.getCSExtensions()
    if extensionsList:
        pilotOptions.append('-e %s' % ",".join(extensionsList))

    # Requested CPU time
    pilotOptions.append('-T %s' % queueDict['CPUTime'])
    # CEName
    pilotOptions.append('-N %s' % self.queueDict[queue]['CEName'])
    # SiteName
    pilotOptions.append('-n %s' % queueDict['Site'])

    if 'ClientPlatform' in queueDict:
        pilotOptions.append("-p '%s'" % queueDict['ClientPlatform'])

    if 'SharedArea' in queueDict:
        pilotOptions.append("-o '/LocalSite/SharedArea=%s'" % queueDict['SharedArea'])

    if 'SI00' in queueDict:
        # SI00 takes precedence over the explicit factors
        factor = float(queueDict['SI00']) / 250.
        pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" % factor)
        pilotOptions.append("-o '/LocalSite/CPUNormalizationFactor=%s'" % factor)
    else:
        if 'CPUScalingFactor' in queueDict:
            pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" % queueDict['CPUScalingFactor'])
        if 'CPUNormalizationFactor' in queueDict:
            pilotOptions.append("-o '/LocalSite/CPUNormalizationFactor=%s'" % queueDict['CPUNormalizationFactor'])

    # Hack
    if self.defaultSubmitPools:
        pilotOptions.append('-o /Resources/Computing/CEDefaults/SubmitPool=%s' % self.defaultSubmitPools)

    # NOTE(review): this statement was mangled in the file; `self.group` is the
    # presumed attribute behind the -G option - confirm against the class.
    if self.group:
        pilotOptions.append('-G %s' % self.group)

    self.log.verbose("pilotOptions: ", ' '.join(pilotOptions))

    return [pilotOptions, pilotsToSubmit]
def initialize(self, loops=0):
    """Sets default parameters and creates CE instance

    :param loops: value stored in the agent option ``MaxCycles``
    :return: S_OK() on success, otherwise the failed result of the CE factory
             or of the CE description call
    """
    # Disable monitoring, logLevel INFO, limited cycles
    self.am_setOption('MonitoringEnabled', False)
    self.am_setOption('MaxCycles', loops)

    ceType = self.am_getOption('CEType', self.ceName)
    # A CE defined in the local configuration overrides the agent option
    localCE = gConfig.getValue('/LocalSite/LocalCE', '')
    if localCE:
        self.log.info('Defining CE from local configuration', '= %s' % localCE)
        ceType = localCE

    # Create backend Computing Element
    ceFactory = ComputingElementFactory()
    self.ceName = ceType
    ceInstance = ceFactory.getCE(ceType)
    if not ceInstance['OK']:
        self.log.warn("Can't instantiate a CE", ceInstance['Message'])
        return ceInstance
    self.computingElement = ceInstance['Value']

    result = self.computingElement.getDescription()
    if not result['OK']:
        self.log.warn("Can not get the CE description")
        return result
    # The description may come as a list, in which case the first entry is used
    if isinstance(result['Value'], list):
        ceDict = result['Value'][0]
    else:
        ceDict = result['Value']
    self.timeLeft = ceDict.get('CPUTime', self.timeLeft)
    self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
    self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', self.pilotReference)
    self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', self.defaultProxyLength)

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', self.cpuFactor)
    self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', self.jobSubmissionDelay)
    self.fillingMode = self.am_getOption('FillingModeFlag', self.fillingMode)
    self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', self.minimumTimeLeft)
    self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', self.stopOnApplicationFailure)
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)
    self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', self.extraOptions)

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    return S_OK()
def setSiteProtocols(self, site, protocolsList, printOutput=False):
    """Set the given protocols for every SE of a site.

    After validating the site and the requested protocols, and after the
    user confirms, ``ProtocolsList`` is set for each SE section whose
    ``ProtocolName`` is ``SRM2``; the changes are then committed to the CS.

    :param site: site name; the part before the first '.' is used as grid
    :param protocolsList: protocols to set, each must be in the default list
    :param printOutput: print the outcome when True
    :return: S_OK() or S_ERROR / the failed intermediate result
    """
    result = self.__checkSiteIsValid(site)
    if not result['OK']:
        return result

    siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
    siteSEs = gConfig.getValue(siteSection, [])
    if not siteSEs:
        return S_ERROR('No SEs found for site %s in section %s' % (site, siteSection))

    defaultProtocols = gConfig.getValue('/Resources/StorageElements/DefaultProtocols', [])
    allowedProtocols = ', '.join(defaultProtocols)
    self.log.verbose('Default list of protocols are', allowedProtocols)

    for protocol in protocolsList:
        if protocol in defaultProtocols:
            continue
        return S_ERROR(
            'Requested to set protocol %s in list but %s is not '
            'in default list of protocols:\n%s' % (protocol, protocol, ', '.join(defaultProtocols))
        )

    requested = ', '.join(protocolsList)
    answer = promptUser(
        'Do you want to add the following default protocols:'
        ' %s for SE(s):\n%s' % (requested, ', '.join(siteSEs))
    )
    if not answer['OK']:
        return answer
    if answer['Value'].lower() != 'y':
        self.log.always('No protocols will be added')
        return S_OK()

    modifiedCS = False
    for se in siteSEs:
        sections = gConfig.getSections('/Resources/StorageElements/%s/' % (se))
        if not sections['OK']:
            return sections
        for section in sections['Value']:
            protocolName = gConfig.getValue('/Resources/StorageElements/%s/%s/ProtocolName' % (se, section), '')
            if protocolName != 'SRM2':
                continue
            path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (se, section)
            self.log.verbose('Setting %s to %s' % (path, ', '.join(protocolsList)))
            setResult = self.csSetOption(path, ', '.join(protocolsList))
            if not setResult['OK']:
                return setResult
            modifiedCS = True

    if not modifiedCS:
        if printOutput:
            print('No modifications to CS required')
        return S_OK()

    commitResult = self.csCommitChanges(False)
    if not commitResult['OK']:
        return S_ERROR('CS Commit failed with message = %s' % (commitResult['Message']))
    if printOutput:
        print('Successfully committed changes to CS')
    return S_OK()
def __disabled(self):
    """Tell whether the ``DisableMonitoring`` option of self.cfgSection is switched on.

    :return: True when the option (default "false") is one of
             yes / y / true / 1, compared case-insensitively
    """
    optionPath = "%s/DisableMonitoring" % self.cfgSection
    flag = gConfig.getValue(optionPath, "false")
    return flag.lower() in ("yes", "y", "true", "1")
def execute(self):
    """Run one cycle of the stalled job agent.

    Reads the agent options, converts the stalled/failed thresholds from
    watchdog cycles to seconds and runs the four clean-up steps. A failing
    step is only logged; the cycle itself always reports success once the
    WorkloadManagement instance is known.

    :return: S_OK with a completion message, or S_ERROR when the
             WorkloadManagement system instance cannot be determined
    """
    self.log.verbose('Waking up Stalled Job Agent')

    wmsInstance = getSystemInstance('WorkloadManagement')
    if not wmsInstance:
        return S_ERROR('Can not get the WorkloadManagement system instance')
    wrapperSection = cfgPath('Systems', 'WorkloadManagement', wmsInstance, 'JobWrapper')

    stalledCycles = self.am_getOption('StalledTimeHours', 2)
    failedCycles = self.am_getOption('FailedTimeHours', 6)
    self.stalledJobsTolerantSites = self.am_getOption('StalledJobsTolerantSites', [])
    self.stalledJobsToleranceTime = self.am_getOption('StalledJobsToleranceTime', 0)

    self.submittingTime = self.am_getOption('SubmittingTime', self.submittingTime)
    self.matchedTime = self.am_getOption('MatchedTime', self.matchedTime)
    self.rescheduledTime = self.am_getOption('RescheduledTime', self.rescheduledTime)

    self.log.verbose('StalledTime = %s cycles' % (stalledCycles))
    self.log.verbose('FailedTime = %s cycles' % (failedCycles))

    # The watchdog cycle is never taken shorter than MinCheckingTime
    checkingTime = gConfig.getValue(cfgPath(wrapperSection, 'CheckingTime'), 30 * 60)
    minCheckingTime = gConfig.getValue(cfgPath(wrapperSection, 'MinCheckingTime'), 20 * 60)
    watchdogCycle = max(checkingTime, minCheckingTime)

    # Add half cycle to avoid race conditions
    stalledTime = int(watchdogCycle * (stalledCycles + 0.5))
    failedTime = int(watchdogCycle * (failedCycles + 0.5))

    outcome = self._markStalledJobs(stalledTime)
    if not outcome['OK']:
        self.log.error('Failed to detect stalled jobs', outcome['Message'])

    # Note, jobs will be revived automatically during the heartbeat signal phase and
    # subsequent status changes will result in jobs not being selected by the
    # stalled job agent.
    outcome = self._failStalledJobs(failedTime)
    if not outcome['OK']:
        self.log.error('Failed to process stalled jobs', outcome['Message'])

    outcome = self._failSubmittingJobs()
    if not outcome['OK']:
        self.log.error('Failed to process jobs being submitted', outcome['Message'])

    outcome = self._kickStuckJobs()
    if not outcome['OK']:
        self.log.error('Failed to kick stuck jobs', outcome['Message'])

    return S_OK('Stalled Job Agent cycle complete')
def __getSelectionData(self):
    """Collect the values offered by the job selection widgets.

    The result dictionary has the keys ``prod``, ``site``, ``status``,
    ``minorstat``, ``app``, ``types`` and ``owner`` (each a list of
    one-element lists) plus ``extra`` holding the stringified request
    parameters when the request carries any.

    :return: dict as described above
    """

    def toOptions(result, label, convert=str):
        """Turn an RPC result into a widget list, "All" first; log and report failures."""
        if not result["OK"]:
            gLogger.error("%s return error: %s" % (label, result["Message"]))
            return [["Error happened on service side"]]
        if len(result["Value"]) == 0:
            return [["Nothing to display"]]
        return [["All"]] + [[convert(value)] for value in result["Value"]]

    def commaToSemicolon(value):
        """Replace commas, values are later handled as comma separated lists."""
        return value.replace(",", ";")

    callback = {}
    group = credentials.getSelectedGroup()
    user = str(credentials.getUsername())

    if len(request.params) > 0:
        extra = dict((key, str(request.params[key])) for key in request.params)
        callback["extra"] = extra
        if "prod" in extra:
            extra["prod"] = extra["prod"].zfill(8)
            # an all-zero production id means "no production selected"
            if extra["prod"] == "00000000":
                extra["prod"] = ""
        gLogger.info(" - ", callback["extra"])

    # Productions
    if user == "Anonymous":
        callback["prod"] = [["Insufficient rights"]]
    else:
        RPC = getRPCClient("WorkloadManagement/JobMonitoring")
        result = RPC.getProductionIds()
        if result["OK"]:
            prod = []
            prods = result["Value"]
            if len(prods) > 0:
                prod.append(["All"])
                prodIDs = []
                for key in prods:
                    # numeric ids are zero padded to 8 digits, anything else is kept as is
                    try:
                        prodID = str(int(key)).zfill(8)
                    except (TypeError, ValueError):
                        prodID = str(key)
                    prodIDs.append(prodID)
                prodIDs.sort(reverse=True)
                prod.extend([prodID] for prodID in prodIDs)
            else:
                prod = [["Nothing to display"]]
        else:
            gLogger.error("RPC.getProductionIds() return error: %s" % result["Message"])
            prod = [["Error happened on service side"]]
        callback["prod"] = prod

    # Sites: preferred sites first, then the remaining ones
    RPC = getRPCClient("WorkloadManagement/JobMonitoring")
    result = RPC.getSites()
    if result["OK"]:
        tier1 = gConfig.getValue("/Website/PreferredSites", [])  # Always return a list
        if len(result["Value"]) > 0:
            site = [["All"]]
            site.extend([str(name)] for name in tier1)
            site.extend([str(name)] for name in list(result["Value"]) if name not in tier1)
        else:
            site = [["Nothing to display"]]
    else:
        gLogger.error("RPC.getSites() return error: %s" % result["Message"])
        site = [["Error happened on service side"]]
    callback["site"] = site

    callback["status"] = toOptions(RPC.getStates(), "RPC.getStates()")
    callback["minorstat"] = toOptions(RPC.getMinorStates(), "RPC.getMinorStates()", commaToSemicolon)
    callback["app"] = toOptions(RPC.getApplicationStates(), "RPC.getApplicationstates()", commaToSemicolon)
    callback["types"] = toOptions(RPC.getJobTypes(), "RPC.getJobTypes()", commaToSemicolon)

    # Owners
    groupProperty = credentials.getProperties(group)
    if user == "Anonymous":
        callback["owner"] = [["Insufficient rights"]]
    elif "JobAdministrator" not in groupProperty and "JobSharing" not in groupProperty:
        # Both properties are tested explicitly: ("A" or "B") not in x only ever tests "A".
        # NOTE(review): groups holding JobSharing now get the full owner list - confirm this is intended.
        callback["owner"] = [["All"], [str(credentials.getUsername())]]
    else:
        callback["owner"] = toOptions(RPC.getOwners(), "RPC.getOwners()")

    return callback
def getCSDict(self, includeMasterCS=True):
    """Gets minimal info for running a pilot, from the CS

    :param includeMasterCS: when False, the master configuration server is
                            left out of ``ConfigurationServers``
    :returns: pilotDict (containing pilots run info)
    :rtype: S_OK, S_ERROR, value is pilotDict
    """
    pilotDict = {
        "timestamp": datetime.datetime.utcnow().isoformat(),
        "Setups": {},
        "CEs": {},
        "GenericPilotDNs": [],
    }

    self.log.info("-- Getting the content of the CS --")

    # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
    setupsRes = gConfig.getSections("/Operations/")
    if not setupsRes["OK"]:
        self.log.error("Can't get sections from Operations", setupsRes["Message"])
        return setupsRes
    setupsInOperations = setupsRes["Value"]

    # getting the setup(s) in this CS, and comparing with what we found in Operations
    setupsInDIRACRes = gConfig.getSections("DIRAC/Setups")
    if not setupsInDIRACRes["OK"]:
        self.log.error("Can't get sections from DIRAC/Setups", setupsInDIRACRes["Message"])
        return setupsInDIRACRes
    setupsInDIRAC = setupsInDIRACRes["Value"]

    # Handling the case of multi-VO CS
    if not set(setupsInDIRAC).intersection(set(setupsInOperations)):
        vos = list(setupsInOperations)
        for vo in vos:
            setupsFromVOs = gConfig.getSections("/Operations/%s" % vo)
            if not setupsFromVOs["OK"]:
                continue
            # NOTE(review): each VO replaces the list, so only the last readable VO is kept
            setupsInOperations = setupsFromVOs["Value"]

    self.log.verbose("From Operations/[Setup]/Pilot")

    for setup in setupsInOperations:
        self._getPilotOptionsPerSetup(setup, pilotDict)

    self.log.verbose("From Resources/Sites")
    sitesSection = gConfig.getSections("/Resources/Sites/")
    if not sitesSection["OK"]:
        self.log.error("Can't get sections from Resources", sitesSection["Message"])
        return sitesSection

    for grid in sitesSection["Value"]:
        gridSection = gConfig.getSections("/Resources/Sites/" + grid)
        if not gridSection["OK"]:
            self.log.error("Can't get sections from Resources", gridSection["Message"])
            return gridSection

        for site in gridSection["Value"]:
            ceList = gConfig.getSections(cfgPath("/Resources", "Sites", grid, site, "CEs"))
            if not ceList["OK"]:
                # Skip but log it
                self.log.error("Site has no CEs! - skipping", site)
                continue

            for ce in ceList["Value"]:
                # This CEType is like 'HTCondor' or 'ARC' etc.
                ceType = gConfig.getValue(cfgPath("/Resources", "Sites", grid, site, "CEs", ce, "CEType"))
                if ceType is None:
                    # Skip but log it
                    self.log.error("CE has no option CEType!", ce + " at " + site)
                    pilotDict["CEs"][ce] = {"Site": site}
                else:
                    pilotDict["CEs"][ce] = {"Site": site, "GridCEType": ceType}

                # This LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                # It can be in the queue and/or the CE level
                localCEType = gConfig.getValue(
                    cfgPath("/Resources", "Sites", grid, site, "CEs", ce, "LocalCEType")
                )
                if localCEType is not None:
                    pilotDict["CEs"][ce].setdefault("LocalCEType", localCEType)

                res = gConfig.getSections(cfgPath("/Resources", "Sites", grid, site, "CEs", ce, "Queues"))
                if not res["OK"]:
                    # Skip but log it
                    self.log.error("No queues found for CE", ce + ": " + res["Message"])
                    continue
                queueList = res["Value"]
                for queue in queueList:
                    localCEType = gConfig.getValue(
                        cfgPath("/Resources", "Sites", grid, site, "CEs", ce, "Queues", queue, "LocalCEType")
                    )
                    if localCEType is not None:
                        pilotDict["CEs"][ce].setdefault(queue, {"LocalCEType": localCEType})

    defaultSetup = gConfig.getValue("/DIRAC/DefaultSetup")
    if defaultSetup:
        pilotDict["DefaultSetup"] = defaultSetup

    self.log.debug("From DIRAC/Configuration")
    configurationServers = gConfig.getServersList()
    if not includeMasterCS:
        masterCS = gConfigurationData.getMasterServer()
        configurationServers = list(set(configurationServers) - set([masterCS]))
    pilotDict["ConfigurationServers"] = configurationServers

    self.log.debug("Got pilotDict", str(pilotDict))

    return S_OK(pilotDict)
def getNumberOfProcessors(siteName=None, gridCE=None, queue=None):
    """gets the number of processors on a certain CE/queue/node (what the pilot administers)

    The siteName/gridCE/queue parameters are normally not necessary.

    Tries to find it in this order:
    1) from the /Resources/Computing/CEDefaults/NumberOfProcessors (which is what the pilot fills up)
    2) if not present from JobFeatures
    3) if not present looks in CS for "NumberOfProcessors" Queue or CE option
    4) if not present but there's WholeNode tag, look what the WN provides using multiprocessing.cpu_count()
    5) return 1

    :param siteName: site name, defaults to /LocalSite/Site
    :param gridCE: CE name, defaults to /LocalSite/GridCE
    :param queue: queue name, defaults to /LocalSite/CEQueue
    :return: number of processors
    """
    # 1) from /Resources/Computing/CEDefaults/NumberOfProcessors
    gLogger.info("Getting numberOfProcessors from /Resources/Computing/CEDefaults/NumberOfProcessors")
    numberOfProcessors = gConfig.getValue("/Resources/Computing/CEDefaults/NumberOfProcessors", 0)
    if numberOfProcessors:
        return numberOfProcessors

    # 2) from MJF
    gLogger.info("Getting numberOfProcessors from MJF")
    numberOfProcessors = getProcessorFromMJF()
    if numberOfProcessors:
        return numberOfProcessors
    gLogger.info("NumberOfProcessors could not be found in MJF")

    # 3) looks in CS for "NumberOfProcessors" Queue or CE or site option
    if not siteName:
        siteName = gConfig.getValue("/LocalSite/Site", "")
    if not gridCE:
        gridCE = gConfig.getValue("/LocalSite/GridCE", "")
    if not queue:
        queue = gConfig.getValue("/LocalSite/CEQueue", "")
    if not (siteName and gridCE and queue):
        gLogger.error("Could not find NumberOfProcessors: missing siteName or gridCE or queue. Returning '1'")
        return 1

    # The grid is the part of the site name before the first dot
    grid = siteName.split(".")[0]
    sitePath = "/Resources/Sites/%s/%s" % (grid, siteName)
    csPaths = [
        "%s/CEs/%s/Queues/%s/NumberOfProcessors" % (sitePath, gridCE, queue),
        "%s/CEs/%s/NumberOfProcessors" % (sitePath, gridCE),
        "%s/Cloud/%s/VMTypes/%s/NumberOfProcessors" % (sitePath, gridCE, queue),
        "%s/Cloud/%s/NumberOfProcessors" % (sitePath, gridCE),
        "%s/NumberOfProcessors" % sitePath,
    ]
    for csPath in csPaths:
        gLogger.info("Looking in", csPath)
        numberOfProcessors = gConfig.getValue(csPath, 0)
        if numberOfProcessors:
            return numberOfProcessors

    # 4) looks in CS for tags
    gLogger.info("Getting tags for %s: %s: %s" % (siteName, gridCE, queue))
    tagPaths = [
        # Tags of the CE
        "%s/CEs/%s/Tag" % (sitePath, gridCE),
        "%s/Cloud/%s/Tag" % (sitePath, gridCE),
        # Tags of the Queue
        "%s/CEs/%s/Queues/%s/Tag" % (sitePath, gridCE, queue),
        "%s/Cloud/%s/VMTypes/%s/Tag" % (sitePath, gridCE, queue),
    ]
    tags = []
    for tagPath in tagPaths:
        tags += fromChar(gConfig.getValue(tagPath, ""))

    gLogger.info("NumberOfProcessors could not be found in CS")
    if "WholeNode" in tags:
        gLogger.info("Found WholeNode tag, using multiprocessing.cpu_count()")
        return multiprocessing.cpu_count()

    # 5) return the default
    return 1
def getCSOption(self, optionName, defaultValue=None):
    """Read an option from the CS section of this database.

    :param optionName: name of the option below the section returned by
                       getDatabaseSection(self.fullname)
    :param defaultValue: value handed to gConfig.getValue as default
    :return: whatever gConfig.getValue returns for that path
    """
    section = getDatabaseSection(self.fullname)
    optionPath = "/%s/%s" % (section, optionName)
    return gConfig.getValue(optionPath, defaultValue)
Script.showHelp() email = True for switch in Script.getUnprocessedSwitches(): if switch[0] == "email": email = getBoolean( switch[1] ) args = Script.getPositionalArgs() if len( args ) < 2: Script.showHelp() diracAdmin = DiracAdmin() exitCode = 0 errorList = [] setup = gConfig.getValue( '/DIRAC/Setup', '' ) if not setup: print('ERROR: Could not contact Configuration Service') exitCode = 2 DIRACExit( exitCode ) #result = promptUser( 'All the elements that are associated with this site will be banned, are you sure about this action?' ) #if not result['OK'] or result['Value'] is 'n': # print 'Script stopped' # DIRACExit( 0 ) site = args[0] comment = args[1] result = diracAdmin.banSite( site, comment, printOutput = True ) if not result['OK']: errorList.append( ( site, result['Message'] ) )
localConfigFile = os.path.expandvars('$WORKSPACE')+'/ServerInstallDIR/etc/dirac.cfg' elif os.path.isfile( './etc/dirac.cfg' ): localConfigFile = './etc/dirac.cfg' else: print "Local CFG file not found" exit( 2 ) localCfg.loadFromFile( localConfigFile ) if not localCfg.isSection( '/LocalSite' ): localCfg.createNewSection( '/LocalSite' ) localCfg.setOption( '/LocalSite/CPUTimeLeft', 5000 ) localCfg.setOption( '/DIRAC/Security/UseServerCertificate', False ) if not sMod: if not setup: setup = gConfig.getValue('/DIRAC/Setup') if not setup: setup = 'JenkinsSetup' if not vo: vo = gConfig.getValue('/DIRAC/VirtualOrganization') if not vo: vo = 'dirac' if not localCfg.isSection( '/DIRAC/VOPolicy' ): localCfg.createNewSection( '/DIRAC/VOPolicy' ) if not localCfg.isSection( '/DIRAC/VOPolicy/%s' % vo ): localCfg.createNewSection( '/DIRAC/VOPolicy/%s' % vo ) if not localCfg.isSection( '/DIRAC/VOPolicy/%s/%s' % ( vo, setup ) ): localCfg.createNewSection( '/DIRAC/VOPolicy/%s/%s' % ( vo, setup ) ) localCfg.setOption( '/DIRAC/VOPolicy/%s/%s/SoftwareDistModule' % ( vo, setup ), '' )
def __init__(self, *args, **kwargs):
    """c'tor

    Reads the process pool options, loads the operation handlers defined in
    the ``OperationHandlers`` section of the agent and registers the common
    monitoring activities.

    :raises AgentConfigError: when the OperationHandlers section cannot be read
    """
    # # call base class ctor
    AgentModule.__init__(self, *args, **kwargs)

    # # ProcessPool related stuff
    self.__requestsPerCycle = self.am_getOption("RequestsPerCycle", self.__requestsPerCycle)
    self.log.info("Requests/cycle = %d" % self.__requestsPerCycle)
    self.__minProcess = self.am_getOption("MinProcess", self.__minProcess)
    self.log.info("ProcessPool min process = %d" % self.__minProcess)
    self.__maxProcess = self.am_getOption("MaxProcess", 4)
    self.log.info("ProcessPool max process = %d" % self.__maxProcess)
    self.__queueSize = self.am_getOption("ProcessPoolQueueSize", self.__queueSize)
    self.log.info("ProcessPool queue size = %d" % self.__queueSize)
    self.__poolTimeout = int(self.am_getOption("ProcessPoolTimeout", self.__poolTimeout))
    self.log.info("ProcessPool timeout = %d seconds" % self.__poolTimeout)
    self.__poolSleep = int(self.am_getOption("ProcessPoolSleep", self.__poolSleep))
    self.log.info("ProcessPool sleep time = %d seconds" % self.__poolSleep)
    self.__bulkRequest = self.am_getOption("BulkRequest", 0)
    self.log.info("Bulk request size = %d" % self.__bulkRequest)

    # # keep config path and agent name
    self.agentName = self.am_getModuleParam("fullName")
    self.__configPath = PathFinder.getAgentSection(self.agentName)

    # # operation handlers over here
    opHandlersPath = "%s/%s" % (self.__configPath, "OperationHandlers")
    opHandlers = gConfig.getSections(opHandlersPath)
    if not opHandlers["OK"]:
        self.log.error(opHandlers["Message"])
        raise AgentConfigError("OperationHandlers section not found in CS under %s" % self.__configPath)
    opHandlers = opHandlers["Value"]

    self.timeOuts = dict()

    # # handlers dict
    self.handlersDict = dict()
    for opHandler in opHandlers:
        opHandlerPath = "%s/%s/Location" % (opHandlersPath, opHandler)
        opLocation = gConfig.getValue(opHandlerPath, "")
        if not opLocation:
            # A handler without a location cannot be loaded: report and skip it
            self.log.error("%s not set for %s operation handler" % (opHandlerPath, opHandler))
            continue

        # Defaults, overridden by the per-handler options when they are set
        self.timeOuts[opHandler] = {"PerFile": self.__fileTimeout, "PerOperation": self.__operationTimeout}

        opTimeout = gConfig.getValue("%s/%s/TimeOut" % (opHandlersPath, opHandler), 0)
        if opTimeout:
            self.timeOuts[opHandler]["PerOperation"] = opTimeout

        fileTimeout = gConfig.getValue("%s/%s/TimeOutPerFile" % (opHandlersPath, opHandler), 0)
        if fileTimeout:
            self.timeOuts[opHandler]["PerFile"] = fileTimeout

        self.handlersDict[opHandler] = opLocation

    self.log.info("Operation handlers:")
    for index, (opHandler, opLocation) in enumerate(self.handlersDict.items()):
        self.log.info(
            "[%s] %s: %s (timeout: %d s + %d s per file)"
            % (
                index,
                opHandler,
                opLocation,
                self.timeOuts[opHandler]['PerOperation'],
                self.timeOuts[opHandler]['PerFile'],
            )
        )

    # # common monitor activity
    gMonitor.registerActivity("Iteration", "Agent Loops", "RequestExecutingAgent", "Loops/min", gMonitor.OP_SUM)
    gMonitor.registerActivity(
        "Processed", "Request Processed", "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM
    )
    gMonitor.registerActivity("Done", "Request Completed", "RequestExecutingAgent", "Requests/min", gMonitor.OP_SUM)

    # # create request dict
    self.__requestCache = dict()

    # ?? Probably should be removed
    self.FTSMode = self.am_getOption("FTSMode", False)
def getServicePorts(self, setup='', printOutput=False):
  """Checks the service ports for the specified setup. If not given this is
     taken from the current installation (/DIRAC/Setup)

     Example usage:

       >>> print(diracAdmin.getServicePorts())
       {'OK': True, 'Value':''}

     :param str setup: setup name; falls back to the /DIRAC/Setup option when empty
     :param bool printOutput: when true, pretty-print the resulting dictionary
     :return: S_OK with a dict { '<system>/<service>' : port }, or S_ERROR
  """
  if not setup:
    setup = gConfig.getValue('/DIRAC/Setup', '')

  setupList = gConfig.getSections('/DIRAC/Setups', [])
  if not setupList['OK']:
    return S_ERROR('Could not get /DIRAC/Setups sections')
  setupList = setupList['Value']
  if setup not in setupList:
    return S_ERROR('Setup %s is not in allowed list: %s' % (setup, ', '.join(setupList)))

  # maps each system name to the instance used by this setup
  serviceSetups = gConfig.getOptionsDict('/DIRAC/Setups/%s' % setup)
  if not serviceSetups['OK']:
    return S_ERROR('Could not get /DIRAC/Setups/%s options' % setup)
  serviceSetups = serviceSetups['Value']  # dict

  systemList = gConfig.getSections('/Systems')
  if not systemList['OK']:
    return S_ERROR('Could not get Systems sections')
  systemList = systemList['Value']

  result = {}
  for system in systemList:
    if system not in serviceSetups:
      self.log.warn('%s is not defined in /DIRAC/Setups/%s' % (system, setup))
      continue

    path = '/Systems/%s/%s/Services' % (system, serviceSetups[system])
    servicesList = gConfig.getSections(path)
    if not servicesList['OK']:
      # an unreadable Services section is only reported, the scan carries on
      self.log.warn('Could not get sections in %s' % path)
      continue

    servicesList = servicesList['Value'] or []
    self.log.verbose('System: %s ServicesList: %s' % (system, ', '.join(servicesList)))
    for service in servicesList:
      spath = '%s/%s/Port' % (path, service)
      servicePort = gConfig.getValue(spath, 0)
      if servicePort:
        self.log.verbose('Found port for %s/%s = %s' % (system, service, servicePort))
        result['%s/%s' % (system, service)] = servicePort
      else:
        self.log.warn('No port found for %s' % spath)

  if printOutput:
    # single-argument call form behaves the same on Python 2 and Python 3
    print(self.pPrint.pformat(result))

  return S_OK(result)
def _getCSDict(self):
  """ Gets minimal info for running a pilot, from the CS

  :returns: pilotDict (containing pilots run info)
  :rtype: S_OK, S_ERROR, value is pilotDict
  """
  pilotDict = {'Setups': {}, 'CEs': {}, 'GenericPilotDNs': []}

  # NOTE(review): the call prefix of this log line was missing in the reviewed
  # text; self.log.info is assumed - confirm the intended level.
  self.log.info('-- Getting the content of the CS --')

  # These are in fact not only setups: they may be "Defaults" sections, or VOs, in multi-VOs installations
  setupsRes = gConfig.getSections('/Operations/')
  if not setupsRes['OK']:
    self.log.error("Can't get sections from Operations", setupsRes['Message'])
    return setupsRes
  setupsInOperations = setupsRes['Value']

  # getting the setup(s) in this CS, and comparing with what we found in Operations
  setupsInDIRACRes = gConfig.getSections('DIRAC/Setups')
  if not setupsInDIRACRes['OK']:
    self.log.error("Can't get sections from DIRAC/Setups", setupsInDIRACRes['Message'])
    return setupsInDIRACRes
  setupsInDIRAC = setupsInDIRACRes['Value']

  # Handling the case of multi-VO CS
  if not set(setupsInDIRAC).intersection(set(setupsInOperations)):
    vos = list(setupsInOperations)
    for vo in vos:
      setupsFromVOs = gConfig.getSections('/Operations/%s' % vo)
      if not setupsFromVOs['OK']:
        continue
      # NOTE(review): each readable VO overwrites the previous one, so only
      # the last VO's sections are used below - confirm this is intended.
      setupsInOperations = setupsFromVOs['Value']

  self.log.verbose('From Operations/[Setup]/Pilot')
  for setup in setupsInOperations:
    self._getPilotOptionsPerSetup(setup, pilotDict)

  self.log.verbose('From Resources/Sites')
  sitesSection = gConfig.getSections('/Resources/Sites/')
  if not sitesSection['OK']:
    self.log.error("Can't get sections from Resources", sitesSection['Message'])
    return sitesSection

  for grid in sitesSection['Value']:
    gridPath = '/Resources/Sites/' + grid
    gridSection = gConfig.getSections(gridPath)
    if not gridSection['OK']:
      self.log.error("Can't get sections from Resources", gridSection['Message'])
      return gridSection

    for site in gridSection['Value']:
      cesPath = gridPath + '/' + site + '/CEs/'
      ceList = gConfig.getSections(cesPath)
      if not ceList['OK']:
        # Skip but log it
        self.log.error('Site has no CEs! - skipping', site)
        continue

      for ce in ceList['Value']:
        ceType = gConfig.getValue(cesPath + ce + '/CEType')
        localCEType = gConfig.getValue(cesPath + ce + '/LocalCEType')

        if ceType is None:
          # Log it, but still record the CE (without its grid type)
          self.log.error('CE has no option CEType!', ce + ' at ' + site)
          pilotDict['CEs'][ce] = {'Site': site}
        else:
          pilotDict['CEs'][ce] = {'Site': site, 'GridCEType': ceType}

        if localCEType is not None:
          pilotDict['CEs'][ce].setdefault('LocalCEType', localCEType)

  defaultSetup = gConfig.getValue('/DIRAC/DefaultSetup')
  if defaultSetup:
    pilotDict['DefaultSetup'] = defaultSetup

  self.log.debug('From DIRAC/Configuration')
  pilotDict['ConfigurationServers'] = gConfig.getServersList()

  self.log.debug("Got pilotDict", str(pilotDict))

  return S_OK(pilotDict)
def do_install(self, args):
  """ Install various DIRAC components

      usage:

        install mysql
        install db <database>
        install service <system> <service>
        install agent <system> <agent>
  """
  # The docstring above is printed verbatim as the usage text, so developer
  # notes live in comments instead:
  #  * args is the raw argument string following the "install" command
  #  * errors are reported through self.__errMsg and the method returns None
  # NOTE(review): the first argument of SystemAdministratorClient and the
  # right-hand sides of the InstallTools host assignments were missing in the
  # reviewed text; self.host is assumed by analogy with self.port - confirm.
  argss = args.split()
  if not argss:
    print(self.do_install.__doc__)
    return

  option = argss[0]
  del argss[0]

  if option == "mysql":
    print("Installing MySQL database, this can take a while ...")
    client = SystemAdministratorClient(self.host, self.port)
    if InstallTools.mysqlPassword == 'LocalConfig':
      InstallTools.mysqlPassword = ''
    InstallTools.getMySQLPasswords()
    result = client.installMySQL(InstallTools.mysqlRootPwd, InstallTools.mysqlPassword)
    if not result['OK']:
      self.__errMsg(result['Message'])
    else:
      print("MySQL: %s" % (result['Value'],))
      print("You might need to restart SystemAdministrator service to take new settings into account")

  elif option == "db":
    if not argss:
      print(self.do_install.__doc__)
      return
    database = argss[0]
    client = SystemAdministratorClient(self.host, self.port)

    result = client.getAvailableDatabases()
    if not result['OK']:
      self.__errMsg("Can not get database list: %s" % result['Message'])
      return
    if database not in result['Value']:
      self.__errMsg("Unknown database %s: " % database)
      return
    system = result['Value'][database]['System']

    setup = gConfig.getValue('/DIRAC/Setup', '')
    if not setup:
      self.__errMsg("Unknown current setup")
      return
    instance = gConfig.getValue('/DIRAC/Setups/%s/%s' % (setup, system), '')
    if not instance:
      self.__errMsg("No instance defined for system %s" % system)
      self.__errMsg("\tAdd new instance with 'add instance %s <instance_name>'" % system)
      return

    if not InstallTools.mysqlPassword:
      # NOTE(review): this literal looks like a redacted placeholder
      # (the mysql branch compares against 'LocalConfig') - confirm.
      InstallTools.mysqlPassword = '******'
    InstallTools.getMySQLPasswords()
    result = client.installDatabase(database, InstallTools.mysqlRootPwd)
    if not result['OK']:
      self.__errMsg(result['Message'])
      return
    extension, system = result['Value']

    # result = client.addDatabaseOptionsToCS( system, database )
    InstallTools.mysqlHost = self.host
    result = client.getInfo()
    if not result['OK']:
      self.__errMsg(result['Message'])
      # without this return the next line fails on the missing 'Value' key
      return
    hostSetup = result['Value']['Setup']
    result = InstallTools.addDatabaseOptionsToCS(gConfig, system, database, hostSetup)
    if not result['OK']:
      self.__errMsg(result['Message'])
      return
    print("Database %s from %s/%s installed successfully" % (database, extension, system))

  elif option == "service" or option == "agent":
    if len(argss) < 2:
      print(self.do_install.__doc__)
      return

    system = argss[0]
    component = argss[1]
    client = SystemAdministratorClient(self.host, self.port)

    # First need to update the CS
    # result = client.addDefaultOptionsToCS( option, system, component )
    # NOTE(review): both sides of this assignment were missing in the
    # reviewed text; InstallTools.host = self.host is an assumption - confirm.
    InstallTools.host = self.host
    result = client.getInfo()
    if not result['OK']:
      self.__errMsg(result['Message'])
      return
    hostSetup = result['Value']['Setup']
    result = InstallTools.addDefaultOptionsToCS(gConfig, option, system, component,
                                                getCSExtensions(), hostSetup)
    if not result['OK']:
      self.__errMsg(result['Message'])
      return

    # Then we can install and start the component
    result = client.setupComponent(option, system, component)
    if not result['OK']:
      self.__errMsg(result['Message'])
      return
    compType = result['Value']['ComponentType']
    runit = result['Value']['RunitStatus']
    print("%s %s_%s is installed, runit status: %s" % (compType, system, component, runit))

  else:
    print("Unknown option: %s" % option)
def execute(self):
  """The JobAgent execution method.

  One cycle: optionally check the remaining CPU time (filling mode), ask the
  computing element for free slots, request a job from the Matcher, validate
  the returned JDL, then set up the proxy/software and submit the job.

  :return: S_OK/S_ERROR structure; most stop conditions go through self.__finish
  """
  # NOTE(review): several logging call prefixes were missing in the reviewed
  # text (only the arguments survived); self.log.info is assumed for all of
  # them - confirm.
  if self.jobCount:
    # Temporary mechanism to pass a shutdown message to the agent
    if os.path.exists('/var/lib/dirac_drain'):
      return self.__finish('Node is being drained by an operator')

    # Only call timeLeft utility after a job has been picked up
    self.log.info('Attempting to check CPU time left for filling mode')
    if not self.fillingMode:
      return self.__finish('Filling Mode is Disabled')

    if self.timeLeftError:
      self.log.warn("Disabling filling mode as errors calculating time left", self.timeLeftError)
      return self.__finish(self.timeLeftError)
    self.log.info('normalized CPU units remaining in slot', self.timeLeft)
    if self.timeLeft <= self.minimumTimeLeft:
      return self.__finish('No more time left')

    # Need to update the Configuration so that the new value is published in the next matching request
    result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
    if not result['OK']:
      return self.__finish(result['Message'])

    # Update local configuration to be used by submitted job wrappers
    localCfg = CFG()
    if self.extraOptions:
      localConfigFile = os.path.join('.', self.extraOptions)
    else:
      localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
    localCfg.loadFromFile(localConfigFile)
    if not localCfg.isSection('/LocalSite'):
      localCfg.createNewSection('/LocalSite')
    localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
    localCfg.writeToFile(localConfigFile)

  self.log.verbose('Job Agent execution loop')
  result = self.computingElement.available()
  if not result['OK']:
    self.log.info('Resource is not available', result['Message'])
    return self.__finish('CE Not Available')

  ceInfoDict = result['CEInfoDict']
  runningJobs = ceInfoDict.get("RunningJobs")
  availableSlots = result['Value']

  if not availableSlots:
    if runningJobs:
      self.log.info('No available slots', '%d running jobs' % runningJobs)
      return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs)
    self.log.info('CE is not available')
    return self.__finish('CE Not Available')

  result = self.computingElement.getDescription()
  if not result['OK']:
    return result

  # We can have several prioritized job retrieval strategies
  ceDescription = result['Value']
  if isinstance(ceDescription, dict):
    ceDictList = [ceDescription]
  elif isinstance(ceDescription, list):
    # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
    ceDictList = ceDescription
  else:
    ceDictList = []
  if not ceDictList:
    # Without at least one description no Matcher request can be made; return
    # an error instead of failing later on an unbound jobRequest.
    return S_ERROR('No usable CE description returned by the computing element')

  for ceDict in ceDictList:
    # Add pilot information
    gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
    if gridCE != 'Unknown':
      ceDict['GridCE'] = gridCE
    if 'PilotReference' not in ceDict:
      ceDict['PilotReference'] = str(self.pilotReference)
    ceDict['PilotBenchmark'] = self.cpuFactor
    ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

    # Add possible job requirements
    result = gConfig.getOptionsDict('/AgentJobRequirements')
    if result['OK']:
      requirementsDict = result['Value']
      ceDict.update(requirementsDict)
      self.log.info('Requirements:', requirementsDict)

    self.log.verbose('CE dict', ceDict)

    # here finally calling the matcher
    start = time.time()
    jobRequest = MatcherClient().requestJob(ceDict)
    matchTime = time.time() - start
    self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
    if jobRequest['OK']:
      # first strategy that yields a job wins; ceDict keeps that description
      break

  self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)

  if not jobRequest['OK']:
    failure = jobRequest['Message']
    # NOTE(review): the function applied to ('No match found', message) was
    # missing in the reviewed text; a plain substring test is used, which is
    # equivalent to a regex search for this literal pattern.
    if 'No match found' in failure:
      self.log.notice('Job request OK, but no match found', ': %s' % (failure))
    elif 'seconds timeout' in failure:
      self.log.error('Timeout while requesting job', failure)
    elif 'Pilot version does not match' in failure:
      # a version mismatch is a hard error and is not counted as a failed match
      errorMsg = 'Pilot version does not match the production version'
      self.log.error(errorMsg, failure.replace(errorMsg, ''))
      return S_ERROR(failure)
    else:
      self.log.notice('Failed to get jobs', ': %s' % (failure))

    # every other failure counts towards the consecutive-failures limit
    self.matchFailedCount += 1
    if self.matchFailedCount > self.stopAfterFailedMatches:
      return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
    return S_OK(failure)

  # Reset the Counter
  self.matchFailedCount = 0

  matcherInfo = jobRequest['Value']
  if not self.pilotInfoReportedFlag:
    # Check the flag after the first access to the Matcher
    self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
  jobID = matcherInfo['JobID']
  matcherParams = ['JDL', 'DN', 'Group']
  for param in matcherParams:
    if param not in matcherInfo:
      self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
      return self.__finish('Matcher Failed')
    elif not matcherInfo[param]:
      self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
      return self.__finish('Matcher Failed')
    else:
      self.log.verbose('Matcher returned', '%s = %s ' % (param, matcherInfo[param]))

  jobJDL = matcherInfo['JDL']
  jobGroup = matcherInfo['Group']
  ownerDN = matcherInfo['DN']

  # everything the Matcher returned besides JDL/DN/Group is handed to the job
  optimizerParams = {}
  for key in matcherInfo:
    if key not in matcherParams:
      optimizerParams[key] = matcherInfo[key]

  parameters = self._getJDLParameters(jobJDL)
  if not parameters['OK']:
    self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
    self.log.warn('Could Not Extract JDL Parameters', parameters['Message'])
    return self.__finish('JDL Problem')

  params = parameters['Value']
  if 'JobID' not in params:
    msg = 'Job has not JobID defined in JDL parameters'
    self.__report(jobID, 'Failed', msg)
    self.log.warn(msg)
    return self.__finish('JDL Problem')
  jobID = params['JobID']

  if 'JobType' not in params:
    self.log.warn('Job has no JobType defined in JDL parameters')
    jobType = 'Unknown'
  else:
    jobType = params['JobType']

  if 'CPUTime' not in params:
    self.log.warn('Job has no CPU requirement defined in JDL parameters')

  # Job requirements for determining the number of processors
  # the minimum number of processors requested
  processors = int(params.get('NumberOfProcessors', int(params.get('MinNumberOfProcessors', 1))))
  # the maximum number of processors allowed to the payload
  maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
  # need or not the whole node for the job
  wholeNode = 'WholeNode' in params
  mpTag = 'MultiProcessor' in params.get('Tags', [])

  if self.extraOptions:
    params['Arguments'] += ' ' + self.extraOptions
    params['ExtraOptions'] = self.extraOptions

  self.log.verbose('Job request successful: \n', jobRequest['Value'])
  self.log.info('Received', 'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' % (jobID, jobType, ownerDN, jobGroup))
  self.jobCount += 1
  try:
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)

    if 'BOINC_JOB_ID' in os.environ:
      # Report BOINC environment
      for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
        jobReport.setJobParameter(thisp, gConfig.getValue('/LocalSite/%s' % thisp, 'Unknown'), sendFlag=False)

    jobReport.setJobStatus('Matched', 'Job Received by Agent')
    result = self._setupProxy(ownerDN, jobGroup)
    if not result['OK']:
      return self._rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
    proxyChain = result.get('Value')

    # Save the job jdl for external monitoring
    self.__saveJobJDLRequest(jobID, jobJDL)

    software = self._checkInstallSoftware(jobID, params, ceDict)
    if not software['OK']:
      self.log.error('Failed to install software for job', '%s' % (jobID))
      errorMsg = software['Message']
      if not errorMsg:
        errorMsg = 'Failed software installation'
      return self._rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

    self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
    result = self._submitJob(jobID, params, ceDict, optimizerParams, proxyChain,
                             processors, wholeNode, maxNumberOfProcessors, mpTag)
    if not result['OK']:
      self.__report(jobID, 'Failed', result['Message'])
      return self.__finish(result['Message'])
    elif 'PayloadFailed' in result:
      # Do not keep running and do not overwrite the Payload error
      message = 'Payload execution failed with error code %s' % result['PayloadFailed']
      if self.stopOnApplicationFailure:
        return self.__finish(message, self.stopOnApplicationFailure)
      else:
        # NOTE(review): the body of this else branch was missing in the
        # reviewed text; logging the otherwise unused message is assumed.
        self.log.info(message)

    self.log.debug('After %sCE submitJob()' % (self.ceName))
  except Exception as subExcept:  # pylint: disable=broad-except
    self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
    return self._rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)

  # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
  cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

  result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
  if result['OK']:
    self.timeLeft = result['Value']
  else:
    if result['Message'] != 'Current batch system is not supported':
      self.timeLeftError = result['Message']
    else:
      # if the batch system is not defined, use the process time and the CPU normalization defined locally
      self.timeLeft = self._getCPUTimeLeft()

  return S_OK('Job Agent cycle complete')
def export_updateSoftware(self, version, rootPath="", gridVersion=""):
  """ Update the local DIRAC software installation to version

  Builds a dirac-install command line from the /LocalInstallation options and
  runs it; afterwards fixes the MySQL startup script if one is found and, when
  requested, runs the Oracle client installation step.

  :param str version: release passed to dirac-install via -r
  :param str rootPath: optional installation root, passed via -P; must exist
  :param str gridVersion: optional grid middleware version, passed via -g
  :return: S_OK() on success, S_ERROR with the collected error lines otherwise
  """
  # Check that we have a sane local configuration
  localInstallation = gConfig.getOptionsDict('/LocalInstallation')
  if not localInstallation['OK']:
    return S_ERROR('Invalid installation - missing /LocalInstallation section in the configuration')
  if not localInstallation['Value']:
    return S_ERROR('Invalid installation - empty /LocalInstallation section in the configuration')

  if rootPath and not os.path.exists(rootPath):
    return S_ERROR('Path "%s" does not exists' % rootPath)

  # For LHCb we need to check Oracle client
  oracleFlag = gConfig.getValue('/LocalInstallation/InstallOracleClient', 'unknown').lower()
  installOracleClient = False
  if oracleFlag in ['yes', 'true', '1']:
    installOracleClient = True
  elif oracleFlag == "unknown":
    # no explicit setting: install only if cx_Oracle is already importable
    probe = systemCall(0, ['python', '-c', 'import cx_Oracle'])
    if probe['OK'] and probe['Value'][0] == 0:
      installOracleClient = True

  cmdList = ['dirac-install', '-r', version, '-t', 'server']
  if rootPath:
    cmdList += ['-P', rootPath]

  # Check if there are extensions
  extensionList = getCSExtensions()
  if gConfig.getValue('/LocalInstallation/WebPortal', False):
    extensionList.append('Web')
  if extensionList:
    cmdList.extend(['-e', ','.join(extensionList)])

  # Are grid middleware bindings required ?
  if gridVersion:
    cmdList += ['-g', gridVersion]

  fallbackPath = gConfig.getValue('/LocalInstallation/RootPath', '')
  targetPath = gConfig.getValue('/LocalInstallation/TargetPath', fallbackPath)
  localCfgFile = targetPath + '/etc/dirac.cfg'
  if not (targetPath and os.path.exists(localCfgFile)):
    return S_ERROR('Local configuration not found')
  cmdList.append(localCfgFile)

  installResult = systemCall(0, cmdList)
  if not installResult['OK']:
    return installResult
  if installResult['Value'][0] != 0:
    # Get error messages: keep only the stdout lines mentioning "error"
    stripped = [line.strip() for line in installResult['Value'][1].split('\n')]
    errorLines = [line for line in stripped if 'error' in line.lower()]
    if errorLines:
      return S_ERROR('\n'.join(errorLines))
    return S_ERROR("Failed to update software to %s" % version)

  # Check if there is a MySQL installation and fix the server scripts if necessary
  if os.path.exists(InstallTools.mysqlDir):
    scriptTail = ('mysql', 'share', 'mysql', 'mysql.server')
    startupScript = os.path.join(InstallTools.instancePath, *scriptTail)
    if not os.path.exists(startupScript):
      startupScript = os.path.join(InstallTools.instancePath, 'pro', *scriptTail)
    if os.path.exists(startupScript):
      InstallTools.fixMySQLScripts(startupScript)

  # For LHCb we need to check Oracle client
  if installOracleClient:
    # NOTE(review): the command is an empty string in the reviewed text; it is
    # reproduced as-is - confirm which installer script is meant here.
    oracleResult = systemCall(0, '')
    if not oracleResult['OK']:
      return oracleResult
    if oracleResult['Value'][0] != 0:
      # Get error messages: stdout lines, then stderr lines, then a summary
      errorLines = oracleResult['Value'][1].split('\n')
      errorLines += oracleResult['Value'][2].split('\n')
      errorLines.append('Failed to install Oracle client module')
      return S_ERROR('\n'.join(errorLines))

  return S_OK()
def web_getSelectionData(self):
  """ Build the content of the selection widgets (status, grid type, mask
  status, site, country) from the site summary selectors and send it back
  with self.finish.

  Each entry of the callback dictionary is a list of single-element lists.
  """
  def selectorRows(values):
    """ Rows for a plain selector: "All" followed by every value as a string """
    if values:
      return [["All"]] + [[str(value)] for value in values]
    return [["Nothing to display"]]

  callback = {}
  RPC = RPCClient("WorkloadManagement/WMSAdministrator")
  result = yield self.threadTask(RPC.getSiteSummarySelectors)
  # NOTE(review): the call prefix of this log line was missing in the reviewed
  # text; gLogger.info is assumed - confirm logger and level.
  gLogger.info("\033[0;31m ++++++: \033[0m %s" % result)

  if result["OK"]:
    result = result["Value"]

    callback["status"] = selectorRows(result.get("Status", []))
    callback["gridtype"] = selectorRows(result.get("GridType", []))
    callback["maskstatus"] = selectorRows(result.get("MaskStatus", []))

    sites = result.get("Site", [])
    if sites:
      # preferred sites come first, followed by the remaining ones
      tier1 = gConfig.getValue("/Website/PreferredSites", [])
      site = [["All"]]
      site.extend([str(name)] for name in tier1)
      site.extend([str(name)] for name in sites if name not in tier1)
    else:
      site = [["Error during RPC call"]]
    callback["site"] = site

    countries = result.get("Country", [])
    if countries:
      # only countries known to the code -> name mapping are listed
      countryCode = self.__getCountries()
      country = [["All"]]
      country.extend([str(countryCode[code])] for code in countries if code in countryCode)
    else:
      country = [["Nothing to display"]]
    country.sort()
    callback["country"] = country
  else:
    for key in ("status", "gridtype", "maskstatus", "site", "country"):
      callback[key] = [["Error during RPC call"]]
  ###
  self.finish(callback)
from DIRAC.Core.Base import Script

# NOTE(review): the line layout of the usage message was flattened in the
# reviewed text; the layout below is a reconstruction - confirm.
Script.setUsageMessage("""
Get the currently defined user data volume quotas

Usage:
   %s [options]
""" % Script.scriptName)

# The command line is parsed before the remaining DIRAC imports
Script.parseCommandLine(ignoreErrors=False)

import DIRAC
from DIRAC import gLogger, gConfig
from DIRAC.Core.Security.ProxyInfo import getProxyInfo

# Identify the user from the client proxy
res = getProxyInfo(False, False)
if not res['OK']:
  gLogger.error("Failed to get client proxy information.", res['Message'])
  DIRAC.exit(2)
proxyInfo = res['Value']
username = proxyInfo['username']

try:
  # The per-user quota overrides the registry-wide default
  quota = gConfig.getValue('/Registry/DefaultStorageQuota', 0.)
  quota = gConfig.getValue('/Registry/Users/%s/Quota' % username, quota)
  gLogger.notice('Current quota found to be %.1f GB' % quota)
  DIRAC.exit(0)
except Exception as x:  # "as" form is accepted by Python 2.6+ and Python 3
  gLogger.exception("Failed to convert retrieved quota", '', x)
  DIRAC.exit(-1)
def loadModule( self, modName, hideExceptions = False, parentModule = False ):
  """
    Load module name.
    name must take the form [DIRAC System Name]/[DIRAC module]

    :param str modName: module to load, "<System>/<Module>"; leading "/" are dropped
    :param bool hideExceptions: forwarded to the import helper
    :param parentModule: when set, the module is imported relative to it
    :return: S_OK() once the module (or every member of its load group) is
             registered, S_ERROR otherwise
  """
  # NOTE(review): several logging call prefixes were missing in the reviewed
  # text; gLogger.info is assumed by analogy with the gLogger.verbose and
  # gLogger.notice calls in this method - confirm.
  while modName and modName[0] == "/":
    modName = modName[1:]
  if modName in self.__modules:
    return S_OK()
  modList = modName.split( "/" )
  if len( modList ) != 2:
    return S_ERROR( "Can't load %s: Invalid module name" % ( modName ) )
  csSection = self.__sectionFinder( modName )
  loadGroup = gConfig.getValue( "%s/Load" % csSection, [] )
  #Check if it's a load group
  if loadGroup:
    gLogger.info( "Found load group %s. Will load %s" % ( modName, ", ".join( loadGroup ) ) )
    for loadModName in loadGroup:
      # names without a system part live in the same system as the group
      if "/" not in loadModName:
        loadModName = "%s/%s" % ( modList[0], loadModName )
      result = self.loadModule( loadModName, hideExceptions = hideExceptions, parentModule = False )
      if not result[ 'OK' ]:
        return result
    return S_OK()
  #Normal load
  loadName = gConfig.getValue( "%s/Module" % csSection, "" )
  if not loadName:
    loadName = modName
    gLogger.info( "Loading %s" % ( modName ) )
  else:
    if "/" not in loadName:
      loadName = "%s/%s" % ( modList[0], loadName )
    gLogger.info( "Loading %s (%s)" % ( modName, loadName ) )
  #If already loaded, skip
  loadList = loadName.split( "/" )
  if len( loadList ) != 2:
    return S_ERROR( "Can't load %s: Invalid module name" % ( loadName ) )
  system, module = loadList
  #Load
  className = module
  if self.__modSuffix:
    className = "%s%s" % ( className, self.__modSuffix )
  if loadName not in self.__loadedModules:
    #Check if handler is defined
    loadCSSection = self.__sectionFinder( loadName )
    handlerPath = gConfig.getValue( "%s/HandlerPath" % loadCSSection, "" )
    if handlerPath:
      gLogger.info( "Trying to %s from CS defined path %s" % ( loadName, handlerPath ) )
      gLogger.verbose( "Found handler for %s: %s" % ( loadName, handlerPath ) )
      # turn a file-like path into a dotted import path without the .py suffix
      handlerPath = handlerPath.replace( "/", "." )
      if handlerPath.endswith( ".py" ):
        handlerPath = handlerPath[ :-3 ]
      className = List.fromChar( handlerPath, "." )[-1]
      result = self.__recurseImport( handlerPath )
      if not result[ 'OK' ]:
        return S_ERROR( "Cannot load user defined handler %s: %s" % ( handlerPath, result[ 'Message' ] ) )
      gLogger.verbose( "Loaded %s" % handlerPath )
    elif parentModule:
      gLogger.info( "Trying to autodiscover %s from parent" % loadName )
      #If we've got a parent module, load from there.
      modImport = module
      if self.__modSuffix:
        modImport = "%s%s" % ( modImport, self.__modSuffix )
      result = self.__recurseImport( modImport, parentModule, hideExceptions = hideExceptions )
      #Error while loading: report it instead of failing on the missing Value key
      if not result[ 'OK' ]:
        return result
    else:
      #Check to see if the module exists in any of the root modules
      gLogger.info( "Trying to autodiscover %s" % loadName )
      rootModulesToLook = getInstalledExtensions()
      # keeps the "nothing found" check below well defined with no extensions
      result = { 'OK' : True, 'Value' : None }
      for rootModule in rootModulesToLook:
        importString = '%s.%sSystem.%s.%s' % ( rootModule, system, self.__importLocation, module )
        if self.__modSuffix:
          importString = "%s%s" % ( importString, self.__modSuffix )
        gLogger.verbose( "Trying to load %s" % importString )
        result = self.__recurseImport( importString, hideExceptions = hideExceptions )
        #Error while loading
        if not result[ 'OK' ]:
          return result
        #Something has been found! break :)
        if result[ 'Value' ]:
          gLogger.verbose( "Found %s" % importString )
          break
    #Nothing found
    if not result[ 'Value' ]:
      return S_ERROR( "Could not find %s" % loadName )
    modObj = result[ 'Value' ]
    try:
      #Try to get the class from the module
      modClass = getattr( modObj, className )
    except AttributeError:
      if '__file__' in dir( modObj ):
        location = modObj.__file__
      else:
        location = modObj.__path__
      gLogger.exception( "%s module does not have a %s class!" % ( location, module ) )
      return S_ERROR( "Cannot load %s" % module )
    #Check if it's subclass
    if not issubclass( modClass, self.__superClass ):
      return S_ERROR( "%s has to inherit from %s" % ( loadName, self.__superClass.__name__ ) )
    self.__loadedModules[ loadName ] = { 'classObj' : modClass, 'moduleObj' : modObj }
    #End of loading of 'loadName' module

  #A-OK :)
  self.__modules[ modName ] = self.__loadedModules[ loadName ].copy()
  #keep the name of the real code module
  self.__modules[ modName ][ 'modName' ] = modName
  self.__modules[ modName ][ 'loadName' ] = loadName
  gLogger.notice( "Loaded module %s" % modName )

  return S_OK()
# Rows of the table printed at the end: one list per storage element.
records = []

# No VO given and not listing for all VOs: take the VO from the proxy group.
if vo is None and not allVOsFlag:
  result = getVOfromProxyGroup()
  if not result['OK']:
    gLogger.error('Failed to determine the user VO')
    DIRAC.exit(-1)
  vo = result['Value']

# NOTE(review): this looks like leftover debugging output, and its original
# nesting level cannot be told from the flattened text - confirm.
print(allVOsFlag, noVOFlag, vo)

# res, fields, vo and the *Flag variables are defined earlier in the script,
# outside the reviewed fragment.
for se, statusDict in res['Value'].items():

  # Check if the SE is allowed for the user VO
  if not allVOsFlag:
    voList = gConfig.getValue('/Resources/StorageElements/%s/VO' % se, [])
    # with noVOFlag set, only SEs without any VO restriction are kept
    if noVOFlag and voList:
      continue
    # otherwise skip SEs restricted to VOs other than the selected one
    if voList and vo not in voList:
      continue

  # First column is the SE name, the others follow fields[1:]
  record = [se]
  for status in fields[1:]:
    value = statusDict.get(status, 'Unknown')
    record.append(value)
  records.append(record)

printTable(fields, records, numbering=False, sortField='SE')

DIRAC.exit(0)
def __init__(self, name, protocols=None, vo=None):
  """ c'tor

  :param str name: SE name
  :param list protocols: requested protocols
  :param vo: VO to use; when not given it is taken from the proxy group
  """
  # NOTE(review): the self.name assignments and the getSubLogger argument were
  # incomplete in the reviewed text ("= name", '"SE[%s]" %'); self.name is
  # assumed as the missing attribute - confirm.
  self.methodName = None

  if vo:
    self.vo = vo
  else:
    result = getVOfromProxyGroup()
    if not result['OK']:
      # NOTE(review): returning here leaves the instance without valid, name,
      # log and the method lists set below - confirm callers cope with that.
      return
    self.vo = result['Value']
  self.opHelper = Operations(vo=self.vo)

  # Proxy access is enabled by the first source that says so: the protocol of
  # AccessProtocol.1, then the LocalSite option, then the Operations option.
  proxiedProtocols = gConfig.getValue('/LocalSite/StorageElements/ProxyProtocols', "").split(',')
  useProxy = (gConfig.getValue("/Resources/StorageElements/%s/AccessProtocol.1/Protocol" % name,
                               "UnknownProtocol") in proxiedProtocols)
  if not useProxy:
    useProxy = gConfig.getValue('/LocalSite/StorageElements/%s/UseProxy' % name, False)
  if not useProxy:
    useProxy = self.opHelper.getValue('/Services/StorageElements/%s/UseProxy' % name, False)

  self.valid = True
  # no explicit request means an empty protocol list for the factory
  protocolList = [] if protocols is None else protocols
  res = StorageFactory(useProxy=useProxy, vo=self.vo).getStorages(name, protocolList=protocolList)
  if not res['OK']:
    self.valid = False
    self.name = name
    self.errorReason = res['Message']
  else:
    factoryDict = res['Value']
    self.name = factoryDict['StorageName']
    self.options = factoryDict['StorageOptions']
    self.localProtocols = factoryDict['LocalProtocols']
    self.remoteProtocols = factoryDict['RemoteProtocols']
    self.storages = factoryDict['StorageObjects']
    self.protocolOptions = factoryDict['ProtocolOptions']
    self.turlProtocols = factoryDict['TurlProtocols']

  self.log = gLogger.getSubLogger("SE[%s]" % self.name)

  # Method names grouped by the kind of access they represent
  self.readMethods = ['getFile',
                      'getAccessUrl',
                      'getTransportURL',
                      'prestageFile',
                      'prestageFileStatus',
                      'getDirectory']

  self.writeMethods = ['retransferOnlineFile',
                       'putFile',
                       'replicateFile',
                       'pinFile',
                       'releaseFile',
                       'createDirectory',
                       'putDirectory']

  self.removeMethods = ['removeFile', 'removeDirectory']

  self.checkMethods = ['exists',
                       'getDirectoryMetadata',
                       'getDirectorySize',
                       'getFileSize',
                       'getFileMetadata',
                       'listDirectory',
                       'isDirectory',
                       'isFile',
                       ]

  self.okMethods = ['getLocalProtocols',
                    'getPfnForProtocol',
                    'getPfnForLfn',
                    'getPfnPath',
                    'getProtocols',
                    'getRemoteProtocols',
                    'getStorageElementName',
                    'getStorageElementOption',
                    'getStorageParameters',
                    'isLocalSE']
def getSiteUpdates(vo, bdiiInfo=None, log=None):
  """ Get all the necessary updates for the already defined sites and CEs

  :param str vo: VO name, used to query the BDII and added to the queues' VO option
  :param dict bdiiInfo: BDII CE information keyed by site; fetched with
                        getBdiiCEInfo(vo) when not given
  :param log: logger to use, gLogger when not given
  :return: S_OK with a set of (section, option, currentValue, newValue) tuples,
           or the S_ERROR returned by getBdiiCEInfo
  """

  def addToChangeSet(entry, changeSet):
    """ Record entry only if it carries a non-empty value that differs from the current one """
    _section, _option, value, new_value = entry
    if new_value and new_value != value:
      changeSet.add(entry)

  if log is None:
    log = gLogger

  ceBdiiDict = bdiiInfo
  if bdiiInfo is None:
    result = getBdiiCEInfo(vo)
    if not result['OK']:
      return result
    ceBdiiDict = result['Value']

  changeSet = set()
  for site in ceBdiiDict:
    result = getDIRACSiteName(site)
    if not result['OK']:
      continue
    siteNames = result['Value']
    for siteName in siteNames:
      # the part of the site name before the first dot is used as the grid section
      siteSection = cfgPath('/Resources', 'Sites', siteName.split('.')[0], siteName)
      result = gConfig.getOptionsDict(siteSection)
      if not result['OK']:
        continue
      siteDict = result['Value']
      # Current CS values
      coor = siteDict.get('Coordinates', 'Unknown')
      mail = siteDict.get('Mail', 'Unknown').replace(' ', '')
      description = siteDict.get('Description', 'Unknown')
      description = description.replace(' ,', ',')

      longitude = ceBdiiDict[site].get('GlueSiteLongitude', '').strip()
      latitude = ceBdiiDict[site].get('GlueSiteLatitude', '').strip()

      # Current BDII value
      newcoor = ''
      if longitude and latitude:
        newcoor = "%s:%s" % (longitude, latitude)
      newmail = ceBdiiDict[site].get('GlueSiteSysAdminContact', '').replace('mailto:', '').strip()
      newdescription = ceBdiiDict[site].get('GlueSiteDescription', '').strip()

      # Adding site data to the changes list
      addToChangeSet((siteSection, 'Coordinates', coor, newcoor), changeSet)
      addToChangeSet((siteSection, 'Mail', mail, newmail), changeSet)
      addToChangeSet((siteSection, 'Description', description, newdescription), changeSet)

      ces = gConfig.getValue(cfgPath(siteSection, 'CE'), [])
      for ce in ces:
        ceSection = cfgPath(siteSection, 'CEs', ce)
        ceInfo = ceBdiiDict[site]['CEs'].get(ce, None)
        ceDict = {}
        result = gConfig.getOptionsDict(ceSection)
        if result['OK']:
          ceDict = result['Value']
        elif ceInfo:
          log.notice("Adding new CE %s to site %s/%s" % (ce, siteName, site))
        if ceInfo is None:
          # CE not known to the BDII: nothing to compare against
          continue

        # Current CS CE info
        arch = ceDict.get('architecture', 'Unknown')
        OS = ceDict.get('OS', 'Unknown')
        si00 = ceDict.get('SI00', 'Unknown')
        ceType = ceDict.get('CEType', 'Unknown')
        ram = ceDict.get('MaxRAM', 'Unknown')
        submissionMode = ceDict.get('SubmissionMode', 'Unknown')

        # Current BDII CE info
        newarch = ceInfo.get('GlueHostArchitecturePlatformType', '').strip()
        systemName = ceInfo.get('GlueHostOperatingSystemName', '').strip()
        systemVersion = ceInfo.get('GlueHostOperatingSystemVersion', '').strip()
        systemRelease = ceInfo.get('GlueHostOperatingSystemRelease', '').strip()
        newOS = ''
        if systemName and systemVersion and systemRelease:
          newOS = '_'.join((systemName, systemVersion, systemRelease))
        newsi00 = ceInfo.get('GlueHostBenchmarkSI00', '').strip()
        # the CE type is taken from the first queue that declares one
        newCEType = 'Unknown'
        for queue in ceInfo['Queues']:
          newCEType = ceInfo['Queues'][queue].get('GlueCEImplementationName', '').strip()
          if newCEType:
            break
        if newCEType == 'ARC-CE':
          newCEType = 'ARC'

        newSubmissionMode = None
        if newCEType in ['ARC', 'CREAM']:
          newSubmissionMode = "Direct"
        newRAM = ceInfo.get('GlueHostMainMemoryRAMSize', '').strip()
        # Protect from unreasonable values
        if newRAM and int(newRAM) > 150000:
          newRAM = ''

        # Adding CE data to the change list
        addToChangeSet((ceSection, 'architecture', arch, newarch), changeSet)
        addToChangeSet((ceSection, 'OS', OS, newOS), changeSet)
        addToChangeSet((ceSection, 'SI00', si00, newsi00), changeSet)
        addToChangeSet((ceSection, 'CEType', ceType, newCEType), changeSet)
        addToChangeSet((ceSection, 'MaxRAM', ram, newRAM), changeSet)
        if submissionMode == "Unknown" and newSubmissionMode:
          addToChangeSet((ceSection, 'SubmissionMode', submissionMode, newSubmissionMode), changeSet)

        for queue in ceInfo['Queues']:
          queueInfo = ceInfo['Queues'][queue]
          queueStatus = queueInfo['GlueCEStateStatus']
          queueSection = cfgPath(ceSection, 'Queues', queue)
          queueDict = {}
          result = gConfig.getOptionsDict(queueSection)
          if result['OK']:
            queueDict = result['Value']
          else:
            # queues unknown to the CS are only added when in production
            if queueStatus.lower() == "production":
              log.notice("Adding new queue %s to CE %s" % (queue, ce))
            else:
              continue

          # Current CS queue info
          maxCPUTime = queueDict.get('maxCPUTime', 'Unknown')
          si00 = queueDict.get('SI00', 'Unknown')
          maxTotalJobs = queueDict.get('MaxTotalJobs', 'Unknown')

          # Current BDII queue info
          newMaxCPUTime = queueInfo.get('GlueCEPolicyMaxCPUTime', '')
          # values made only of 4s or only of 9s are discarded
          if newMaxCPUTime == "4" * len(newMaxCPUTime) or newMaxCPUTime == "9" * len(newMaxCPUTime):
            newMaxCPUTime = ''
          newSI00 = ''
          caps = queueInfo['GlueCECapability']
          if isinstance(caps, str):
            caps = [caps]
          for cap in caps:
            if 'CPUScalingReferenceSI00' in cap:
              newSI00 = cap.split('=')[-1]

          # Adding queue info to the CS
          addToChangeSet((queueSection, 'maxCPUTime', maxCPUTime, newMaxCPUTime), changeSet)
          addToChangeSet((queueSection, 'SI00', si00, newSI00), changeSet)
          if maxTotalJobs == "Unknown":
            # half of the advertised CPUs capped at 1000, and 10% of that
            # (at least 2) as waiting jobs
            newTotalJobs = min(1000, int(int(queueInfo.get('GlueCEInfoTotalCPUs', 0)) / 2))
            newWaitingJobs = max(2, int(newTotalJobs * 0.1))
            newTotalJobs = str(newTotalJobs)
            newWaitingJobs = str(newWaitingJobs)
            addToChangeSet((queueSection, 'MaxTotalJobs', '', newTotalJobs), changeSet)
            addToChangeSet((queueSection, 'MaxWaitingJobs', '', newWaitingJobs), changeSet)

          # Updating eligible VO list
          VOs = set()
          if queueDict.get('VO', ''):
            VOs = set(q.strip() for q in queueDict.get('VO', '').split(',') if q)
          if vo not in VOs:
            VOs.add(vo)
            # sorted so the proposed value does not depend on set ordering
            newVOs = ','.join(sorted(VOs))
            addToChangeSet((queueSection, 'VO', '', newVOs), changeSet)

  return S_OK(changeSet)
def __getGlue2ShareInfo(host, shareInfoLists):
    """ get information from endpoints, which are the CE at a Site

    :param str host: BDII host to query
    :param dict shareInfoLists: for each site name, the list of dictionaries of GLUE2 parameters
                                belonging to the ComputingShares of that site
    :returns: result structure S_OK/S_ERROR; on success the value is a dictionary
              ``{siteName: {'CEs': {ceName: ceInfo}}}`` where every ``ceInfo`` holds a 'Queues' dictionary
    """
    # First pass: collect the execution environment keys of all shares, so they can be resolved in one call
    executionEnvironments = []
    for _siteName, shareInfoDicts in shareInfoLists.items():
        for shareInfoDict in shareInfoDicts:
            executionEnvironment = shareInfoDict.get(
                'GLUE2ComputingShareExecutionEnvironmentForeignKey', [])
            if not executionEnvironment:
                sLog.error('No entry for GLUE2ComputingShareExecutionEnvironmentForeignKey',
                           pformat(shareInfoDict))
                continue
            if isinstance(executionEnvironment, six.string_types):
                executionEnvironment = [executionEnvironment]
            executionEnvironments.extend(executionEnvironment)
    resExeInfo = __getGlue2ExecutionEnvironmentInfo(host, executionEnvironments)
    if not resExeInfo['OK']:
        sLog.error("Cannot get execution environment info for:",
                   str(executionEnvironments)[:100] + " " + resExeInfo['Message'])
        return resExeInfo
    exeInfos = resExeInfo['Value']

    # Second pass: build the CE and queue description of every share
    siteDict = {}
    for siteName, shareInfoDicts in shareInfoLists.items():
        siteDict[siteName] = {'CEs': {}}
        cesDict = siteDict[siteName]['CEs']
        for shareInfoDict in shareInfoDicts:
            ceInfo = {}
            ceInfo['MaxWaitingJobs'] = shareInfoDict.get(
                'GLUE2ComputingShareMaxWaitingJobs', '-1')  # This is not used
            ceInfo['Queues'] = {}
            queueInfo = {}
            queueInfo['GlueCEStateStatus'] = shareInfoDict['GLUE2ComputingShareServingState']
            # the GLUE2 values are divided by 60 before being stored under the Glue1-style keys
            queueInfo['GlueCEPolicyMaxCPUTime'] = str(
                int(int(shareInfoDict.get('GLUE2ComputingShareMaxCPUTime', 86400)) / 60))
            queueInfo['GlueCEPolicyMaxWallClockTime'] = str(
                int(int(shareInfoDict.get('GLUE2ComputingShareMaxWallTime', 86400)) / 60))
            queueInfo['GlueCEInfoTotalCPUs'] = shareInfoDict.get(
                'GLUE2ComputingShareMaxRunningJobs', '10000')
            queueInfo['GlueCECapability'] = ['CPUScalingReferenceSI00=2552']

            try:
                maxNOPfromCS = gConfig.getValue(
                    '/Resources/Computing/CEDefaults/GLUE2ComputingShareMaxSlotsPerJob_limit', 8)
                maxNOPfromGLUE = int(shareInfoDict.get('GLUE2ComputingShareMaxSlotsPerJob', 1))
                # never advertise more processors than the limit configured in the CS
                numberOfProcs = min(maxNOPfromGLUE, maxNOPfromCS)
                queueInfo['NumberOfProcessors'] = numberOfProcs
                if numberOfProcs != maxNOPfromGLUE:
                    sLog.info('Limited NumberOfProcessors for',
                              '%s from %s to %s' % (siteName, maxNOPfromGLUE, numberOfProcs))
            except ValueError:
                sLog.error("Bad content for GLUE2ComputingShareMaxSlotsPerJob:",
                           siteName + ' ' + shareInfoDict.get('GLUE2ComputingShareMaxSlotsPerJob'))
                queueInfo['NumberOfProcessors'] = 1

            executionEnvironment = shareInfoDict.get(
                'GLUE2ComputingShareExecutionEnvironmentForeignKey', [])
            if isinstance(executionEnvironment, six.string_types):
                executionEnvironment = [executionEnvironment]
            resExeInfo = __getGlue2ExecutionEnvironmentInfoForSite(
                siteName, executionEnvironment, exeInfos)
            if not resExeInfo['OK']:
                continue

            exeInfo = resExeInfo.get('Value')
            if not exeInfo:
                sLog.error('Using dummy values. Did not find information for execution environment',
                           siteName)
                exeInfo = {
                    'GlueHostMainMemoryRAMSize': '1999',  # intentionally identifiably dummy value
                    'GlueHostOperatingSystemVersion': '',
                    'GlueHostOperatingSystemName': '',
                    'GlueHostOperatingSystemRelease': '',
                    'GlueHostArchitecturePlatformType': 'x86_64',
                    'GlueHostBenchmarkSI00': '2500',  # needed for the queue to be used by the sitedirector
                    'MANAGER': 'manager:unknownBatchSystem',  # need some value for ARC
                }
            else:
                sLog.info('Found information for execution environment for', siteName)

            # sometimes the time is still in hours
            maxCPUTime = int(queueInfo['GlueCEPolicyMaxCPUTime'])
            if maxCPUTime in [12, 24, 36, 48, 168]:
                queueInfo['GlueCEPolicyMaxCPUTime'] = str(maxCPUTime * 60)
                queueInfo['GlueCEPolicyMaxWallClockTime'] = str(
                    int(queueInfo['GlueCEPolicyMaxWallClockTime']) * 60)

            ceInfo.update(exeInfo)
            shareEndpoints = shareInfoDict.get('GLUE2ShareEndpointForeignKey', [])
            if isinstance(shareEndpoints, six.string_types):
                shareEndpoints = [shareEndpoints]
            for endpoint in shareEndpoints:
                ceType = endpoint.rsplit('.', 1)[1]
                # reset for every endpoint so a name found for a previous endpoint is never reused
                queueName = None
                # get queue Name, in CREAM this is behind GLUE2entityOtherInfo...
                if ceType == 'CREAM':
                    for otherInfo in shareInfoDict['GLUE2EntityOtherInfo']:
                        if otherInfo.startswith('CREAMCEId'):
                            queueName = otherInfo.split('/', 1)[1]
                            # creamCEs are EOL soon, ignore any info they have
                            if queueInfo.pop('NumberOfProcessors', 1) != 1:
                                sLog.verbose('Ignoring MaxSlotsPerJob option for CreamCE', endpoint)
                    if queueName is None:
                        # without a CREAMCEId entry there is no queue name to register
                        sLog.error('No CREAMCEId found in GLUE2EntityOtherInfo for', endpoint)
                        continue
                # HTCondorCE, htcondorce
                elif ceType.lower().endswith('htcondorce'):
                    ceType = 'HTCondorCE'
                    queueName = 'condor'
                else:
                    sLog.error('Unknown CE Type, please check the available information', ceType)
                    continue

                queueInfo['GlueCEImplementationName'] = ceType
                ceName = endpoint.split('_', 1)[0]
                cesDict.setdefault(ceName, {})
                # merge with the queues already registered for this CE by earlier shares
                existingQueues = dict(cesDict[ceName].get('Queues', {}))
                existingQueues[queueName] = queueInfo
                ceInfo['Queues'] = existingQueues
                cesDict[ceName].update(ceInfo)

            # ARC CEs do not have endpoints, we have to try something else to get the information about the queue etc.
            try:
                if not shareEndpoints and shareInfoDict['GLUE2ShareID'].startswith('urn:ogf'):
                    exeInfo = dict(exeInfo)  # silence pylint about tuples
                    queueInfo['GlueCEImplementationName'] = 'ARC'
                    managerName = exeInfo.pop('MANAGER', '').split(' ', 1)[0].rsplit(':', 1)[1]
                    managerName = managerName.capitalize() if managerName == 'condor' else managerName
                    queueName = 'nordugrid-%s-%s' % (
                        managerName, shareInfoDict['GLUE2ComputingShareMappingQueue'])
                    ceName = shareInfoDict['GLUE2ShareID'].split('ComputingShare:')[1].split(':')[0]
                    cesDict.setdefault(ceName, {})
                    existingQueues = dict(cesDict[ceName].get('Queues', {}))
                    existingQueues[queueName] = queueInfo
                    ceInfo['Queues'] = existingQueues
                    cesDict[ceName].update(ceInfo)
            except Exception:
                # best effort: a share that cannot be parsed is reported and skipped
                sLog.error('Exception in ARC part for site:', siteName)

    return S_OK(siteDict)
def getSRMUpdates(vo, bdiiInfo=None):
    """ Compare the Storage Element options stored in the CS with the SRM information
    returned by getGridSRMs and collect the options that should be changed.

    :param str vo: VO name, passed to getGridSRMs and used as the only known VO if getVOs fails
    :param bdiiInfo: passed through unchanged to getGridSRMs
    :returns: S_OK with a set of (section, option, currentValue, newValue) tuples,
              or the failed result of getGridSRMs / getSEsFromCS
    """
    changeSet = set()

    def addToChangeSet(entry, changeSet):
        """ Record the entry only if the new value is not empty and differs from the current one """
        _section, _option, value, new_value = entry
        if new_value and new_value != value:
            changeSet.add(entry)

    result = getGridSRMs(vo, bdiiInfo=bdiiInfo)
    if not result['OK']:
        return result
    srmBdiiDict = result['Value']

    result = getSEsFromCS()
    if not result['OK']:
        return result
    seDict = result['Value']

    result = getVOs()
    if result['OK']:
        csVOs = set(result['Value'])
    else:
        csVOs = set([vo])

    for seHost, diracSE in seDict.items():
        # only the first SE registered for this host is considered
        seSection = '/Resources/StorageElements/%s' % diracSE[0]

        # Look up existing values first
        description = gConfig.getValue(cfgPath(seSection, 'Description'), 'Unknown')
        backend = gConfig.getValue(cfgPath(seSection, 'BackendType'), 'Unknown')
        vos = gConfig.getValue(cfgPath(seSection, 'VO'), 'Unknown').replace(' ', '')
        size = gConfig.getValue(cfgPath(seSection, 'TotalSize'), 'Unknown')

        # Look up current BDII values
        srmDict = {}
        seBdiiDict = {}
        for site in srmBdiiDict:
            if seHost in srmBdiiDict[site]:
                srmDict = srmBdiiDict[site][seHost]['SRM']
                seBdiiDict = srmBdiiDict[site][seHost]['SE']
                break

        if not srmDict or not seBdiiDict:
            continue

        newDescription = seBdiiDict.get('GlueSEName', 'Unknown')
        newBackend = seBdiiDict.get('GlueSEImplementationName', 'Unknown')
        newSize = seBdiiDict.get('GlueSESizeTotal', 'Unknown')
        addToChangeSet((seSection, 'Description', description, newDescription), changeSet)
        addToChangeSet((seSection, 'BackendType', backend, newBackend), changeSet)
        addToChangeSet((seSection, 'TotalSize', size, newSize), changeSet)

        # Evaluate VOs if no space token defined, otherwise this is VO specific
        spaceToken = ''
        for i in range(1, 10):
            protocol = gConfig.getValue(cfgPath(seSection, 'AccessProtocol.%d' % i, 'Protocol'), '')
            if protocol.lower() == 'srm':
                spaceToken = gConfig.getValue(cfgPath(seSection, 'AccessProtocol.%d' % i, 'SpaceToken'), '')
                break
        if not spaceToken:
            bdiiVOs = srmDict.get('GlueServiceAccessControlBaseRule', [])
            # NOTE(review): presumably a single rule can arrive as a plain string instead of a
            # list - confirm against getGridSRMs. Wrap it so it is not iterated per character.
            if isinstance(bdiiVOs, (str, type(u''))):
                bdiiVOs = [bdiiVOs]
            bdiiVOs = set([re.sub('^VO:', '', rule) for rule in bdiiVOs])
            seVOs = csVOs.intersection(bdiiVOs)
            # sorted, so the proposed value does not depend on set iteration order
            newVOs = ','.join(sorted(seVOs))
            addToChangeSet((seSection, 'VO', vos, newVOs), changeSet)

    return S_OK(changeSet)
def fillVisList(vdict, num):
    """ Expand a single output visibility flag to ``num`` consecutive steps.

    :param dict vdict: visibility flags; when it holds exactly one entry its key is read with int()
    :param int num: number of consecutive keys to generate
    :returns: if ``vdict`` has exactly one entry, a new dictionary whose keys are str(i) for
              ``num`` consecutive integers starting at int(key), all mapped to that entry's value;
              otherwise ``vdict`` itself, unchanged
    """
    # Assuming that, if there's only one element in the list of output visibility flags, every step will catch that flag
    if len(vdict) == 1:
        # next(iter(...)) rather than vdict.keys()[0]: dictionary views cannot be indexed on Python 3
        onlyKey = next(iter(vdict))
        val = vdict[onlyKey]
        firstStep = int(onlyKey)
        vdict = dict((str(i), val) for i in range(firstStep, firstStep + num))
    # Another assumption: if the number of steps is bigger than that of vis flags, then extend the list with the last flag available
    # to fill the "holes"
    #if len(vlist) < len(slist):
    #  vlist.extend( vlist[-1] * (len(slist) - len(vlist)) )
    return vdict


gLogger = gLogger.getSubLogger( '' )
currentSetup = gConfig.getValue( 'DIRAC/Setup' )

pr = ProductionRequest()

# the {{pNStep}} strings are kept as literals, one statement per step
stepsList = [ '{{p1Step}}' ]
stepsList.append( '{{p2Step}}' )
stepsList.append( '{{p3Step}}' )
stepsList.append( '{{p4Step}}' )
stepsList.append( '{{p5Step}}' )
stepsList.append( '{{p6Step}}' )
stepsList.append( '{{p7Step}}' )
stepsList.append( '{{p8Step}}' )
stepsList.append( '{{p9Step}}' )
stepsList.append( '{{p10Step}}' )
stepsList.append( '{{p11Step}}' )
stepsList.append( '{{p12Step}}' )
def sync(self):
    """
      sync.

      Reads the component status from ComponentsMonitoringDB for the current setup and
      registers every agent and service that is not in 'Error' status on the RSS
      ComponentStatus table (only if it is not there yet).

      :returns: S_OK() on success, otherwise the first failed result obtained from
                getComponentsStatus or addIfNotThereStatusElement
    """
    # TODO: delete from RSS if not anymore on ComponentsMonitoringDB

    setup = gConfig.getValue('DIRAC/Setup')

    reply = self.compoDB.getComponentsStatus({'Setup': setup})
    if not reply['OK']:
        return reply
    perSetup = reply['Value'][0][setup]

    # (key in the monitoring data, RSS element type, how to build the statusType)
    flavours = (
        ('agent', 'Agent', lambda info: info['Host']),
        ('service', 'Service', lambda info: '%(Host)s:%(Port)s' % info),
    )

    for key, elementType, buildStatusType in flavours:
        for componentName, instances in perSetup[key].iteritems():
            for info in instances:
                if info['Status'] == 'Error':
                    # components in error are only reported, never registered
                    self.log.warn('%(ComponentName)s %(Message)s' % info)
                    continue

                res = self.rsClient.addIfNotThereStatusElement(
                    'Component', 'Status',
                    name=componentName,
                    statusType=buildStatusType(info),
                    status='Unknown',
                    elementType=elementType,
                    reason='Synchronized',
                )
                if not res['OK']:
                    return res

    return S_OK()

#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def getQueues(self, resourceDict):
    """ Get the list of relevant CEs and their descriptions

    Fills self.queueDict with one entry per queue, keyed '<ce>_<queue>', and extends
    self.platforms and self.sites with the platforms and sites met on the way.
    Note that the 'Queues' entry is popped from every CE dictionary of resourceDict.

    :param dict resourceDict: site -> CE name -> CE parameters, the latter holding a 'Queues' dictionary
    :returns: S_OK(), or the failed result of the CE creation / validation
    """
    self.queueDict = {}
    factory = ComputingElementFactory()

    for site in resourceDict:
        siteCEs = resourceDict[site]
        for ce in siteCEs:
            ceDict = siteCEs[ce]
            queues = ceDict.pop('Queues')
            for queue in queues:
                queueName = '%s_%s' % (ce, queue)
                # params is the very dictionary stored under 'ParametersDict', not a copy
                params = queues[queue]
                entry = {'ParametersDict': params}
                self.queueDict[queueName] = entry

                params['Queue'] = queue
                params['Site'] = site
                params['GridEnv'] = self.gridEnv
                params['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                # Evaluate the CPU limit of the queue according to the Glue convention
                # To Do: should be a utility
                if "maxCPUTime" in params and "SI00" in params:
                    # For some sites there are crazy values in the CS, hence the clamping
                    maxCPUTime = min(max(float(params['maxCPUTime']), 0), 86400 * 12.5)
                    si00 = float(params['SI00'])
                    params['CPUTime'] = int(60. / 250. * maxCPUTime * si00)

                qwDir = os.path.join(self.workingDirectory, queue)
                if not os.path.exists(qwDir):
                    os.makedirs(qwDir)
                params['WorkingDirectory'] = qwDir

                # queue setting first, then CE setting, then architecture + OS of the CE
                if "Platform" in params:
                    platform = params['Platform']
                elif "Platform" in ceDict:
                    platform = ceDict['Platform']
                elif "OS" in ceDict:
                    platform = '_'.join([ceDict.get('architecture', 'x86_64'), ceDict['OS']])
                else:
                    platform = ''
                if platform and platform not in self.platforms:
                    self.platforms.append(platform)

                if "Platform" not in params and platform:
                    result = Resources.getDIRACPlatform(platform)
                    if result['OK']:
                        params['Platform'] = result['Value']

                # CE level parameters, overridden by the queue level ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(params)
                result = factory.getCE(ceName=ce,
                                       ceType=ceDict['CEType'],
                                       ceParametersDict=ceQueueDict)
                if not result['OK']:
                    return result
                entry['CE'] = result['Value']
                entry['CEName'] = ce
                entry['CEType'] = ceDict['CEType']
                entry['Site'] = site
                entry['QueueName'] = queue

                result = entry['CE'].isValid()
                if not result['OK']:
                    self.log.fatal(result['Message'])
                    return result

                if 'BundleProxy' in params or 'BundleProxy' in ceDict:
                    entry['BundleProxy'] = True

                if site not in self.sites:
                    self.sites.append(site)

    return S_OK()