def __moduleProcess( self, eType, taskId, taskStub, fastTrackLevel = 0 ): result = self.__getInstance( eType ) if not result[ 'OK' ]: return result modInstance = result[ 'Value' ] try: result = modInstance._ex_processTask( taskId, taskStub ) except Exception as excp: gLogger.exception( "Error while processing task %s" % taskId, lException = excp ) return S_ERROR( "Error processing task %s: %s" % ( taskId, excp ) ) self.__storeInstance( eType, modInstance ) if not result[ 'OK' ]: return S_OK( ( 'TaskError', taskStub, "Error: %s" % result[ 'Message' ] ) ) taskStub, freezeTime, fastTrackType = result[ 'Value' ] if freezeTime: return S_OK( ( "TaskFreeze", taskStub, freezeTime ) ) if fastTrackType: if fastTrackLevel < 10 and fastTrackType in self.__modules: gLogger.notice( "Fast tracking task %s to %s" % ( taskId, fastTrackType ) ) return self.__moduleProcess( fastTrackType, taskId, taskStub, fastTrackLevel + 1 ) else: gLogger.notice( "Stopping %s fast track. Sending back to the mind" % ( taskId ) ) return S_OK( ( "TaskDone", taskStub, True ) )
def export_update( self, params, meta ):
  '''
  This method is a bridge to access :class:`ResourceManagementDB` remotely. It adds
  neither processing nor validation. If you need to know more about this method,
  keep reading the database documentation.

  :Parameters:
    **params** - `dict`
      arguments for the mysql query ( must match table columns ! ).
    **meta** - `dict`
      metadata for the mysql query. It must contain, at least, the `table` key
      with the proper table name.

  :return: S_OK() || S_ERROR()
  '''
  gLogger.info( 'update: %s %s' % ( params, meta ) )
  try:
    res = db.update( params, meta )
    gLogger.debug( 'update %s' % res )
  except Exception, e:
    _msg = 'Exception calling db.update: \n %s' % e
    gLogger.exception( _msg )
    res = S_ERROR( _msg )
  return res
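# A minimal client-side sketch for the export_update bridge above, assuming the handler
# is published as the "ResourceStatus/ResourceManagement" DISET service and that a table
# with the columns used below exists; the service name, table and column names here are
# illustrative assumptions, not taken from the snippet.
from DIRAC import gLogger
from DIRAC.Core.DISET.RPCClient import RPCClient

rmService = RPCClient( "ResourceStatus/ResourceManagement" )
params = { 'Name' : 'CERN_Queue', 'Result' : '98.5' }   # must match the table columns
meta = { 'table' : 'AccountingCache' }                  # the 'table' key is mandatory
result = rmService.update( params, meta )
if not result[ 'OK' ]:
  gLogger.error( 'update failed', result[ 'Message' ] )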
def __prepareSecurityDetails(self):
  """ Obtains the connection details for the client """
  try:
    credDict = self.getRemoteCredentials()
    clientDN = credDict['DN']
    clientUsername = credDict['username']
    clientGroup = credDict['group']
    gLogger.debug( "Getting proxy for %s@%s (%s)" % ( clientUsername, clientGroup, clientDN ) )
    res = gProxyManager.downloadVOMSProxy( clientDN, clientGroup )
    if not res['OK']:
      return res
    chain = res['Value']
    proxyBase = "%s/proxies" % BASE_PATH
    if not os.path.exists(proxyBase):
      os.makedirs(proxyBase)
    proxyLocation = "%s/proxies/%s-%s" % ( BASE_PATH, clientUsername, clientGroup )
    gLogger.debug("Obtained proxy chain, dumping to %s." % proxyLocation)
    res = gProxyManager.dumpProxyToFile( chain, proxyLocation )
    if not res['OK']:
      return res
    gLogger.debug("Updating environment.")
    os.environ['X509_USER_PROXY'] = res['Value']
    return res
  except Exception, error:
    exStr = "__prepareSecurityDetails: Failed to get client connection details."
    gLogger.exception( exStr, '', error )
    return S_ERROR(exStr)
def doCommand(self, CEs = None): """ Returns failed pilots using the DIRAC accounting system for every CE for the last self.args[0] hours :params: :attr:`CEs`: list of CEs (when not given, take every CE) :returns: """ if CEs is None: from DIRAC.Core.DISET.RPCClient import RPCClient RPC_RSS = RPCClient("ResourceStatus/ResourceStatus") CEs = RPC_RSS.getCEsList() if not CEs['OK']: raise RSSException, where(self, self.doCommand) + " " + CEs['Message'] else: CEs = CEs['Value'] if self.RPC is None: from DIRAC.Core.DISET.RPCClient import RPCClient self.RPC = RPCClient("Accounting/ReportGenerator", timeout = self.timeout) if self.client is None: from DIRAC.AccountingSystem.Client.ReportsClient import ReportsClient self.client = ReportsClient(rpcClient = self.RPC) fromD = datetime.datetime.utcnow()-datetime.timedelta(hours = self.args[0]) toD = datetime.datetime.utcnow() try: failed_pilots = self.client.getReport('Pilot', 'NumberOfPilots', fromD, toD, {'GridStatus':['Aborted'], 'GridCE':CEs}, 'GridCE') if not failed_pilots['OK']: raise RSSException, where(self, self.doCommand) + " " + failed_pilots['Message'] else: failed_pilots = failed_pilots['Value'] except: gLogger.exception("Exception when calling FailedPilotsByCESplitted_Command") return {} listOfCEs = failed_pilots['data'].keys() plotGran = failed_pilots['granularity'] singlePlots = {} for CE in listOfCEs: if CE in CEs: plot = {} plot['data'] = {CE: failed_pilots['data'][CE]} plot['granularity'] = plotGran singlePlots[CE] = plot resToReturn = {'Pilot': singlePlots} return resToReturn
def doCommand(self, sites = None): """ Returns running and runned jobs, querying the WMSHistory for the last self.args[0] hours :params: :attr:`sites`: list of sites (when not given, take every sites) :returns: """ if sites is None: from DIRAC.Core.DISET.RPCClient import RPCClient RPC_RSS = RPCClient("ResourceStatus/ResourceStatus") sites = RPC_RSS.getSitesList() if not sites['OK']: raise RSSException, where(self, self.doCommand) + " " + sites['Message'] else: sites = sites['Value'] if self.RPC is None: from DIRAC.Core.DISET.RPCClient import RPCClient self.RPC = RPCClient("Accounting/ReportGenerator", timeout = self.timeout) if self.client is None: from DIRAC.AccountingSystem.Client.ReportsClient import ReportsClient self.client = ReportsClient(rpcClient = self.RPC) fromD = datetime.datetime.utcnow()-datetime.timedelta(hours = self.args[0]) toD = datetime.datetime.utcnow() try: run_jobs = self.client.getReport('WMSHistory', 'NumberOfJobs', fromD, toD, {}, 'Site') if not run_jobs['OK']: raise RSSException, where(self, self.doCommand) + " " + run_jobs['Message'] else: run_jobs = run_jobs['Value'] except: gLogger.exception("Exception when calling RunningJobsBySiteSplitted_Command") return {} listOfSites = run_jobs['data'].keys() plotGran = run_jobs['granularity'] singlePlots = {} for site in listOfSites: if site in sites: plot = {} plot['data'] = {site: run_jobs['data'][site]} plot['granularity'] = plotGran singlePlots[site] = plot resToReturn = {'WMSHistory': singlePlots} return resToReturn
def addTransport(self, transport, *args, **kwargs): trid = self.__trPool.add(transport) try: result = self.addTransportId(trid, *args, **kwargs) except Exception, e: gLogger.exception("Cannot add transport id") result = S_ERROR("Cannot add transport id")
def sweeper( cls ): """ move cached request to the central request manager :param self: self reference """ cacheDir = cls.cacheDir() # # cache dir empty? if not os.listdir( cacheDir ): gLogger.always( "sweeper: CacheDir %s is empty, nothing to do" % cacheDir ) return S_OK() else: # # read 10 cache dir files, the oldest first cachedRequests = [ os.path.abspath( requestFile ) for requestFile in sorted( filter( os.path.isfile, [ os.path.join( cacheDir, requestName ) for requestName in os.listdir( cacheDir ) ] ), key = os.path.getctime ) ][:10] # # set cached requests to the central RequestManager for cachedFile in cachedRequests: # # break if something went wrong last time try: requestJSON = "".join( open( cachedFile, "r" ).readlines() ) cachedRequest = json.loads( requestJSON ) cachedName = cachedRequest.get( "RequestName", "***UNKNOWN***" ) putRequest = cls.requestManager().putRequest( requestJSON ) if not putRequest["OK"]: gLogger.error( "sweeper: unable to set request %s @ ReqManager: %s" % ( cachedName, putRequest["Message"] ) ) continue gLogger.info( "sweeper: successfully put request '%s' @ ReqManager" % cachedName ) os.unlink( cachedFile ) except Exception, error: gLogger.exception( "sweeper: hit by exception %s" % str( error ) ) return S_ERROR( "sweeper: hit by exception: %s" % str( error ) ) return S_OK()
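# A sketch of a request file the sweeper above can pick up from cacheDir: one JSON
# document per file, carrying at least a "RequestName" key. The file name, path and the
# extra keys are illustrative assumptions, not taken from the snippet.
import json

cachedRequest = { "RequestName" : "transfer_00001234",
                  "OwnerDN" : "/DC=example/CN=some.user",
                  "Operations" : [] }
with open( "/path/to/cacheDir/transfer_00001234.json", "w" ) as cacheFile:
  cacheFile.write( json.dumps( cachedRequest ) )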
def run(self):
  """ The main watchdog execution method
  """
  result = self.initialize()
  if not result['OK']:
    gLogger.always('Can not start watchdog for the following reason')
    gLogger.always(result['Message'])
    return result
  try:
    while True:
      gLogger.debug('Starting agent loop # %d' % self.count)
      start_cycle_time = time.time()
      result = self.execute()
      exec_cycle_time = time.time() - start_cycle_time
      if not result['OK']:
        gLogger.error("Watchdog error during execution", result['Message'])
        break
      elif result['Value'] == "Ended":
        break
      self.count += 1
      if exec_cycle_time < self.pollingTime:
        time.sleep(self.pollingTime - exec_cycle_time)
    return S_OK()
  except Exception, x:
    gLogger.exception()
    return S_ERROR('Exception')
def loadUserData( self ):
  """
  This is the magic method that reads the command line and processes it.
  It is used by the Script Base class and the dirac-service and dirac-agent scripts.
  Before being called:
  - any additional switches to be processed must be defined
  - mandatory and default configuration options must be defined
  """
  if self.initialized:
    return S_OK()
  self.initialized = True
  try:
    retVal = self.__addUserDataToConfiguration()
    for optionTuple in self.optionalEntryList:
      optionPath = self.__getAbsolutePath( optionTuple[0] )
      if not gConfigurationData.extractOptionFromCFG( optionPath ):
        gConfigurationData.setOptionInCFG( optionPath, optionTuple[1] )
    self.__initLogger( self.componentName, self.loggingSection )
    if not retVal[ 'OK' ]:
      return retVal
    retVal = self.__checkMandatoryOptions()
    if not retVal[ 'OK' ]:
      return retVal
  except Exception as e:
    gLogger.exception()
    return S_ERROR( str( e ) )
  return S_OK()
def execute( self ):
  """
  The main StElWriteInspectorAgent execution method.
  Calls :meth:`DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.getStuffToCheck` and
  puts the result in self.StorageElementToBeChecked (a Queue) and in
  self.StorageElementInCheck (a list)
  """
  try:
    res = self.rsDB.getStuffToCheck( 'StorageElementsWrite', self.StorageElsWriteFreqs )
    for resourceTuple in res:
      if resourceTuple[ 0 ] in self.StorageElementInCheck:
        break
      resourceL = [ 'StorageElementWrite' ]
      for x in resourceTuple:
        resourceL.append( x )
      self.StorageElementInCheck.insert( 0, resourceL[ 1 ] )
      self.StorageElementToBeChecked.put( resourceL )
    return S_OK()
  except Exception, x:
    errorStr = where( self, self.execute )
    gLogger.exception( errorStr, lException = x )
    return S_ERROR( errorStr )
def initialize( self ): """ Standard constructor """ try: self.rsDB = ResourceStatusDB() self.rmDB = ResourceManagementDB() self.StorageElementToBeChecked = Queue.Queue() self.StorageElementInCheck = [] self.maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 ) self.threadPool = ThreadPool( self.maxNumberOfThreads, self.maxNumberOfThreads ) if not self.threadPool: self.log.error( 'Can not create Thread Pool' ) return S_ERROR( 'Can not create Thread Pool' ) self.setup = getSetup()[ 'Value' ] self.VOExtension = getExt() self.StorageElsWriteFreqs = CheckingFreqs[ 'StorageElsWriteFreqs' ] self.nc = NotificationClient() self.diracAdmin = DiracAdmin() self.csAPI = CSAPI() for _i in xrange( self.maxNumberOfThreads ): self.threadPool.generateJobAndQueueIt( self._executeCheck, args = ( None, ) ) return S_OK() except Exception: errorStr = "StElWriteInspectorAgent initialization" gLogger.exception( errorStr ) return S_ERROR( errorStr )
def setRequest( self, requestName, requestString, desiredStatus = None ): """ Set request to the database (including all sub-requests) """ gLogger.info( "RequestDBFile._setRequest: Attempting to set %s." % requestName ) request = RequestContainer( requestString ) requestTypes = request.getSubRequestTypes()['Value'] try: for requestType in requestTypes: subRequestString = request.toXML( desiredType = requestType )['Value'] if subRequestString: if desiredStatus: status = desiredStatus elif not request.isRequestTypeEmpty( requestType )['Value']: status = 'ToDo' else: status = 'Done' subRequestDir = '%s/%s/%s' % ( self.root, requestType, status ) if not os.path.exists( subRequestDir ): os.makedirs( subRequestDir ) subRequestPath = '%s/%s' % ( subRequestDir, requestName ) subRequestFile = open( subRequestPath, 'w' ) subRequestFile.write( subRequestString ) subRequestFile.close() gLogger.info( "RequestDBFile._setRequest: Successfully set %s." % requestName ) return S_OK() except Exception, x: errStr = "RequestDBFile._setRequest: Exception while setting request." gLogger.exception( errStr, requestName, lException = x ) self.deleteRequest( requestName ) return S_ERROR( errStr )
def doCommand(self): """ Returns transfer quality as it is cached :attr:`args`: - args[0]: string: should be a ValidRes - args[1]: string should be the name of the ValidRes :returns: {'Result': None | a float between 0.0 and 100.0} """ super(TransferQualityCached_Command, self).doCommand() if self.client is None: from DIRAC.ResourceStatusSystem.Client.ResourceManagementClient import ResourceManagementClient self.client = ResourceManagementClient(timeout = self.timeout) name = self.args[1] try: res = self.client.getCachedResult(name, 'TransferQualityEverySEs', 'TQ', 'NULL') if res == []: return {'Result':None} except: gLogger.exception("Exception when calling ResourceManagementClient for %s" %(name)) return {'Result':'Unknown'} return {'Result':float(res[0])}
def __loadOptimizer(self): # Need to load an optimizer gLogger.info("Loading optimizer %s" % self.optimizerName) optList = List.fromChar(self.optimizerName, "/") optList[1] = "/".join(optList[1:]) systemName = optList[0] agentName = "%sAgent" % optList[1] rootModulesToLook = gConfig.getValue("/LocalSite/Extensions", []) + ["DIRAC"] for rootModule in rootModulesToLook: try: gLogger.info("Trying to load from root module %s" % rootModule) opPyPath = "%s.%sSystem.Agent.%s" % (rootModule, systemName, agentName) optimizerModule = __import__(opPyPath, globals(), locals(), agentName) except ImportError, e: gLogger.info("Can't load %s: %s" % (opPyPath, str(e))) continue try: optimizerClass = getattr(optimizerModule, agentName) optimizer = optimizerClass("%sAgent" % self.optimizerName, self.containerName) result = optimizer.am_initialize(self.jobDB, self.jobLoggingDB) if not result["OK"]: return S_ERROR("Can't initialize optimizer %s: %s" % (self.optimizerName, result["Message"])) return S_OK(optimizer) except Exception, e: gLogger.exception("Can't load optimizer %s with root module %s" % (self.optimizerName, rootModule))
def run(self): """ The main watchdog execution method """ result = self.initialize() if not result["OK"]: gLogger.always("Can not start watchdog for the following reason") gLogger.always(result["Message"]) return result try: while True: gLogger.debug("Starting watchdog loop # %d" % self.count) start_cycle_time = time.time() result = self.execute() exec_cycle_time = time.time() - start_cycle_time if not result["OK"]: gLogger.error("Watchdog error during execution", result["Message"]) break elif result["Value"] == "Ended": break self.count += 1 if exec_cycle_time < self.pollingTime: time.sleep(self.pollingTime - exec_cycle_time) return S_OK() except Exception: gLogger.exception() return S_ERROR("Exception")
def initAPIs( desiredAPIs, knownAPIs, force = False ): if not isinstance( desiredAPIs, list ): gLogger.error( 'Got "%s" instead of list while initializing APIs' % desiredAPIs ) return knownAPIs # Remove duplicated desiredAPIs = list(set( desiredAPIs ) ) for dAPI in desiredAPIs: if knownAPIs.has_key( dAPI ) and not force == True: continue if not dAPI in __APIs__.keys(): gLogger.error( '"%s" is not a known client on initAPIs' % dAPI ) return knownAPIs try: if not '/' in __APIs__[ dAPI ]: dClientMod = __import__( __APIs__[ dAPI ], globals(), locals(), ['*'] ) knownAPIs[ dAPI ] = getattr( dClientMod, dAPI )() else: knownAPIs[ dAPI ] = RPCClient( __APIs__[ dAPI ] ) gLogger.info( 'API %s initialized' % dAPI ) except Exception, x: gLogger.exception( 'Exception %s while importing "%s - %s"' % ( x, dAPI, __APIs__[ dAPI ] ) )
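# A usage sketch for initAPIs above. It assumes the module-level __APIs__ mapping
# contains the two client names used here, and that the function (truncated above)
# ends by returning the updated knownAPIs dict; both are assumptions, not shown in
# the snippet.
knownAPIs = {}
knownAPIs = initAPIs( [ 'ResourceStatusClient', 'ResourceManagementClient' ], knownAPIs )
rsClient = knownAPIs.get( 'ResourceStatusClient' )
if rsClient is not None:
  # the instance is cached in knownAPIs; a later call with force = True re-creates it
  pass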
def doCommand(self):
  """
  Uses :meth:`DIRAC.ResourceStatusSystem.Client.ResourceStatusClient.getServiceStats`

  :params:
    :attr:`args`: a tuple
      - args[0]: a ValidRes
      - args[1]: should be the name of the Site

  :returns:
    {'Active':xx, 'Probing':yy, 'Banned':zz, 'Total':xyz}
  """
  super(ServiceStats_Command, self).doCommand()

  if self.client is None:
    from DIRAC.ResourceStatusSystem.Client.ResourceStatusClient import ResourceStatusClient
    self.client = ResourceStatusClient(timeout=self.timeout)

  try:
    res = self.client.getServiceStats(self.args[0], self.args[1])
  except:
    gLogger.exception("Exception when calling ResourceStatusClient for %s %s" % (self.args[0], self.args[1]))
    return {"Result": "Unknown"}

  return {"Result": res}
def doCommand(self): """ Uses :meth:`DIRAC.ResourceStatusSystem.Client.ResourceStatusClient.getResourceStats` :params: :attr:`args`: a tuple - `args[0]` string, a ValidRes. Should be in ('Site', 'Service') - `args[1]` should be the name of the Site or Service :returns: """ super(ResourceStats_Command, self).doCommand() if self.client is None: from DIRAC.ResourceStatusSystem.Client.ResourceStatusClient import ResourceStatusClient self.client = ResourceStatusClient(timeout=self.timeout) try: res = self.client.getResourceStats(self.args[0], self.args[1]) except: gLogger.exception("Exception when calling ResourceStatusClient for %s %s" % (self.args[0], self.args[1])) return {"Result": "Unknown"} return {"Result": res}
def msg_TaskError( self, msgObj ): taskId = msgObj.taskId try: result = self.exec_deserializeTask( msgObj.taskStub ) except Exception, excp: gLogger.exception( "Exception while deserializing task %s" % taskId ) return S_ERROR( "Cannot deserialize task %s: %s" % ( taskId, str( excp ) ) )
def export_peekFTSFile( self, ftsFileID ): """ peek FTSFile given FTSFileID """ try: peekFile = self.ftsDB.peekFTSFile( ftsFileID ) except Exception, error: gLogger.exception( error ) return S_ERROR( error )
def export_getFTSFile( self, ftsFileID ): """ get FTSFile from FTSDB """ try: getFile = self.ftsDB.getFTSFile( ftsFileID ) except Exception, error: gLogger.exception( error ) return S_ERROR( error )
def doCommand(self): """ Return getPeriods from ResourceStatus Client - args[0] should be a ValidRes - args[1] should be the name of the ValidRes - args[2] should be the present status - args[3] are the number of hours requested """ super(RSPeriods_Command, self).doCommand() if self.client is None: from DIRAC.ResourceStatusSystem.Client.ResourceStatusClient import ResourceStatusClient self.client = ResourceStatusClient() try: res = self.client.getPeriods(self.args[0], self.args[1], self.args[2], self.args[3]) except: gLogger.exception("Exception when calling ResourceStatusClient for %s %s" % (self.args[0], self.args[1])) return {"Result": "Unknown"} return {"Result": res}
def loadAgentModules( self, modulesList, hideExceptions = False ): """ Load all modules required in moduleList """ result = self.__loader.loadModules( modulesList, hideExceptions = hideExceptions ) if not result[ 'OK' ]: return result self.__agentModules = self.__loader.getModules() for agentName in self.__agentModules: agentData = self.__agentModules[ agentName ] agentData[ 'running' ] = False try: instanceObj = agentData[ 'classObj' ]( agentName, agentData[ 'loadName' ], self.__baseAgentName ) result = instanceObj.am_initialize() if not result[ 'OK' ]: return S_ERROR( "Error while calling initialize method of %s: %s" % ( agentName, result[ 'Message' ] ) ) agentData[ 'instanceObj' ] = instanceObj except Exception, excp: if not hideExceptions: gLogger.exception( "Can't load agent %s" % agentName ) return S_ERROR( "Can't load agent %s: \n %s" % ( agentName, excp ) ) agentPeriod = instanceObj.am_getPollingTime() result = self.__scheduler.addPeriodicTask( agentPeriod, instanceObj.am_go, executions = instanceObj.am_getMaxCycles(), elapsedTime = agentPeriod ) if not result[ 'OK' ]: return result taskId = result[ 'Value' ] self.__tasks[ result[ 'Value' ] ] = agentName agentData[ 'taskId' ] = taskId agentData[ 'running' ] = True
def getRequest(self,requestType,status): """ Get request from RequestDB. First try the local repository then if none available or error try random repository """ try: #Create list with two RequestDB URLs to try url = self.localUrl urls = [url] urls.append(self.voBoxUrls.pop()) for url in urls: requestRPCClient = RPCClient(url,timeout=120) res = requestRPCClient.getRequest(requestType,status) if res['OK']: if res['Request']: gLogger.info("Got '%s' request from RequestDB (%s) with status '%s'" % (requestType,url,status)) res['Server'] = url return res else: gLogger.info("Found no '%s' requests on RequestDB (%s) with status '%s'" % (requestType,url,status)) else: errKey = "Failed getting request from %s" % url errExpl = " : %s of %s because: %s" % (requestType,status,res['Message']) gLogger.error(errKey,errExpl) return res except Exception,x: errKey = "Failed to get request" errExpl = " : %s" %str(x) gLogger.exception(errKey,errExpl) return S_ERROR(errKey+errExpl)
def _initMonitoring( self ): #Init extra bits of monitoring self._monitor.setComponentType( MonitoringClient.COMPONENT_SERVICE ) self._monitor.setComponentName( self._name ) self._monitor.setComponentLocation( self._cfg.getURL() ) self._monitor.initialize() self._monitor.registerActivity( "Connections", "Connections received", "Framework", "connections", MonitoringClient.OP_RATE ) self._monitor.registerActivity( "Queries", "Queries served", "Framework", "queries", MonitoringClient.OP_RATE ) self._monitor.registerActivity( 'CPU', "CPU Usage", 'Framework', "CPU,%", MonitoringClient.OP_MEAN, 600 ) self._monitor.registerActivity( 'MEM', "Memory Usage", 'Framework', 'Memory,MB', MonitoringClient.OP_MEAN, 600 ) self._monitor.registerActivity( 'PendingQueries', "Pending queries", 'Framework', 'queries', MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'ActiveQueries', "Active queries", 'Framework', 'threads', MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'RunningThreads', "Running threads", 'Framework', 'threads', MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'MaxFD', "Max File Descriptors", 'Framework', 'fd', MonitoringClient.OP_MEAN ) self._monitor.setComponentExtraParam( 'DIRACVersion', DIRAC.version ) self._monitor.setComponentExtraParam( 'platform', DIRAC.getPlatform() ) self._monitor.setComponentExtraParam( 'startTime', Time.dateTime() ) for prop in ( ( "__RCSID__", "version" ), ( "__doc__", "description" ) ): try: value = getattr( self._handler[ 'module' ], prop[0] ) except Exception as e: gLogger.exception( e ) gLogger.error( "Missing property", prop[0] ) value = 'unset' self._monitor.setComponentExtraParam( prop[1], value ) for secondaryName in self._cfg.registerAlsoAs(): gLogger.info( "Registering %s also as %s" % ( self._name, secondaryName ) ) self._validNames.append( secondaryName ) return S_OK()
def transfer_toClient( self, fileId, token, fileHelper ): """ Get graphs data :param str fileId encoded plot attributes :param object :param DIRAC.Core.DISET.private.FileHelper.FileHelper fileHelper """ # First check if we've got to generate the plot if len( fileId ) > 5 and fileId[1] == ':': gLogger.info( "Seems the file request is a plot generation request!" ) try: result = self.__generatePlotFromFileId( fileId ) except Exception as e: # pylint: disable=broad-except gLogger.exception( "Exception while generating plot", str( e ) ) result = S_ERROR( "Error while generating plot: %s" % str( e ) ) if not result[ 'OK' ]: self.__sendErrorAsImg( result[ 'Message' ], fileHelper ) fileHelper.sendEOF() return result fileId = result[ 'Value' ] retVal = gDataCache.getPlotData( fileId ) if not retVal[ 'OK' ]: self.__sendErrorAsImg( retVal[ 'Message' ], fileHelper ) return retVal retVal = fileHelper.sendData( retVal[ 'Value' ] ) if not retVal[ 'OK' ]: return retVal fileHelper.sendEOF() return S_OK()
def __listenAutoReceiveConnections(self): while self.__listeningForMessages: self.__trInOutLock.acquire() try: sIdList = [] for trid in self.__messageTransports: mt = self.__messageTransports[trid] if not mt["listen"]: continue sIdList.append((trid, mt["transport"].getSocket())) if not sIdList: self.__listeningForMessages = False return finally: self.__trInOutLock.release() try: inList, outList, exList = select.select([pos[1] for pos in sIdList], [], [], 1) if len(inList) == 0: continue except: from DIRAC import gLogger gLogger.exception("Exception while select'ing persistent connections") continue for sock in inList: for iPos in range(len(sIdList)): if sock == sIdList[iPos][1]: trid = sIdList[iPos][0] if trid in self.__messageTransports: result = self.__receiveMsgDataAndQueue(trid) if not result["OK"]: self.removeTransport(trid) break
def setRequest(self,requestType,requestName,requestString,requestStatus='ToDo',url=''):
  """ Set request. A URL can be supplied; if not, all VOBOXes will be tried in random order.
  """
  try:
    urls = []
    if url:
      urls.append(url)
    else:
      urls.extend(self.voBoxUrls)
    for url in urls:
      requestRPCClient = RPCClient(url)
      res = requestRPCClient.setRequest(requestType,requestName,requestStatus,requestString)
      if res['OK']:
        gLogger.info("Succeeded setting request for %s at %s" % (requestName,url))
        res["Server"] = url
        return res
      else:
        errKey = "Failed setting request at %s" % url
        errExpl = " : for %s because: %s" % (requestName,res['Message'])
        gLogger.error(errKey,errExpl)
    errKey = "Completely failed setting request"
    errExpl = " : %s\n%s\n%s" % (requestName,requestType,requestString)
    gLogger.fatal(errKey,errExpl)
    return S_ERROR(errKey)
  except Exception,x:
    errKey = "Completely failed setting request"
    errExpl = " : for %s with exception %s" % (requestName,str(x))
    gLogger.exception(errKey,errExpl)
    return S_ERROR(errKey)
def getRequestSummary(self,url=''):
  """ Get the summary of requests in the RequestDBs.
      If a URL is not supplied, the summary is obtained from all VOBOXes.
  """
  try:
    if url:
      urls = [url]
    else:
      urls = self.voBoxUrls
    res = S_OK( {} )
    for url in urls:
      requestRPCClient = RPCClient(url,timeout=120)
      res['Value'][url] = {}
      result = requestRPCClient.getRequestSummary()
      if result['OK']:
        gLogger.info("Succeeded getting request summary at %s" % url)
        res['Value'][url] = result['Value']
      else:
        errKey = "Failed getting request summary"
        errExpl = " : at %s because %s" % (url,result['Message'])
        gLogger.error(errKey,errExpl)
    return res
  except Exception,x:
    errKey = "Failed getting request summary"
    errExpl = " : with exception %s" % str(x)
    gLogger.exception(errKey,errExpl)
    return S_ERROR(errKey+errExpl)
def _instantiateHandler( self, trid, proposalTuple = None ): """ Generate an instance of the handler for a given service """ #Generate the client params clientParams = { 'serviceStartTime' : self._startTime } if proposalTuple: clientParams[ 'clientSetup' ] = proposalTuple[0][1] if len( proposalTuple[0] ) < 3: clientParams[ 'clientVO' ] = gConfig.getValue( "/DIRAC/VirtualOrganization", "unknown" ) else: clientParams[ 'clientVO' ] = proposalTuple[0][2] clientTransport = self._transportPool.get( trid ) if clientTransport: clientParams[ 'clientAddress' ] = clientTransport.getRemoteAddress() #Generate handler dict with per client info handlerInitDict = dict( self._serviceInfoDict ) for key in clientParams: handlerInitDict[ key ] = clientParams[ key ] #Instantiate and initialize try: handlerInstance = self._handler[ 'class' ]( handlerInitDict, trid ) handlerInstance.initialize() except Exception as e: gLogger.exception( "Server error while loading handler: %s" % str( e ) ) return S_ERROR( "Server error while loading handler" ) return S_OK( handlerInstance )
request = RequestContainer() result = request.addSubRequest( { 'Attributes': { 'Operation': 'removePhysicalFile', 'TargetSE': SEName, 'ExecutionOrder': 1 } }, 'removal') index = result['Value'] fileDict = {'PFN': SEPFN, 'Status': 'Waiting'} request.setSubRequestFiles(index, 'removal', [fileDict]) return RequestClient().setRequest( "RemoteSBDeletion:%s|%s:%s" % (SEName, SEPFN, time.time()), request.toXML()['Value']) except Exception, e: gLogger.exception("Exception while setting deletion request") return S_ERROR("Cannot set deletion request: %s" % str(e)) else: gLogger.info("Deleting external Sandbox") try: rm = ReplicaManager() return rm.removeStorageFile(SEPFN, SEName) except Exception, e: gLogger.exception( "RM raised an exception while trying to delete a remote sandbox" ) return S_ERROR( "RM raised an exception while trying to delete a remote sandbox" )
def constructProductionLFNs(paramDict, bkClient=None, quick=True): """ Used for local testing of a workflow, a temporary measure until LFN construction is tidied. This works using the workflow commons for on the fly construction. """ try: keys = [ 'PRODUCTION_ID', 'JOB_ID', 'configVersion', 'outputList', 'configName', 'outputDataFileMask' ] for k in keys: if k not in paramDict: return S_ERROR('%s not defined' % k) productionID = paramDict['PRODUCTION_ID'] jobID = paramDict['JOB_ID'] wfConfigName = paramDict['configName'] wfConfigVersion = paramDict['configVersion'] wfMask = paramDict['outputDataFileMask'] if not isinstance(wfMask, list): wfMask = [i.lower().strip() for i in wfMask.split(';')] outputList = paramDict['outputList'] fileTupleList = [] gLogger.verbose( 'wfConfigName = %s, wfConfigVersion = %s, wfMask = %s' % (wfConfigName, wfConfigVersion, wfMask)) for info in outputList: try: fileName = info['outputDataName'] except KeyError: # this happens when the parameters are set at runtime (e.g. parametric jobs) # should only apply for preSubmission LFNs prodID = str(productionID).zfill(8) jobID = str(jobID).zfill(8) stepInstanceNumber = '1' # can't be more precise at this stage fileName = "%s_%s_%s" % (prodID, jobID, stepInstanceNumber) fileTupleList.append((fileName, info['outputDataType'])) # Strip output data according to file mask fileTupleListMasked = _applyMask(wfMask, fileTupleList) lfnRoot = _getLFNRoot('', wfConfigName, wfConfigVersion, bkClient, quick=quick) gLogger.debug('LFN_ROOT is: %s' % (lfnRoot)) debugRoot = _getLFNRoot('', 'debug', wfConfigVersion, bkClient, quick=quick) gLogger.verbose('LFN_ROOT is: %s' % (lfnRoot)) if not lfnRoot: return S_ERROR('LFN root could not be constructed') # Get all LFN(s) to both output data and BK lists at this point (fine for BK) outputData = [] bkLFNs = [] debugLFNs = [] # outputData is masked for fileTuple in fileTupleListMasked: lfn = _makeProductionLFN( str(jobID).zfill(8), lfnRoot, fileTuple, str(productionID).zfill(8)) outputData.append(lfn) # BKLFNs and debugLFNs are not masked for fileTuple in fileTupleList: lfn = _makeProductionLFN( str(jobID).zfill(8), lfnRoot, fileTuple, str(productionID).zfill(8)) bkLFNs.append(lfn) if debugRoot: debugLFNs.append( _makeProductionLFN( str(jobID).zfill(8), debugRoot, fileTuple, str(productionID).zfill(8))) if debugRoot: debugLFNs.append( _makeProductionLFN( str(jobID).zfill(8), debugRoot, ('%s_core' % str(jobID).zfill(8), 'core'), str(productionID).zfill(8))) # Get log file path - unique for all modules logPath = _makeProductionPath(str(jobID).zfill(8), lfnRoot, 'LOG', str(productionID).zfill(8), log=True) logFilePath = ['%s/%s' % (logPath, str(jobID).zfill(8))] logTargetPath = [ '%s/%s_%s.tar' % (logPath, str(productionID).zfill(8), str(jobID).zfill(8)) ] # [ aside, why does makeProductionPath not append the jobID itself ???? # this is really only used in one place since the logTargetPath is just # written to a text file (should be reviewed)... 
] if not outputData: gLogger.info('No output data LFN(s) constructed') else: gLogger.verbose('Created the following output data LFN(s):\n%s' % ('\n'.join(outputData))) gLogger.verbose('Log file path is:\n%s' % logFilePath[0]) gLogger.verbose('Log target path is:\n%s' % logTargetPath[0]) if bkLFNs: gLogger.verbose('BookkeepingLFN(s) are:\n%s' % ('\n'.join(bkLFNs))) if debugLFNs: gLogger.verbose('DebugLFN(s) are:\n%s' % ('\n'.join(debugLFNs))) jobOutputs = { 'ProductionOutputData': outputData, 'LogFilePath': logFilePath, 'LogTargetPath': logTargetPath, 'BookkeepingLFNs': bkLFNs, 'DebugLFNs': debugLFNs } return S_OK(jobOutputs) except Exception as e: gLogger.exception(str(e)) return S_ERROR(e)
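# A minimal invocation sketch for constructProductionLFNs above. All values are
# illustrative assumptions; only the dictionary keys are taken from the checks at the
# top of the function (PRODUCTION_ID, JOB_ID, configName, configVersion, outputList,
# outputDataFileMask) and from the keys of the returned jobOutputs dict.
paramDict = { 'PRODUCTION_ID' : 12345,
              'JOB_ID' : 67,
              'configName' : 'MC',
              'configVersion' : '2012',
              'outputList' : [ { 'outputDataName' : '00012345_00000067_1.sim',
                                 'outputDataType' : 'sim' } ],
              'outputDataFileMask' : 'sim' }
result = constructProductionLFNs( paramDict )
if result[ 'OK' ]:
  # result[ 'Value' ] carries ProductionOutputData, BookkeepingLFNs, DebugLFNs,
  # LogFilePath and LogTargetPath, as assembled in the function body
  print result[ 'Value' ][ 'ProductionOutputData' ]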
class StorageElementProxyHandler(RequestHandler): """ .. class:: StorageElementProxyHandler """ types_callProxyMethod = [ StringTypes, StringTypes, ListType, DictType ] def export_callProxyMethod( self, se, name, args, kargs ): """ A generic method to call methods of the Storage Element. """ res = pythonCall( 200, self.__proxyWrapper, se, name, args, kargs ) if res['OK']: return res['Value'] return res def __proxyWrapper( self, se, name, args, kargs ): """ The wrapper will obtain the client proxy and set it up in the environment. The required functionality is then executed and returned to the client. """ res = self.__prepareSecurityDetails() if not res['OK']: return res credDict = self.getRemoteCredentials() group = credDict['group'] vo = Registry.getVOForGroup( group ) if not vo: return S_ERROR( 'Can not determine VO of the operation requester' ) storageElement = StorageElement( se, vo = vo ) method = getattr( storageElement, name ) if hasattr( storageElement, name ) else None if not method: return S_ERROR( "Method '%s' isn't implemented!" % name ) if not callable( getattr( storageElement, name ) ): return S_ERROR( "Attribute '%s' isn't a method!" % name ) return method( *args, **kargs ) types_uploadFile = [ StringTypes, StringTypes ] def export_uploadFile( self, se, pfn ): """ This method uploads a file present in the local cache to the specified storage element """ res = pythonCall( 300, self.__uploadFile, se, pfn ) if res['OK']: return res['Value'] return res def __uploadFile(self, se, pfn): """ proxied upload file """ res = self.__prepareSecurityDetails() if not res['OK']: return res # Put file to the SE try: storageElement = StorageElement(se) except AttributeError, x: errStr = "__uploadFile: Exception while instantiating the Storage Element." gLogger.exception( errStr, se, str(x) ) return S_ERROR(errStr) putFileDir = "%s/putFile" % BASE_PATH localFileName = "%s/%s" % ( putFileDir, os.path.basename(pfn) ) res = returnSingleResult( storageElement.putFile( { pfn : localFileName } ) ) if not res['OK']: gLogger.error("prepareFile: Failed to put local file to storage.", res['Message'] ) # Clear the local cache try: gLogger.debug("Removing temporary file", localFileName ) os.remove( localFileName ) except Exception as x: gLogger.exception("Failed to remove local file", localFileName, x ) return res
class StorageElementProxyHandler(RequestHandler): """ .. class:: StorageElementProxyHandler """ types_callProxyMethod = [StringType, StringType, ListType, DictType] def export_callProxyMethod(self, se, name, args, kargs): """ A generic method to call methods of the Storage Element. """ res = pythonCall(200, self.__proxyWrapper, se, name, args, kargs) if res['OK']: return res['Value'] return res def __proxyWrapper(self, se, name, args, kargs): """ The wrapper will obtain the client proxy and set it up in the environment. The required functionality is then executed and returned to the client. """ res = self.__prepareSecurityDetails() if not res['OK']: return res storageElement = StorageElement(se) method = getattr(storageElement, name) if hasattr( storageElement, name) else None if not method: return S_ERROR("Method '%s' isn't implemented!" % name) if not callable(getattr(storageElement, name)): return S_ERROR("Attribute '%s' isn't a method!" % name) return method(*args, **kargs) types_uploadFile = [StringType, StringType] def export_uploadFile(self, se, pfn): """ This method uploads a file present in the local cache to the specified storage element """ res = pythonCall(300, self.__uploadFile, se, pfn) if res['OK']: return res['Value'] return res def __uploadFile(self, se, pfn): """ proxied upload file """ res = self.__prepareSecurityDetails() if not res['OK']: return res # Put file to the SE try: storageElement = StorageElement(se) except AttributeError, x: errStr = "__uploadFile: Exception while instantiating the Storage Element." gLogger.exception(errStr, se, str(x)) return S_ERROR(errStr) putFileDir = "%s/putFile" % BASE_PATH localFileName = "%s/%s" % (putFileDir, os.path.basename(pfn)) res = storageElement.putFile({pfn: localFileName}, True) if not res['OK']: gLogger.error("prepareFile: Failed to put local file to storage.", res['Message']) # Clear the local cache try: shutil.rmtree(putFileDir) gLogger.debug("Cleared existing putFile cache") except Exception, x: gLogger.exception("Failed to remove destination dir.", putFileDir, x)
fileName = args[0] import os from DIRAC import exit as DIRACExit, gLogger from DIRAC.Interfaces.API.Dirac import Dirac from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient stageLfns = {} if os.path.exists( fileName ): try: lfnFile = open( fileName ) lfns = [ k.strip() for k in lfnFile.readlines() ] lfnFile.close() except Exception: gLogger.exception( 'Can not open file', fileName ) DIRACExit( -1 ) else: lfns = args[:len(args)-1] stageLfns[seName] = lfns stagerClient = StorageManagerClient() res = stagerClient.setRequest( stageLfns, 'WorkloadManagement', 'updateJobFromStager@WorkloadManagement/JobStateUpdate', 0 ) # fake JobID = 0 if not res['OK']: gLogger.error( res['Message'] ) DIRACExit( -1 ) else: print "Stage request submitted for LFNs:\n %s" %lfns
def loadModule( self, modName, hideExceptions = False, parentModule = False ): """ Load module name. name must take the form [DIRAC System Name]/[DIRAC module] """ while modName and modName[0] == "/": modName = modName[1:] if modName in self.__modules: return S_OK() modList = modName.split( "/" ) if len( modList ) != 2: return S_ERROR( "Can't load %s: Invalid module name" % ( modName ) ) csSection = self.__sectionFinder( modName ) loadGroup = gConfig.getValue( "%s/Load" % csSection, [] ) #Check if it's a load group if loadGroup: gLogger.info( "Found load group %s. Will load %s" % ( modName, ", ".join( loadGroup ) ) ) for loadModName in loadGroup: if loadModName.find( "/" ) == -1: loadModName = "%s/%s" % ( modList[0], loadModName ) result = self.loadModule( loadModName, hideExceptions = hideExceptions, parentModule = False ) if not result[ 'OK' ]: return result return S_OK() #Normal load loadName = gConfig.getValue( "%s/Module" % csSection, "" ) if not loadName: loadName = modName gLogger.info( "Loading %s" % ( modName ) ) else: if loadName.find( "/" ) == -1: loadName = "%s/%s" % ( modList[0], loadName ) gLogger.info( "Loading %s (%s)" % ( modName, loadName ) ) #If already loaded, skip loadList = loadName.split( "/" ) if len( loadList ) != 2: return S_ERROR( "Can't load %s: Invalid module name" % ( loadName ) ) system, module = loadList #Load className = module if self.__modSuffix: className = "%s%s" % ( className, self.__modSuffix ) if loadName not in self.__loadedModules: #Check if handler is defined loadCSSection = self.__sectionFinder( loadName ) handlerPath = gConfig.getValue( "%s/HandlerPath" % loadCSSection, "" ) if handlerPath: gLogger.info( "Trying to %s from CS defined path %s" % ( loadName, handlerPath ) ) gLogger.verbose( "Found handler for %s: %s" % ( loadName, handlerPath ) ) handlerPath = handlerPath.replace( "/", "." ) if handlerPath.find( ".py", len( handlerPath ) -3 ) > -1: handlerPath = handlerPath[ :-3 ] className = List.fromChar( handlerPath, "." )[-1] result = self.__recurseImport( handlerPath ) if not result[ 'OK' ]: return S_ERROR( "Cannot load user defined handler %s: %s" % ( handlerPath, result[ 'Message' ] ) ) gLogger.verbose( "Loaded %s" % handlerPath ) elif parentModule: gLogger.info( "Trying to autodiscover %s from parent" % loadName ) #If we've got a parent module, load from there. modImport = module if self.__modSuffix: modImport = "%s%s" % ( modImport, self.__modSuffix ) result = self.__recurseImport( modImport, parentModule, hideExceptions = hideExceptions ) else: #Check to see if the module exists in any of the root modules gLogger.info( "Trying to autodiscover %s" % loadName ) rootModulesToLook = getInstalledExtensions() for rootModule in rootModulesToLook: importString = '%s.%sSystem.%s.%s' % ( rootModule, system, self.__importLocation, module ) if self.__modSuffix: importString = "%s%s" % ( importString, self.__modSuffix ) gLogger.verbose( "Trying to load %s" % importString ) result = self.__recurseImport( importString, hideExceptions = hideExceptions ) #Error while loading if not result[ 'OK' ]: return result #Something has been found! 
break :) if result[ 'Value' ]: gLogger.verbose( "Found %s" % importString ) break #Nothing found if not result[ 'Value' ]: return S_ERROR( "Could not find %s" % loadName ) modObj = result[ 'Value' ] try: #Try to get the class from the module modClass = getattr( modObj, className ) except AttributeError: location = "" if '__file__' in dir( modObj ): location = modObj.__file__ else: location = modObj.__path__ gLogger.exception( "%s module does not have a %s class!" % ( location, module ) ) return S_ERROR( "Cannot load %s" % module ) #Check if it's subclass if not issubclass( modClass, self.__superClass ): return S_ERROR( "%s has to inherit from %s" % ( loadName, self.__superClass.__name__ ) ) self.__loadedModules[ loadName ] = { 'classObj' : modClass, 'moduleObj' : modObj } #End of loading of 'loadName' module #A-OK :) self.__modules[ modName ] = self.__loadedModules[ loadName ].copy() #keep the name of the real code module self.__modules[ modName ][ 'modName' ] = modName self.__modules[ modName ][ 'loadName' ] = loadName gLogger.notice( "Loaded module %s" % modName ) return S_OK()
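# A hedged sketch of the CS options loadModule consults for a module named, say,
# "WorkloadManagement/Matcher". The section path comes from self.__sectionFinder (not
# shown in this snippet), so the layout and values below are illustrative assumptions;
# only the option names (Load, Module, HandlerPath) are taken from the code.
#
#   Matcher
#   {
#     # point this entry at another module ...
#     Module = Matcher
#     # ... or at a handler file outside the usual System/Service layout
#     HandlerPath = SomeExtension/WorkloadManagementSystem/Service/MatcherHandler.py
#     # ... or turn it into a load group of several modules
#     # Load = ModuleA, ModuleB
#   }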
def __treatRenewalCallback(self, oTJ, exceptionList): gLogger.exception(lException=exceptionList)
ftsJob.addFile( FTSFile( ftsFile ) ) except Exception, error: gLogger.exception( error ) return S_ERROR( error ) isValid = self.ftsValidator.validate( ftsJob ) if not isValid['OK']: gLogger.error( isValid['Message'] ) return isValid try: put = self.ftsDB.putFTSJob( ftsJob ) if not put['OK']: return S_ERROR( put['Message'] ) return S_OK() except Exception, error: gLogger.exception( error ) return S_ERROR( error ) types_getFTSJob = [ [IntType, LongType] ] @classmethod def export_getFTSJob( self, ftsJobID ): """ read FTSJob for processing given FTSJobID """ try: getFTSJob = self.ftsDB.getFTSJob( ftsJobID ) if not getFTSJob['OK']: gLogger.error( getFTSJob['Message'] ) return getFTSJob getFTSJob = getFTSJob['Value'] if not getFTSJob: return S_OK() toJSON = getFTSJob.toJSON()
for switch, _val in parList: if switch == 'BinaryTag': try: # Get the binaryTag name. If an error occurs, an exception is thrown binaryTag = LbPlatformUtils.host_binary_tag() if not binaryTag: gLogger.fatal( "There is no binaryTag corresponding to this machine") sendMail( "There is no binaryTag corresponding to this machine") dExit(1) print binaryTag dExit(0) except Exception as e: msg = "Exception getting binaryTag: " + repr(e) gLogger.exception(msg, lException=e) sendMail(msg) dExit(1) try: # Get the platform name. If an error occurs, an exception is thrown platform = LbPlatformUtils.dirac_platform() if not platform: gLogger.fatal("There is no platform corresponding to this machine") sendMail("There is no platform corresponding to this machine") dExit(1) print platform dExit(0) except Exception as e: msg = "Exception getting platform: " + repr(e)
return 2 ###################### Note ############################## # The below arguments are automatically generated by the # # JobAgent, do not edit them. # ########################################################## ret = -3 try: jsonFileName = os.path.realpath(__file__) + '.json' with open(jsonFileName, 'r') as f: jobArgsFromJSON = json.loads(f.readlines()[0]) jobArgs = ast.literal_eval(jobArgsFromJSON) if not isinstance(jobArgs, dict): raise TypeError, "jobArgs is of type %s" % type(jobArgs) if 'Job' not in jobArgs: raise ValueError, "jobArgs does not contain 'Job' key: %s" % str( jobArgs) ret = execute(jobArgs) gJobReport.commit() except Exception as exc: #pylint: disable=broad-except gLogger.exception("JobWrapperTemplate exception", lException=exc) try: gJobReport.commit() ret = -1 except Exception as exc: #pylint: disable=broad-except gLogger.exception("Could not commit the job report", lException=exc) ret = -2 sys.exit(ret)
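# A sketch of the companion <template>.json file the wrapper above expects: a single
# line holding a JSON string which itself contains a Python-literal dict with at least
# a 'Job' key (the file name and the job attributes shown are illustrative assumptions).
import json

jobArgs = { 'Job' : { 'JobID' : '123', 'JobType' : 'User' },
            'CE' : {},
            'Optimizer' : {} }
with open( 'JobWrapperTemplate.py.json', 'w' ) as f:
  # json.loads() will give back the repr string, which ast.literal_eval turns into a dict
  f.write( json.dumps( repr( jobArgs ) ) )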
from DIRAC.Core.Base import Script Script.setUsageMessage(""" Get the currently defined user data volume quotas Usage: %s [options] """ % Script.scriptName) Script.parseCommandLine(ignoreErrors=False) import DIRAC from DIRAC import gLogger, gConfig from DIRAC.Core.Security.ProxyInfo import getProxyInfo res = getProxyInfo(False, False) if not res['OK']: gLogger.error("Failed to get client proxy information.", res['Message']) DIRAC.exit(2) proxyInfo = res['Value'] username = proxyInfo['username'] try: quota = gConfig.getValue('/Registry/DefaultStorageQuota', 0.) quota = gConfig.getValue('/Registry/Users/%s/Quota' % username, quota) gLogger.notice('Current quota found to be %.1f GB' % quota) DIRAC.exit(0) except Exception, x: gLogger.exception("Failed to convert retrieved quota", '', x) DIRAC.exit(-1)
def srm_pfnparse(pfn): """ Parse pfn and save all bits of information into dictionary :param str pfn: pfn string """ if not pfn: return S_ERROR( "wrong 'pfn' argument value in function call, expected non-empty string, got %s" % str(pfn)) pfnDict = dict.fromkeys( ["Protocol", "Host", "Port", "WSUrl", "Path", "FileName"], "") try: if ":" not in pfn: # pfn = /a/b/c pfnDict["Path"] = os.path.dirname(pfn) pfnDict["FileName"] = os.path.basename(pfn) else: # pfn = protocol:/a/b/c # pfn = protocol://host/a/b/c # pfn = protocol://host:port/a/b/c # pfn = protocol://host:port/wsurl?=/a/b/c pfnDict["Protocol"] = pfn[0:pfn.index(":")] # # remove protocol: pfn = pfn[len(pfnDict["Protocol"]):] # # remove :// or : pfn = pfn[3:] if pfn.startswith("://") else pfn[1:] if pfn.startswith("/"): # # /a/b/c pfnDict["Path"] = os.path.dirname(pfn) pfnDict["FileName"] = os.path.basename(pfn) else: # # host/a/b/c # # host:port/a/b/c # # host:port/wsurl?=/a/b/c if ":" not in pfn: # # host/a/b/c pfnDict["Host"] = pfn[0:pfn.index("/")] pfn = pfn[len(pfnDict["Host"]):] pfnDict["Path"] = os.path.dirname(pfn) pfnDict["FileName"] = os.path.basename(pfn) else: # # host:port/a/b/c # # host:port/wsurl?=/a/b/c pfnDict["Host"] = pfn[0:pfn.index(":")] # # port/a/b/c # # port/wsurl?=/a/b/c pfn = pfn[len(pfnDict["Host"]) + 1:] pfnDict["Port"] = pfn[0:pfn.index("/")] # # /a/b/c # # /wsurl?=/a/b/c pfn = pfn[len(pfnDict["Port"]):] WSUrl = pfn.find("?") WSUrlEnd = pfn.find("=") if WSUrl == -1 and WSUrlEnd == -1: # # /a/b/c pfnDict["Path"] = os.path.dirname(pfn) pfnDict["FileName"] = os.path.basename(pfn) else: # # /wsurl?blah=/a/b/c pfnDict["WSUrl"] = pfn[0:WSUrlEnd + 1] # # /a/b/c pfn = pfn[len(pfnDict["WSUrl"]):] pfnDict["Path"] = os.path.dirname(pfn) pfnDict["FileName"] = os.path.basename(pfn) return S_OK(pfnDict) except Exception: # pylint: disable=broad-except errStr = "Pfn.srm_pfnparse: Exception while parsing pfn: " + str(pfn) gLogger.exception(errStr) return S_ERROR(errStr)
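# A worked example for srm_pfnparse above, traced through the branches of the function
# (the PFN itself is made up for illustration):
result = srm_pfnparse( "srm://srm.example.org:8443/srm/managerv2?SFN=/lhcb/user/a/file.dst" )
# result[ 'Value' ] ==
# { 'Protocol' : 'srm',
#   'Host'     : 'srm.example.org',
#   'Port'     : '8443',
#   'WSUrl'    : '/srm/managerv2?SFN=',
#   'Path'     : '/lhcb/user/a',
#   'FileName' : 'file.dst' }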
]))
Script.parseCommandLine(ignoreErrors=True)

runRanges = []
for arg in Script.getPositionalArgs():
  runRanges += arg.split(',')

runSet = set()
for run in runRanges:
  try:
    if ':' in run:
      run1, run2 = run.split(':')
      runSet.update(range(int(run1), int(run2) + 1))
    else:
      runSet.add(int(run))
  except (ValueError, IndexError) as e:
    gLogger.exception("Invalid run number", run, lException=e)
    DIRAC.exit(1)

production = None
item = None
byValue = False
active = False
for switch in Script.getUnprocessedSwitches():
  if switch[0] == 'Production':
    try:
      production = [int(prod) for prod in switch[1].split(',')]
    except ValueError as e:
      gLogger.exception('Bad production ID', lException=e)
      DIRAC.exit(1)
  elif switch[0] == 'Information':
    item = switch[1]
class TransformationAgent(AgentModule): def initialize(self): self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin') self.checkCatalog = self.am_getOption('CheckCatalog', 'yes') # This sets the Default Proxy to used as that defined under # /Operations/Shifter/ProductionManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'ProductionManager') self.transDB = TransformationClient('TransformationDB') self.rm = ReplicaManager() return S_OK() def execute(self): # Get the transformations to process res = self.getTransformations() if not res['OK']: gLogger.info("%s.execute: Failed to obtain transformations: %s" % (AGENT_NAME, res['Message'])) return S_OK() # Process the transformations for transDict in res['Value']: transID = long(transDict['TransformationID']) gLogger.info("%s.execute: Processing transformation %s." % (AGENT_NAME, transID)) startTime = time.time() res = self.processTransformation(transDict) if not res['OK']: gLogger.info( "%s.execute: Failed to process transformation: %s" % (AGENT_NAME, res['Message'])) else: gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % (AGENT_NAME, time.time() - startTime)) return S_OK() def getTransformations(self): # Obtain the transformations to be executed transName = self.am_getOption('Transformation', 'All') if transName == 'All': gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME) res = self.transDB.getTransformations( {'Status': ['Active', 'Completing', 'Flush']}, extraParams=True) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, res['Message']) return res transformations = res['Value'] gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % (AGENT_NAME, len(transformations))) else: gLogger.info( "%s.getTransformations: Initializing for transformation %s." % (AGENT_NAME, transName)) res = self.transDB.getTransformation(transName, extraParams=True) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, res['Message']) return res transformations = [res['Value']] return S_OK(transformations) def processTransformation(self, transDict): transID = transDict['TransformationID'] # First get the LFNs associated to the transformation res = self.transDB.getTransformationFiles(condDict={ 'TransformationID': transID, 'Status': 'Unused' }) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message']) return res transFiles = res['Value'] lfns = res['LFNs'] if not lfns: gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME) if transDict['Status'] == 'Flush': res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message']) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." 
% AGENT_NAME) return S_OK() # Check the data is available with replicas res = self.__getDataReplicas(transID, lfns, active=(transDict['Type'].lower() not in ["replication", "removal"])) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message']) return res dataReplicas = res['Value'] # Get the plug-in type and create the plug-in object plugin = 'Standard' if transDict.has_key('Plugin') and transDict['Plugin']: plugin = transDict['Plugin'] gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % (AGENT_NAME, plugin)) res = self.__generatePluginObject(plugin) if not res['OK']: return res oPlugin = res['Value'] # Get the plug-in and set the required params oPlugin.setParameters(transDict) oPlugin.setInputData(dataReplicas) oPlugin.setTransformationFiles(transFiles) res = oPlugin.generateTasks() if not res['OK']: gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message']) return res tasks = res['Value'] # Create the tasks allCreated = True created = 0 for se, lfns in tasks: res = self.transDB.addTaskForTransformation(transID, lfns, se) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, res['Message']) allCreated = False else: created += 1 if created: gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % (AGENT_NAME, created)) # If this production is to Flush if transDict['Status'] == 'Flush' and allCreated: res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message']) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME) return S_OK() ###################################################################### # # Internal methods used by the agent # def __generatePluginObject(self, plugin): """ This simply instantiates the TransformationPlugin class with the relevant plugin name """ try: plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin']) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x) return S_ERROR() try: evalString = "plugModule.TransformationPlugin('%s')" % plugin return S_OK(eval(evalString)) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % (AGENT_NAME, plugin), '', x) return S_ERROR()
def initialize(self): # Build the URLs self._url = self._cfg.getURL() if not self._url: return S_ERROR("Could not build service URL for %s" % self._name) gLogger.verbose("Service URL is %s" % self._url) # Load handler result = self._loadHandlerInit() if not result["OK"]: return result self._handler = result["Value"] # Initialize lock manager self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions()) self._threadPool = ThreadPoolExecutor(max(0, self._cfg.getMaxThreads())) self._msgBroker = MessageBroker("%sMSB" % self._name, threadPool=self._threadPool) # Create static dict self._serviceInfoDict = { "serviceName": self._name, "serviceSectionPath": PathFinder.getServiceSection(self._name), "URL": self._cfg.getURL(), "messageSender": MessageSender(self._name, self._msgBroker), "validNames": self._validNames, "csPaths": [ PathFinder.getServiceSection(svcName) for svcName in self._validNames ], } self.securityLogging = Operations().getValue( "EnableSecurityLogging", True) and getServiceOption( self._serviceInfoDict, "EnableSecurityLogging", True) # Initialize Monitoring # The import needs to be here because of the CS must be initialized before importing # this class (see https://github.com/DIRACGrid/DIRAC/issues/4793) from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter self.activityMonitoringReporter = MonitoringReporter( monitoringType="ServiceMonitoring") self._initMonitoring() # Call static initialization function try: self._handler["class"]._rh__initializeClass( dict(self._serviceInfoDict), self._lockManager, self._msgBroker, self.activityMonitoringReporter) if self._handler["init"]: for initFunc in self._handler["init"]: gLogger.verbose("Executing initialization function") try: result = initFunc(dict(self._serviceInfoDict)) except Exception as excp: gLogger.exception( "Exception while calling initialization function", lException=excp) return S_ERROR( "Exception while calling initialization function: %s" % str(excp)) if not isReturnStructure(result): return S_ERROR( "Service initialization function %s must return S_OK/S_ERROR" % initFunc) if not result["OK"]: return S_ERROR("Error while initializing %s: %s" % (self._name, result["Message"])) except Exception as e: errMsg = "Exception while initializing %s" % self._name gLogger.exception(e) gLogger.exception(errMsg) return S_ERROR(errMsg) if self.activityMonitoring: gThreadScheduler.addPeriodicTask(30, self.__reportActivity) gThreadScheduler.addPeriodicTask( 100, self.__activityMonitoringReporting) # Load actions after the handler has initialized itself result = self._loadActions() if not result["OK"]: return result self._actions = result["Value"] return S_OK()
          'Exception while calling LFC Mirror service ' + str(x))
    i += 1
    count += 1

# Return environment to the master LFC instance
os.environ['LFC_HOST'] = self.master_host

# Call the master LFC if all the mirrors failed
if not result['OK']:
  try:
    result = S_OK()
    if userDN:
      resAuth = pythonCall(self.timeout, self.lfc.setAuthorizationId, userDN)
      if not resAuth['OK']:
        result = S_ERROR('Failed to set user authorization')
    if result['OK']:
      method = getattr(self.lfc, self.call)
      resMeth = method(*parms, **kws)
      if not resMeth['OK']:
        result = S_ERROR('Timeout calling ' + self.call + " method")
      else:
        result = resMeth
  except Exception, x:
    gLogger.exception('Exception while calling LFC Master service')
    result = S_ERROR(
        'Exception while calling LFC Master service ' + str(x))
return result
def doCommand(self, sites=None):
  """
  Returns downtimes information for all the sites in input.

  :params:
    :attr:`sites`: list of site names (when not given, take every site)

  :returns:
    {'SiteName': {'SEVERITY': 'OUTAGE'|'AT_RISK', 'StartDate': 'aDate', ...} ... }
  """
  if self.client is None:
    from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
    self.client = GOCDBClient()

  if sites is None:
    from DIRAC.Core.DISET.RPCClient import RPCClient
    RPC = RPCClient("ResourceStatus/ResourceStatus")
    GOC_sites = RPC.getGridSitesList()
    if not GOC_sites['OK']:
      raise RSSException, where(self, self.doCommand) + " " + GOC_sites['Message']
    else:
      GOC_sites = GOC_sites['Value']
  else:
    GOC_sites = [getGOCSiteName(x)['Value'] for x in sites]

  try:
    res = self.client.getStatus('Site', GOC_sites, None, 120)
  except:
    gLogger.exception("Exception when calling GOCDBClient.")
    return {}

  if not res['OK']:
    raise RSSException, where(self, self.doCommand) + " " + res['Message']
  else:
    res = res['Value']

  if res is None:
    return {}

  resToReturn = {}

  for dt_ID in res:
    try:
      dt = {}
      dt['ID'] = dt_ID
      dt['StartDate'] = res[dt_ID]['FORMATED_START_DATE']
      dt['EndDate'] = res[dt_ID]['FORMATED_END_DATE']
      dt['Severity'] = res[dt_ID]['SEVERITY']
      dt['Description'] = res[dt_ID]['DESCRIPTION'].replace('\'', '')
      dt['Link'] = res[dt_ID]['GOCDB_PORTAL_URL']
      DIRACnames = getDIRACSiteName(res[dt_ID]['SITENAME'])
      if not DIRACnames['OK']:
        raise RSSException, DIRACnames['Message']
      DIRACnames = DIRACnames['Value']
      for DIRACname in DIRACnames:
        resToReturn[dt_ID.split()[0] + ' ' + DIRACname] = dt
    except KeyError:
      continue

  return resToReturn
def execute(arguments): """ The only real function executed here """ global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) if 'WorkingDirectory' in arguments: wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs( wdir ) # this will raise an exception if wdir already exists (which is ~OK) if os.path.isdir(wdir): os.chdir(wdir) except OSError as osError: if osError.errno == errno.EEXIST and os.path.isdir(wdir): gLogger.exception( 'JobWrapperTemplate found that the working directory already exists' ) rescheduleResult = rescheduleFailedJob( jobID, 'Working Directory already exists') else: gLogger.exception( 'JobWrapperTemplate could not create working directory' ) rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory') return 1 gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) # initialize doesn't return S_OK/S_ERROR except Exception as exc: #pylint: disable=broad-except gLogger.exception('JobWrapper failed the initialization phase', lException=exc) rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) try: job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization') except Exception as exc: #pylint: disable=broad-except gLogger.exception('JobWrapper failed sending job accounting', lException=exc) return 1 if 'InputSandbox' in arguments['Job']: gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while downloading input sandbox', lException=exc) rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if 'InputData' in arguments['Job']: if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError: gLogger.exception('JobWrapper failed to resolve input data') rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while resolving input data', lException=exc) rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments) else: gLogger.verbose('Job has no InputData requirement') gJobReport.commit() try: result = job.execute(arguments) if not result['OK']: gLogger.error('Failed to execute job', result['Message']) raise JobWrapperError((result['Message'], result['Errno'])) except JobWrapperError as exc: if exc.value[1] == 0 or str(exc.value[0]) == '0': gLogger.verbose('JobWrapper exited with 
status=0 after execution') if exc.value[1] == DErrno.EWMSRESC: gLogger.warn("Asked to reschedule job") rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport) job.sendJobAccounting(rescheduleResult, 'JobWrapper execution') return 1 gLogger.exception('Job failed in execution phase') gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 except Exception as exc: #pylint: disable=broad-except gLogger.exception('Job raised exception during execution phase', lException=exc) gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']: try: result = job.processJobOutputs(arguments) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except JobWrapperError as exc: gLogger.exception('JobWrapper failed to process output files') gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False) job.sendFailoverRequest('Failed', 'Uploading Job Outputs') return 2 except Exception as exc: # pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception while processing output files', lException=exc) gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False) gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False) job.sendFailoverRequest('Failed', 'Uploading Job Outputs') return 2 else: gLogger.verbose('Job has no OutputData or OutputSandbox requirement') try: # Failed jobs will return 1 / successful jobs will return 0 return job.finalize() except Exception as exc: #pylint: disable=broad-except gLogger.exception( 'JobWrapper raised exception during the finalization phase', lException=exc) return 2
def initialize(self): #Build the URLs self._url = self._cfg.getURL() if not self._url: return S_ERROR("Could not build service URL for %s" % self._name) gLogger.verbose("Service URL is %s" % self._url) #Load handler result = self._loadHandlerInit() if not result['OK']: return result self._handler = result['Value'] #Initialize lock manager self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions()) self._initMonitoring() self._threadPool = ThreadPool(max(1, self._cfg.getMinThreads()), max(0, self._cfg.getMaxThreads()), self._cfg.getMaxWaitingPetitions()) self._threadPool.daemonize() self._msgBroker = MessageBroker("%sMSB" % self._name, threadPool=self._threadPool) #Create static dict self._serviceInfoDict = { 'serviceName': self._name, 'serviceSectionPath': PathFinder.getServiceSection(self._name), 'URL': self._cfg.getURL(), 'messageSender': MessageSender(self._name, self._msgBroker), 'validNames': self._validNames, 'csPaths': [ PathFinder.getServiceSection(svcName) for svcName in self._validNames ] } #Call static initialization function try: self._handler['class']._rh__initializeClass( dict(self._serviceInfoDict), self._lockManager, self._msgBroker, self._monitor) if self._handler['init']: for initFunc in self._handler['init']: gLogger.verbose("Executing initialization function") try: result = initFunc(dict(self._serviceInfoDict)) except Exception as excp: gLogger.exception( "Exception while calling initialization function", lException=excp) return S_ERROR( "Exception while calling initialization function: %s" % str(excp)) if not isReturnStructure(result): return S_ERROR( "Service initialization function %s must return S_OK/S_ERROR" % initFunc) if not result['OK']: return S_ERROR("Error while initializing %s: %s" % (self._name, result['Message'])) except Exception as e: errMsg = "Exception while initializing %s" % self._name gLogger.exception(e) gLogger.exception(errMsg) return S_ERROR(errMsg) #Load actions after the handler has initialized itself result = self._loadActions() if not result['OK']: return result self._actions = result['Value'] gThreadScheduler.addPeriodicTask(30, self.__reportThreadPoolContents) return S_OK()
getFileDir = "%s/getFile" % BASE_PATH if os.path.exists(getFileDir): try: shutil.rmtree(getFileDir) gLogger.debug("Cleared existing getFile cache") except Exception, x: gLogger.exception("Failed to remove destination directory.", getFileDir, x) os.mkdir(getFileDir) # Get the file to the cache try: storageElement = StorageElement(se) except AttributeError, x: errStr = "prepareFile: Exception while instantiating the Storage Element." gLogger.exception(errStr, se, str(x)) return S_ERROR(errStr) res = storageElement.getFile(pfn, "%s/getFile" % BASE_PATH, True) if not res['OK']: gLogger.error("prepareFile: Failed to get local copy of file.", res['Message']) return res return S_OK() types_prepareFileForHTTP = [list(StringTypes) + [ListType]] def export_prepareFileForHTTP(self, lfn): """ This method simply gets the file to the local storage area using LFN """ # Do clean-up, should be a separate regular thread
def doCommand(self, SEs=None): """ Returns transfer quality using the DIRAC accounting system for every SE :params: :attr:`SEs`: list of storage elements (when not given, take every SE) :returns: {'SiteName': {TQ : 'Good'|'Fair'|'Poor'|'Idle'|'Bad'} ...} """ if SEs is None: # from DIRAC.Core.DISET.RPCClient import RPCClient RPC_RSS = RPCClient("ResourceStatus/ResourceStatus") SEs = RPC_RSS.getStorageElementsList() if not SEs['OK']: raise RSSException, where( self, self.doCommand) + " " + SEs['Message'] else: SEs = SEs['Value'] if self.RPC is None: # from DIRAC.Core.DISET.RPCClient import RPCClient self.RPC = RPCClient("Accounting/ReportGenerator", timeout=self.timeout) if self.client is None: from DIRAC.AccountingSystem.Client.ReportsClient import ReportsClient self.client = ReportsClient(rpcClient=self.RPC) fromD = datetime.datetime.utcnow() - datetime.timedelta(hours=2) toD = datetime.datetime.utcnow() try: qualityAll = self.client.getReport( 'DataOperation', 'Quality', fromD, toD, { 'OperationType': 'putAndRegister', 'Destination': SEs }, 'Channel') if not qualityAll['OK']: raise RSSException, where( self, self.doCommand) + " " + qualityAll['Message'] else: qualityAll = qualityAll['Value']['data'] except: gLogger.exception( "Exception when calling TransferQualityEverySEs_Command") return {} listOfDestSEs = [] for k in qualityAll.keys(): try: key = k.split(' -> ')[1] if key not in listOfDestSEs: listOfDestSEs.append(key) except: continue meanQuality = {} for destSE in listOfDestSEs: s = 0 n = 0 for k in qualityAll.keys(): try: if k.split(' -> ')[1] == destSE: n = n + len(qualityAll[k]) s = s + sum(qualityAll[k].values()) except: continue meanQuality[destSE] = s / n resToReturn = {} for se in meanQuality: resToReturn[se] = {'TQ': meanQuality[se]} return resToReturn
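# Worked sketch of the aggregation above with made-up numbers: qualityAll maps
# 'Source -> Destination' channels to {bin: quality} dictionaries, and the mean
# for a destination SE is taken over every value of every channel ending there.
qualityAll = {
  'CERN-RAW -> GRIDKA-DST': {1: 100.0, 2: 80.0},
  'CNAF-DST -> GRIDKA-DST': {1: 60.0},
  'CERN-RAW -> IN2P3-DST': {1: 90.0},
}
destSE = 'GRIDKA-DST'
values = [v for channel, points in qualityAll.items()
          if channel.split(' -> ')[1] == destSE
          for v in points.values()]
meanQuality = sum(values) / len(values)  # (100.0 + 80.0 + 60.0) / 3 = 80.0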
infoStr = "Using gfalthr from: \n%s" % gfal.__file__ gLogger.info( infoStr ) infoStr = "The version of gfalthr is %s" % gfal.gfal_version() gLogger.info( infoStr ) except Exception, x: errStr = "SRM2Storage.__init__: Failed to import gfalthr: %s." % ( x ) gLogger.warn( errStr ) try: import gfal infoStr = "Using gfal from: %s" % gfal.__file__ gLogger.info( infoStr ) infoStr = "The version of gfal is %s" % gfal.gfal_version() gLogger.info( infoStr ) except Exception, x: errStr = "SRM2Storage.__init__: Failed to import gfal: %s" % ( x ) gLogger.exception( errStr ) defaultProtocols = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', [] ) gLogger.info( 'Default list of protocols are: %s' % ( ', '.join( defaultProtocols ) ) ) return S_OK() ############################################################################# def getSiteProtocols( self, site, printOutput = False ): """ Allows to check the defined protocols for each site SE. """ result = self.__checkSiteIsValid( site ) if not result['OK']: return result
replace = False verbose = False for switch in Script.getUnprocessedSwitches(): if switch[0] == 'FixIt': fixIt = True elif switch[0] == 'NoBK': bkCheck = False elif switch[0] == 'Replace': replace = True elif switch[0] == 'Verbose': verbose = True elif switch[0] == 'MaxFiles': try: maxFiles = int(switch[1]) except Exception as e: gLogger.exception("Invalid value for MaxFiles", lException=e) pass # imports from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks if verbose: gLogger.setLevel('INFO') cc = ConsistencyChecks() cc.directories = dmScript.getOption('Directory', []) cc.lfns = dmScript.getOption('LFNs', []) + [lfn for arg in Script.getPositionalArgs() for lfn in arg.split(',')] bkQuery = dmScript.getBKQuery(visible='All') if bkQuery: bkQuery.setOption('ReplicaFlag', 'All') cc.bkQuery = bkQuery cc.seList = __getSEsFromOptions(dmScript)
def pfnparse(pfn): pfnDict = { 'Protocol': '', 'Host': '', 'Port': '', 'WSUrl': '', 'Path': '', 'FileName': '' } try: #gLogger.debug("Pfn.pfnunparse: Attempting to parse pfn %s." % pfn) if not re.search(':', pfn): # pfn = 'fullPath' directory = os.path.dirname(pfn) pfnDict['Path'] = directory fileName = os.path.basename(pfn) pfnDict['FileName'] = fileName else: #pfn = 'protocol:/fullPath' #pfn = 'protocol://host/fullPath' #pfn = 'protocol://host:port/fullPath' #pfn = 'protocol://host:port/wsurl/fullPath' protocol = pfn.split(':', 1)[0] pfnDict['Protocol'] = protocol if re.search('%s://' % protocol, pfn): pfn = pfn.replace('%s://' % protocol, '') else: pfn = pfn.replace('%s:' % protocol, '') #pfn = 'fullPath' #pfn = 'host/fullPath' #pfn = 'host:port/fullPath' #pfn = 'host:port/wsurl/fullPath' if pfn[0] == '/': #pfn = 'fullPath' directory = os.path.dirname(pfn) pfnDict['Path'] = directory fileName = os.path.basename(pfn) pfnDict['FileName'] = fileName else: #pfn = 'host/fullPath' #pfn = 'host:port/fullPath' #pfn = 'host:port/wsurl/fullPath' if not re.search(':', pfn): #pfn = 'host/fullPath' host = pfn.split('/', 1)[0] pfnDict['Host'] = host fullPath = pfn.replace(host, '') directory = os.path.dirname(fullPath) pfnDict['Path'] = directory fileName = os.path.basename(fullPath) pfnDict['FileName'] = fileName else: #pfn = 'host:port/fullPath' #pfn = 'host:port/wsurl/fullPath' host = pfn.split(':', 1)[0] pfnDict['Host'] = host pfn = pfn.replace('%s:' % host, '') port = pfn.split('/', 1)[0] pfnDict['Port'] = port pfn = pfn.replace(port, '', 1) #pfn = '/fullPath' #pfn = '/wsurl/fullPath' if re.search('\?', pfn): #/wsurl/fullPath' wsurl = '%s' % pfn.split('=', 1)[0] pfnDict['WSUrl'] = wsurl + '=' pfn = pfn.replace(wsurl + '=', '') #pfn = '/fullPath' directory = os.path.dirname(pfn) pfnDict['Path'] = directory fileName = os.path.basename(pfn) pfnDict['FileName'] = fileName #gLogger.debug("Pfn.pfnparse: Successfully parsed pfn.") return S_OK(pfnDict) except Exception: errStr = "Pfn.pfnparse: Exception while parsing pfn: " + str(pfn) gLogger.exception(errStr) return S_ERROR(errStr)
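# Hedged usage example: the dictionary pfnparse() is expected to build for a
# typical SRM PFN with a web-service component. The expected values below are
# inferred by tracing the parsing logic above, not taken from a test suite.
res = pfnparse('srm://srm.cern.ch:8443/srm/managerv2?SFN=/lhcb/data/file.dst')
if res['OK']:
  pfnDict = res['Value']
  # pfnDict['Protocol'] == 'srm'
  # pfnDict['Host']     == 'srm.cern.ch'
  # pfnDict['Port']     == '8443'
  # pfnDict['WSUrl']    == '/srm/managerv2?SFN='
  # pfnDict['Path']     == '/lhcb/data'
  # pfnDict['FileName'] == 'file.dst'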
gLogger.verbose(arguments) else: gLogger.verbose('Job has no InputData requirement') gJobReport.commit() try: result = job.execute(arguments) if not result['OK']: gLogger.error(result['Message']) raise JobWrapperError(result['Message']) except Exception, x: if str(x) == '0': gLogger.verbose('JobWrapper exited with status=0 after execution') else: gLogger.exception('Job failed in execution phase') gJobReport.setJobParameter('Error Message', str(x), sendFlag=False) gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False) job.sendFailoverRequest('Failed', 'Exception During Execution') return 1 if arguments['Job'].has_key('OutputSandbox') or arguments['Job'].has_key( 'OutputData'): try: result = job.processJobOutputs(arguments) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception, x:
def execute(arguments): global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) # Fix in the environment to get a reasonable performance from dCache, # until we move to a new version of root # os.environ['DCACHE_RAHEAD'] = str(1) # os.environ['DCACHE_RA_BUFFER'] = str(50*1024) if arguments.has_key('WorkingDirectory'): wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleFailedJob(jobID, 'Could Not Create Working Directory') return 1 #root = arguments['CE']['Root'] gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) except Exception: gLogger.exception('JobWrapper failed the initialization phase') rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) job.sendWMSAccounting('Failed', 'Job Wrapper Initialization') return 1 if arguments['Job'].has_key('InputSandbox'): gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleFailedJob(jobID, 'Input Sandbox Download') job.sendWMSAccounting('Failed', 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if arguments['Job'].has_key('InputData'): if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception, x: gLogger.exception('JobWrapper failed to resolve input data') rescheduleFailedJob(jobID, 'Input Data Resolution') job.sendWMSAccounting('Failed', 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments)
def execute(self): """ The main TokenAgent execution method. Checks for tokens owned by users that are expiring, and notifies those users. Calls rsDB.setToken() to set 'RS_SVC' as owner for those tokens that expired. """ adminMail = '' try: #reAssign the token to RS_SVC for g in self.ELEMENTS: tokensExpired = self.rsDB.getTokens(g, None, datetime.datetime.utcnow()) if tokensExpired: adminMail += '\nLIST OF EXPIRED TOKENS\n' for token in tokensExpired: name = token[0] user = token[1] self.rsDB.setToken( g, name, 'RS_SVC', datetime.datetime(9999, 12, 31, 23, 59, 59)) adminMail += ' %s %s\n' % (user.ljust(20), name) #notify token owners inNHours = datetime.datetime.utcnow() + datetime.timedelta( hours=self.notifyHours) for g in self.ELEMENTS: tokensExpiring = self.rsDB.getTokens(g, None, inNHours) if tokensExpiring: adminMail += '\nLIST OF EXPIRING TOKENS\n' for token in tokensExpiring: name = token[0] user = token[1] adminMail += '\n %s %s\n' % (user.ljust(20), name) if user == 'RS_SVC': continue pdp = PDP(self.VOExt, granularity=g, name=name) decision = pdp.takeDecision() pcresult = decision['PolicyCombinedResult'] spresult = decision['SinglePolicyResults'] expiration = token[2] mailMessage = "The token for %s %s " % (g, name) mailMessage = mailMessage + "will expire on %s\n\n" % expiration mailMessage = mailMessage + "You can renew it with command 'dirac-rss-renew-token'.\n" mailMessage = mailMessage + "If you don't take any action, RSS will take control of the resource.\n\n" policyMessage = '' if pcresult: policyMessage += " Policies applied will set status to %s.\n" % pcresult[ 'Status'] for spr in spresult: policyMessage += " %s Status->%s\n" % ( spr['PolicyName'].ljust(25), spr['Status']) mailMessage += policyMessage adminMail += policyMessage self.nc.sendMail( getMailForUser(user)['Value'][0], 'Token for %s is expiring' % name, mailMessage) if adminMail != '': self.nc.sendMail( getMailForUser('ubeda')['Value'][0], "Token's summary", adminMail) return S_OK() except Exception: errorStr = "TokenAgent execution" gLogger.exception(errorStr) return S_ERROR(errorStr) #############################################################################
def doCommand( self ): """ Returns jobs accounting info for sites in the last 24h `args`: - args[0]: string - should be a ValidElement - args[1]: string - should be the name of the ValidElement - args[2]: string - should be 'Job' or 'Pilot' or 'DataOperation' or 'WMSHistory' (??) or 'SRM' (??) - args[3]: string - should be the plot to generate (e.g. CPUEfficiency) - args[4]: dictionary - e.g. {'Format': 'LastHours', 'hours': 24} - args[5]: string - should be the grouping - args[6]: dictionary - optional conditions """ super( DIRACAccounting_Command, self ).doCommand() self.APIs = initAPIs( self.__APIs__, self.APIs ) self.APIs[ 'ReportsClient' ].rpcClient = self.APIs[ 'ReportGenerator' ] try: granularity = self.args[0] name = self.args[1] accounting = self.args[2] plot = self.args[3] period = self.args[4] grouping = self.args[5] if period[ 'Format' ] == 'LastHours': fromT = datetime.utcnow() - timedelta( hours = period[ 'hours' ] ) toT = datetime.utcnow() elif period[ 'Format' ] == 'Periods': #TODO pass if self.args[6] is not None: conditions = self.args[6] else: conditions = {} if accounting == 'Job' or accounting == 'Pilot': if granularity == 'Resource': conditions[ 'GridCE' ] = [ name ] elif granularity == 'Service': conditions[ 'Site' ] = [ name.split('@').pop() ] elif granularity == 'Site': conditions[ 'Site' ] = [ name ] else: return { 'Result' : S_ERROR( '%s is not a valid granularity' % granularity ) } elif accounting == 'DataOperation': conditions[ 'Destination' ] = [ name ] res = self.APIs[ 'ReportsClient' ].getReport( accounting, plot, fromT, toT, conditions, grouping ) except Exception, e: _msg = '%s (%s): %s' % ( self.__class__.__name__, self.args, e ) gLogger.exception( _msg ) return { 'Result' : S_ERROR( _msg ) }
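# Hedged example of the positional self.args tuple this command expects,
# following the docstring above; every value is illustrative only.
args = (
  'Site',                                # args[0]: ValidElement
  'LCG.CERN.ch',                         # args[1]: name of the element
  'Job',                                 # args[2]: accounting type
  'CPUEfficiency',                       # args[3]: plot to generate
  {'Format': 'LastHours', 'hours': 24},  # args[4]: time period
  'JobType',                             # args[5]: grouping
  None,                                  # args[6]: optional extra conditions
)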
def _executeAction(self, trid, proposalTuple, handlerObj): try: return handlerObj._rh_executeAction(proposalTuple) except Exception as e: gLogger.exception("Exception while executing handler action") return S_ERROR("Server error while executing action: %s" % str(e))
def execute(arguments): """The only real function executed here""" global gJobReport jobID = arguments["Job"].get("JobID", 0) os.environ["JOBID"] = str(jobID) jobID = int(jobID) if "WorkingDirectory" in arguments: wdir = os.path.expandvars(arguments["WorkingDirectory"]) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs( wdir ) # this will raise an exception if wdir already exists (which is ~OK) if os.path.isdir(wdir): os.chdir(wdir) except OSError as osError: if osError.errno == errno.EEXIST and os.path.isdir(wdir): gLogger.exception( "JobWrapperTemplate found that the working directory already exists" ) rescheduleResult = rescheduleFailedJob( jobID, "Working Directory already exists") else: gLogger.exception( "JobWrapperTemplate could not create working directory" ) rescheduleResult = rescheduleFailedJob( jobID, "Could Not Create Working Directory") return 1 gJobReport = JobReport(jobID, "JobWrapper") try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) # initialize doesn't return S_OK/S_ERROR except Exception as exc: # pylint: disable=broad-except gLogger.exception("JobWrapper failed the initialization phase", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION) return 1 if "InputSandbox" in arguments["Job"]: gJobReport.commit() try: result = job.transferInputSandbox(arguments["Job"]["InputSandbox"]) if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError: gLogger.exception("JobWrapper failed to download input sandbox") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while downloading input sandbox", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX) return 1 else: gLogger.verbose("Job has no InputSandbox requirement") gJobReport.commit() if "InputData" in arguments["Job"]: if arguments["Job"]["InputData"]: try: result = job.resolveInputData() if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError: gLogger.exception("JobWrapper failed to resolve input data") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while resolving input data", lException=exc) rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION) return 1 else: gLogger.verbose("Job has a null InputData requirement:") gLogger.verbose(arguments) else: gLogger.verbose("Job has no InputData requirement") gJobReport.commit() try: result = job.execute() if not 
result["OK"]: gLogger.error("Failed to execute job", result["Message"]) raise JobWrapperError((result["Message"], result["Errno"])) except JobWrapperError as exc: if exc.value[1] == 0 or str(exc.value[0]) == "0": gLogger.verbose("JobWrapper exited with status=0 after execution") if exc.value[1] == DErrno.EWMSRESC: gLogger.warn("Asked to reschedule job") rescheduleResult = rescheduleFailedJob( jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION, jobReport=gJobReport) job.sendJobAccounting( status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION) return 1 gLogger.exception("Job failed in execution phase") gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC) return 1 except Exception as exc: # pylint: disable=broad-except gLogger.exception("Job raised exception during execution phase", lException=exc) gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC) return 1 if "OutputSandbox" in arguments["Job"] or "OutputData" in arguments["Job"]: try: result = job.processJobOutputs() if not result["OK"]: gLogger.warn(result["Message"]) raise JobWrapperError(result["Message"]) except JobWrapperError as exc: gLogger.exception("JobWrapper failed to process output files") gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS) return 2 except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception while processing output files", lException=exc) gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False) gJobReport.setJobStatus( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False) job.sendFailoverRequest() job.sendJobAccounting( status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS) return 2 else: gLogger.verbose("Job has no OutputData or OutputSandbox requirement") try: # Failed jobs will return !=0 / successful jobs will return 0 return job.finalize() except Exception as exc: # pylint: disable=broad-except gLogger.exception( "JobWrapper raised exception during the finalization phase", lException=exc) return 2