class Worker(MainLoopApplication):
    """Worker application that gathers host system information and reports
    to the dispatcher configured in ``settings``.

    NOTE(review): ``registerWorker`` is called by this class but is not
    defined in the visible part of the file; it is expected to be provided
    elsewhere (later in the class or by the base class).
    """

    class CommandWatcher(object):
        """Plain record describing one watched command process."""

        def __init__(self):
            self.id = None
            self.processId = None
            self.startTime = None
            self.processObj = None
            self.timeOut = None
            self.commandId = None
            self.command = None
            # A fresh watcher starts flagged as modified and not finished.
            self.modified = True
            self.finished = False

    @property
    def modifiedCommandWatchers(self):
        """Generator over the command watchers whose ``modified`` flag is set."""
        return (watcher for watcher in self.commandWatchers.values()
                if watcher.modified)

    @property
    def finishedCommandWatchers(self):
        """Generator over the watchers that are finished and not modified."""
        return (watcher for watcher in self.commandWatchers.values()
                if watcher.finished and not watcher.modified)

    def __init__(self, framework):
        """Set up the worker state and make sure the pid directory is usable.

        Exits the process with status 1 when the pid directory cannot be
        created or lacks read/write access.

        :param framework: object stored as ``self.framework``.
        """
        # NOTE(review): ``self`` is passed explicitly as an argument to the
        # base initialiser (kept as in the original) -- confirm that this is
        # what MainLoopApplication.__init__ expects.
        super(Worker, self).__init__(self)
        LOGGER.info("Starting worker on %s:%d.", settings.ADDRESS, settings.PORT)
        self.framework = framework
        self.data = None
        self.requestManager = RequestManager(settings.DISPATCHER_ADDRESS,
                                             settings.DISPATCHER_PORT)
        self.commandWatchers = {}
        self.commands = {}
        self.port = settings.PORT
        self.computerName = COMPUTER_NAME_TEMPLATE % (settings.ADDRESS, settings.PORT)
        self.lastSysInfosMessageTime = 0
        self.sysInfosMessagePeriod = 6
        self.httpconn = httplib.HTTPConnection(settings.DISPATCHER_ADDRESS,
                                               settings.DISPATCHER_PORT)
        self.PID_DIR = os.path.dirname(settings.PIDFILE)
        if not os.path.isdir(self.PID_DIR):
            LOGGER.warning("Worker pid directory does not exist, creating...")
            try:
                # 0o777 is the same mode as the legacy literal 0777, spelled
                # in the form accepted by both Python 2.6+ and Python 3.
                os.makedirs(self.PID_DIR, 0o777)
                LOGGER.info("Worker pid directory created.")
            except OSError:
                LOGGER.error("Failed to create pid directory.")
                sys.exit(1)
        elif not os.access(self.PID_DIR, os.R_OK | os.W_OK):
            LOGGER.error("Missing read or write access on %s", self.PID_DIR)
            sys.exit(1)
        self.status = rendernode.RN_BOOTING
        self.updateSys = False
        self.isPaused = False
        self.toberestarted = False
        self.speed = 1.0
        self.cpuName = ""
        self.distrib = ""
        self.mikdistrib = ""
        self.openglversion = ""

    def prepare(self):
        """Log every upper-case attribute of ``settings``, then register the worker."""
        for name in (name for name in dir(settings) if name.isupper()):
            LOGGER.info("settings.%s = %r", name, getattr(settings, name))
        self.registerWorker()

    def getNbCores(self):
        """Return the CPU count reported by ``multiprocessing.cpu_count()``."""
        import multiprocessing
        return multiprocessing.cpu_count()

    def getTotalMemory(self):
        """Return the ``MemTotal`` value of /proc/meminfo divided by 1024.

        Falls back to a raw value of 1024 (i.e. a result of 1) when the file
        is missing, unreadable or does not hold a numeric ``MemTotal`` entry.
        """
        memTotal = 1024
        if os.path.isfile('/proc/meminfo'):
            try:
                # ``with`` guarantees the handle is closed on every path.
                with open('/proc/meminfo', 'r') as f:
                    for line in f:
                        fields = line.split()
                        # Skip blank/short lines instead of aborting the scan.
                        if len(fields) >= 2 and fields[0] == 'MemTotal:':
                            memTotal = int(fields[1])
                            break
            except (IOError, OSError, ValueError):
                # Best effort: keep the default but leave a trace.
                LOGGER.warning("Unable to read total memory from /proc/meminfo")
        # Floor division keeps the integer result of the original Python 2 code.
        return int(memTotal) // 1024

    def getCpuInfo(self):
        """Fill ``self.cpuName`` and ``self.speed`` from /proc/cpuinfo.

        ``self.speed`` is stored as a one-decimal string holding the value of
        the first 'MHz' line divided by 1000. Best effort: on any read or
        parse failure the attributes already set are left as they are.
        """
        if not os.path.isfile('/proc/cpuinfo'):
            return
        try:
            with open('/proc/cpuinfo', 'r') as f:
                for line in f:
                    if 'model name' in line:
                        self.cpuName = line.split(':')[1].strip()
                    elif 'MHz' in line:
                        speedStr = line.split(':')[1].strip()
                        self.speed = "%.1f" % (float(speedStr) / 1000)
                        # Only the first speed entry is used.
                        break
        except (IOError, OSError, IndexError, ValueError):
            LOGGER.warning("Unable to read cpu information from /proc/cpuinfo")

    def getDistribName(self):
        """Fill ``self.mikdistrib`` and ``self.distrib`` from /etc/mik-release.

        Best effort: on any read or parse failure the attributes already set
        are left as they are.
        """
        if not os.path.isfile('/etc/mik-release'):
            return
        try:
            with open('/etc/mik-release', 'r') as f:
                for line in f:
                    if 'MIK-VERSION' in line or 'MIK-RELEASE' in line:
                        self.mikdistrib = line.split()[1]
                    elif 'openSUSE' in line:
                        if '=' in line:
                            self.distrib = line.split('=')[1].strip()
                        else:
                            self.distrib = line
                        # Scanning stops at the first openSUSE line.
                        break
        except (IOError, OSError, IndexError):
            LOGGER.warning("Unable to read distribution name from /etc/mik-release")

    def getOpenglVersion(self):
        """Fill ``self.openglversion`` from the output of ``glxinfo``.

        The attribute is left untouched when ``glxinfo`` cannot be started or
        when its "OpenGL version string" line holds no matching version.
        """
        import subprocess
        import re
        try:
            p = subprocess.Popen("glxinfo", stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            output, _ = p.communicate()
        except OSError:
            # Popen raises OSError when the executable cannot be run.
            LOGGER.warning("Unable to run glxinfo, OpenGL version not updated")
            return
        # NOTE(review): the dots are unescaped, so they match any character;
        # pattern kept as in the original, only turned into a raw string.
        oglpattern = re.compile(r"(\d.\d.\d)")
        for line in output.split("\n"):
            if "OpenGL version string" in line:
                LOGGER.info("found : %s", line)
                res = oglpattern.search(line)
                # search() returns None when the line has no such version.
                if res is not None:
                    self.openglversion = res.group()
                break

    def updateSysInfos(self, ticket):
        """Flag the system information for a refresh on the next fetch.

        :param ticket: unused by this method.
        """
        self.updateSys = True

    def fetchSysInfos(self):
        """Build and return the dict of system information for this worker.

        The hardware/OS probes only run (and 'cores'/'ram' are only included)
        when ``self.updateSys`` is set; the flag is cleared afterwards.
        """
        infos = {}
        if self.updateSys:
            self.getCpuInfo()
            self.getDistribName()
            self.getOpenglVersion()
            infos['cores'] = self.getNbCores()
            infos['ram'] = self.getTotalMemory()
            self.updateSys = False
        # system info values:
        infos['caracteristics'] = {"os": platform.system().lower(),
                                   "softs": [],
                                   "cpuname": self.cpuName,
                                   "distribname": self.distrib,
                                   "mikdistrib": self.mikdistrib,
                                   "openglversion": self.openglversion}
        infos['name'] = self.computerName
        infos['port'] = self.port
        infos['status'] = self.status
        infos['pools'] = []
        # self.speed may be a float (initial value) or a string (getCpuInfo).
        infos['speed'] = float(self.speed)
        return infos

    def setPerformanceIndex(self, ticket, performance):
        """PUT the performance value to this node's ``sysinfos`` resource.

        A 404 answer triggers ``registerWorker()``; any other RequestError is
        re-raised. A BadStatusLine error is logged and swallowed.

        :param ticket: unused by this method.
        :param performance: value sent under the 'performance' key.
        """
        LOGGER.warning("set perf idx")
        dct = json.dumps({'performance': performance})
        headers = {}
        headers['content-length'] = len(dct)
        LOGGER.warning(dct)
        try:
            self.requestManager.put("/rendernodes/%s/sysinfos" % self.computerName,
                                    dct, headers)
        except RequestManager.RequestError as err:
            if err.status == 404:
                # the dispatcher doesn't know the worker
                # it may have been launched before the dispatcher itself
                # and not be mentioned in the tree.description file
                self.registerWorker()
            else:
                raise
        except httplib.BadStatusLine:
            LOGGER.exception('Sending sys infos has failed with a BadStatusLine error')
## Runs one command through its runner and reports its status, completion
# and message to the worker listening on 127.0.0.1.
#
class CommandWatcher(object):

    # Seconds slept between two polls of the execution thread.
    intervalTimeExec = 3
    intervalTimePostExec = 3
    # NOTE(review): class-level dict, hence shared by every instance of the
    # class; maps an action name to the thread running it.
    threadList = {}

    ## Creates a new CmdWatcher and immediately runs the command.
    #
    # Exits the process with status 1 when the runner cannot be loaded or
    # when running the command raises.
    #
    # @param workerPort the port of the local worker receiving the updates
    # @param id the id of the command
    # @param runner the runner type name
    # @param validationExpression stored as commandValidationExpression
    # @param arguments the arguments of the command
    #
    def __init__(self, workerPort, id, runner, validationExpression, arguments):
        self.id = id
        self.requestManager = RequestManager("127.0.0.1", workerPort)
        self.workerPort = workerPort
        self.completion = 0.0
        self.message = "loading command script"
        self.arguments = arguments
        self.finalState = CMD_DONE
        self.logger = logging.getLogger("cmdwatcher")
        self.runnerErrorInExec = None
        self.runnerErrorInPostExec = None

        # check that the job type is a registered one
        from puliclient.jobs import loadCommandRunner
        try:
            runnerClass = loadCommandRunner(runner)
        except ImportError:
            self.logger.exception("Command runner loading failed.")
            self.updateCommandStatus(CMD_ERROR)
            sys.exit(1)

        self.commandValidationExpression = validationExpression

        # instantiation of the jobtype script
        try:
            self.job = runnerClass()
            self.job.associatedWatcher = self
            self.mainActions()
        except Exception:
            self.updateCommandStatus(CMD_ERROR)
            self.logger.exception("CommandWatcher failed. This is a bug, please report it.")
            sys.exit(1)

    ## The main actions: validates the arguments, starts the execution
    # thread, then supervises it. A failure in validation or start-up is
    # logged and reported as CMD_ERROR.
    #
    def mainActions(self):
        try:
            self.job.validate(self.arguments)
        except Exception:
            self.logger.exception("Caught some unexpected exception while validating command %d.", self.id)
            self.finalState = CMD_ERROR
            self.updateCommandStatusAndCompletion(self.finalState, True)
            return

        try:
            self.executeScript()
        except Exception as e:
            # The message has two placeholders: both the exception and the
            # command id must be supplied, otherwise the formatting itself
            # fails inside this handler.
            self.logger.exception("Caught some unexpected exception (%s) while executing command %d.", e, self.id)
            self.finalState = CMD_ERROR
            self.updateCommandStatusAndCompletion(self.finalState, True)
            return

        self.execScriptChecker()

    ## Creates a thread for the script corresponding to the provided action name.
    # @param action the name of the action to thread (jobtype script method)
    #
    def threadAction(self, action):
        tmpThread = CmdThreader(self.job, action, self.arguments,
                                self.updateCompletionCallback,
                                self.updateMessageCallback)
        tmpThread.setName('jobMain')
        # add this thread to the list
        self.threadList[action] = tmpThread
        # launch it
        tmpThread.start()

    ## Sends a JSON document to this command's resource with a PUT request.
    # A BadStatusLine error is logged at debug level and swallowed.
    # @param dct the JSON-encoded body
    # @param what prefix of the debug message logged on BadStatusLine
    #
    def _putCommandUpdate(self, dct, what):
        headers = {}
        headers['Content-Length'] = len(dct)
        try:
            self.requestManager.put("/commands/%d/" % self.id, dct, headers)
        # NOTE(review): ``http`` is not bound in the visible code; presumably
        # a module-level alias exposing BadStatusLine -- confirm.
        except http.BadStatusLine:
            self.logger.debug('%s has failed with a BadStatusLine error' % what)

    ## Updates the status of the command.
    # @param status
    #
    def updateCommandStatus(self, status):
        self.logger.debug('Updating status: %s' % status)
        dct = json.dumps({"id": self.id, "status": status})
        self._putCommandUpdate(dct, 'Updating status')

    ## Updates the validator message and error infos of the command.
    # @param msg sent as "validatorMessage"
    # @param errorInfos sent as "errorInfos"
    #
    def updateValidatorResult(self, msg, errorInfos):
        self.logger.debug('Updating msg and errorInfos : %s,%s' % (msg, str(errorInfos)))
        dct = json.dumps({"id": self.id, "validatorMessage": msg, "errorInfos": errorInfos})
        self._putCommandUpdate(dct, 'Updating msg and errorInfos')

    ## Updates status, completion and message of the command in one request.
    #
    # The request is sent once; when retry is set it is sent again, with a
    # delay doubling from 0.5s up to 30s, until a response is received.
    #
    # @param status the status to report
    # @param retry whether to retry until the worker answers
    #
    def updateCommandStatusAndCompletion(self, status, retry=False):
        self.logger.debug('Updating status: %s' % status)
        completion = self.completion
        self.logger.debug('Updating completion: %s' % completion)

        body = json.dumps({"id": self.id,
                           "status": status,
                           "completion": completion,
                           "message": self.message})
        headers = {}
        headers['Content-Length'] = len(body)
        headers['Content-Type'] = 'application/json'

        import httplib

        def onResponse(request, response):
            # Any response, whatever its status, ends the retry loop.
            request.done = True
            if response.status == 202:
                return
            elif response.status == 404:
                self.logger.debug("Command is not registered anymore on the worker")
            else:
                self.logger.warning("Unexpected response to status update request: %d %s" % (response.status, response.reason))

        def onError(request, error):
            self.logger.debug("Update request failed: %s", error)

        delay = 0.5
        request = Request('PUT', '/commands/%d/' % self.id, headers, body)
        request.done = False
        conn = httplib.HTTPConnection('127.0.0.1', self.workerPort)
        while True:
            request.call(conn, onResponse, onError)
            conn.close()
            if request.done or not retry:
                break
            time.sleep(delay)
            # Exponential backoff capped at 30 seconds.
            delay = min(2.0 * delay, 30.0)

    ## Updates the completion of the command.
    #
    def updateCommandCompletion(self):
        completion = self.completion
        self.logger.debug('Updating completion: %s' % completion)
        dct = json.dumps({"id": self.id, "completion": completion, "message": self.message})
        self._putCommandUpdate(dct, 'Updating completion')

    ## Threads the execution of the corresponding runner.
    #
    def executeScript(self):
        self.logger.debug("Starting execution...")
        self.threadAction(EXEC)

    ## Controls the execution of the main command.
    #
    # Polls the execution thread every intervalTimeExec seconds, pushing the
    # completion at each poll, kills the command on timeout, error or
    # cancellation, then reports the final state.
    #
    def execScriptChecker(self):
        self.logger.debug("Checking Execution...")
        # Remaining time budget; None disables the timeout.
        timeOut = self.job.scriptTimeOut
        while not(self.threadList[EXEC].stopped):
            tmpTime = time.time()
            self.updateCommandCompletion()
            if timeOut is not None:
                if timeOut < 0:
                    self.logger.error("execute Script timeout reached !")
                    self.finalState = CMD_ERROR
            if self.finalState == CMD_ERROR or self.finalState == CMD_CANCELED:
                self.killCommand()
                break
            time.sleep(self.intervalTimeExec)
            if timeOut is not None:
                # Charge the time spent in this iteration to the budget.
                timeOut -= time.time() - tmpTime

        if self.threadList[EXEC].stopped == COMMAND_FAILED:
            self.finalState = CMD_ERROR
            self.logger.error("Error: %s", self.threadList[EXEC].errorInfo)
            self.runnerErrorInExec = str(self.threadList[EXEC].errorInfo)
        elif self.threadList[EXEC].stopped == COMMAND_CRASHED:
            self.logger.error("Job script raised some unexpected exception :")
            error = str(self.threadList[EXEC].errorInfo) or ("None")
            for line in error.strip().split("\n"):
                self.logger.error(line)
            self.finalState = CMD_ERROR
            self.runnerErrorInExec = str(self.threadList[EXEC].errorInfo)

        self.updateCommandStatusAndCompletion(self.finalState, True)

    ## Stops the execution thread of the command.
    #
    def killCommand(self):
        # FIXME: maybe we ought to provide a way to ask the command to stop itself
        self.threadList[EXEC].stop()

    ## Stores the completion reported by the runner.
    # @param completion the new completion value
    #
    def updateCompletionCallback(self, completion):
        self.completion = completion

    ## Stores the message reported by the runner.
    # @param message the new message
    #
    def updateMessageCallback(self, message):
        self.message = message