# Example #1
0
class Worker(MainLoopApplication):

    class CommandWatcher(object):
        """Plain record of per-command bookkeeping fields kept by the worker.

        A fresh record is flagged ``modified`` and not ``finished``; every
        other field starts as ``None``.
        """

        def __init__(self):
            # State flags read by the worker's modified/finished filters.
            self.finished = False
            self.modified = True
            # Command identification, unset until filled in by the owner.
            self.id = self.commandId = self.command = None
            # Process bookkeeping, unset until filled in by the owner.
            self.processId = self.processObj = None
            self.startTime = self.timeOut = None

    @property
    def modifiedCommandWatchers(self):
        """Generator over the watchers in ``self.commandWatchers`` flagged ``modified``.

        The ``values()`` call happens when the property is read; the filtering
        itself is performed lazily while the result is iterated.
        """
        def _keepModified(candidates):
            for candidate in candidates:
                if candidate.modified:
                    yield candidate

        return _keepModified(iter(self.commandWatchers.values()))

    @property
    def finishedCommandWatchers(self):
        """Generator over the watchers that are ``finished`` and no longer ``modified``.

        The ``values()`` call happens when the property is read; the filtering
        itself is performed lazily while the result is iterated.
        """
        def _keepFinished(candidates):
            for candidate in candidates:
                if not candidate.finished:
                    continue
                if candidate.modified:
                    continue
                yield candidate

        return _keepFinished(iter(self.commandWatchers.values()))

    def __init__(self, framework):
        """Initialise the worker state and make sure the pid directory is usable.

        @param framework the framework object, stored on ``self.framework``

        The process exits with status 1 when the directory that holds
        ``settings.PIDFILE`` cannot be created, or when it exists but is not
        both readable and writable.
        """
        # NOTE(review): the base initialiser is given ``self`` rather than
        # ``framework``; kept unchanged -- confirm against MainLoopApplication.
        super(Worker, self).__init__(self)
        LOGGER.info("Starting worker on %s:%d.", settings.ADDRESS, settings.PORT)
        self.framework = framework
        self.data = None
        self.requestManager = RequestManager(settings.DISPATCHER_ADDRESS,
                                             settings.DISPATCHER_PORT)
        self.commandWatchers = {}
        self.commands = {}
        self.port = settings.PORT
        self.computerName = COMPUTER_NAME_TEMPLATE % (settings.ADDRESS,
                                                      settings.PORT)
        self.lastSysInfosMessageTime = 0
        self.sysInfosMessagePeriod = 6
        self.httpconn = httplib.HTTPConnection(settings.DISPATCHER_ADDRESS, settings.DISPATCHER_PORT)
        self.PID_DIR = os.path.dirname(settings.PIDFILE)
        if not os.path.isdir(self.PID_DIR):
            LOGGER.warning("Worker pid directory does not exist, creating...")
            try:
                # ``0o777`` is the octal spelling accepted by Python 2.6+ and
                # Python 3 alike; the legacy ``0777`` form only parses on Python 2.
                os.makedirs(self.PID_DIR, 0o777)
                LOGGER.info("Worker pid directory created.")
            except OSError:
                LOGGER.error("Failed to create pid directory.")
                sys.exit(1)
        elif not os.access(self.PID_DIR, os.R_OK | os.W_OK):
            LOGGER.error("Missing read or write access on %s", self.PID_DIR)
            sys.exit(1)
        self.status = rendernode.RN_BOOTING
        # When True, the next fetchSysInfos() call refreshes the system details.
        self.updateSys = False
        self.isPaused = False
        self.toberestarted = False
        # System details, filled in by the get*() helpers.
        self.speed = 1.0
        self.cpuName = ""
        self.distrib = ""
        self.mikdistrib = ""
        self.openglversion = ""

    def prepare(self):
        """Log every upper-case attribute of ``settings``, then register the worker."""
        for name in dir(settings):
            if not name.isupper():
                # only the upper-case names are reported
                continue
            LOGGER.info("settings.%s = %r", name, getattr(settings, name))
        self.registerWorker()

    def getNbCores(self):
        """Return the CPU count reported by ``multiprocessing.cpu_count()``."""
        from multiprocessing import cpu_count
        return cpu_count()

    def getTotalMemory(self):
        """Return the ``MemTotal`` value of /proc/meminfo divided by 1024.

        /proc/meminfo reports ``MemTotal`` in kB, so the result is presumably
        in MB. When the file is missing, unreadable, or has no parsable
        ``MemTotal`` entry, the historical default of 1024 is used, which
        yields 1.

        @return the total memory as an integer
        """
        memTotal = 1024
        try:
            # ``with`` closes the file on every path, including parse errors.
            with open('/proc/meminfo', 'r') as f:
                for line in f:
                    fields = line.split()
                    # blank or single-word lines are skipped instead of
                    # aborting the whole scan
                    if len(fields) >= 2 and fields[0] == 'MemTotal:':
                        memTotal = int(fields[1])
                        break
        except (IOError, OSError, ValueError):
            # Best effort: fall back to the default on platforms without
            # /proc/meminfo or when the value is not numeric.
            memTotal = 1024
        # floor division keeps the integer result on Python 2 and 3 alike
        return memTotal // 1024

    def getCpuInfo(self):
        """Read the CPU model name and clock speed from /proc/cpuinfo.

        Sets ``self.cpuName`` from the first ``model name`` line and
        ``self.speed`` from the first ``MHz`` line (value divided by 1000 and
        formatted with one decimal, stored as a string). Scanning stops at the
        first ``MHz`` line. Attributes keep their previous values when the
        file is missing, unreadable or malformed.
        """
        try:
            # ``with`` closes the file even when a line fails to parse.
            with open('/proc/cpuinfo', 'r') as f:
                for line in f:
                    if 'model name' in line:
                        self.cpuName = line.split(':')[1].strip()
                    elif 'MHz' in line:
                        speedStr = line.split(':')[1].strip()
                        self.speed = "%.1f" % (float(speedStr) / 1000)
                        break
        except (IOError, OSError, IndexError, ValueError):
            # Best effort: no /proc/cpuinfo (non-Linux host) or an
            # unexpected line layout leaves the current values untouched.
            pass

    def getDistribName(self):
        """Read the distribution names from /etc/mik-release.

        Sets ``self.mikdistrib`` from a ``MIK-VERSION``/``MIK-RELEASE`` line
        (second whitespace-separated field) and ``self.distrib`` from the
        first ``openSUSE`` line (the text after ``=`` when present, otherwise
        the whole line). Scanning stops at the first ``openSUSE`` line.
        Attributes keep their previous values when the file is missing,
        unreadable or malformed.
        """
        try:
            # ``with`` closes the file even when a line fails to parse.
            with open('/etc/mik-release', 'r') as f:
                for line in f:
                    if 'MIK-VERSION' in line or 'MIK-RELEASE' in line:
                        self.mikdistrib = line.split()[1]
                    elif 'openSUSE' in line:
                        if '=' in line:
                            self.distrib = line.split('=')[1].strip()
                        else:
                            # strip the trailing newline, consistently with
                            # the ``=`` branch above
                            self.distrib = line.strip()
                        break
        except (IOError, OSError, IndexError):
            # Best effort: a missing file or an unexpected line layout leaves
            # the current values untouched.
            pass

    def getOpenglVersion(self):
        """Set ``self.openglversion`` from the output of the ``glxinfo`` command.

        The first output line containing ``OpenGL version string`` is searched
        for a ``d.d.d`` version number. ``self.openglversion`` is left
        unchanged when ``glxinfo`` cannot be executed, prints no such line, or
        the line holds no version number.
        """
        import re
        import subprocess

        try:
            # universal_newlines makes the output text on Python 2 and 3 alike
            p = subprocess.Popen(["glxinfo"],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
            output, _errors = p.communicate()
        except OSError:
            # glxinfo is not installed or not executable on this host
            LOGGER.warning("glxinfo could not be executed, OpenGL version left unchanged")
            return

        # Dots are escaped so that only real "x.y.z" sequences match.
        oglpattern = re.compile(r"(\d\.\d\.\d)")
        for line in output.split("\n"):
            if "OpenGL version string" in line:
                LOGGER.info("found : %s", line)
                res = oglpattern.search(line)
                if res is None:
                    LOGGER.warning("no OpenGL version number found in: %s", line)
                else:
                    self.openglversion = res.group()
                break

    def updateSysInfos(self, ticket):
        """Flag the system details for refresh on the next ``fetchSysInfos()`` call.

        @param ticket not used by this method
        """
        self.updateSys = True

    def fetchSysInfos(self):
        """Build the dictionary of system information describing this worker.

        When ``self.updateSys`` is set, the CPU, distribution and OpenGL
        attributes are refreshed first, the ``cores``, ``ram`` and
        ``caracteristics`` entries are added, and the flag is cleared. The
        ``name``, ``port``, ``status``, ``pools`` and ``speed`` entries are
        always present.

        @return the dictionary of system information
        """
        infos = {}
        if self.updateSys:
            # Refresh the cached attributes before reporting them.
            for refresh in (self.getCpuInfo, self.getDistribName, self.getOpenglVersion):
                refresh()
            infos['cores'] = self.getNbCores()
            infos['ram'] = self.getTotalMemory()
            self.updateSys = False
            # system info values:
            osName = platform.system().lower()
            infos['caracteristics'] = {
                "os": osName,
                "softs": [],
                "cpuname": self.cpuName,
                "distribname": self.distrib,
                "mikdistrib": self.mikdistrib,
                "openglversion": self.openglversion,
            }
        infos.update([
            ('name', self.computerName),
            ('port', self.port),
            ('status', self.status),
            ('pools', []),
            # speed may be stored as a formatted string, hence the conversion
            ('speed', float(self.speed)),
        ])
        return infos

    def setPerformanceIndex(self, ticket, performance):
        """Send the performance value of this worker through ``self.requestManager``.

        The value is PUT as JSON on ``/rendernodes/<computerName>/sysinfos``.
        A 404 answer triggers ``registerWorker()``; any other request error is
        re-raised. A ``BadStatusLine`` failure is only logged.

        @param ticket not used by this method
        @param performance the value sent under the ``performance`` key
        """
        LOGGER.warning("set perf idx")
        dct = json.dumps({'performance': performance})
        headers = {}
        headers['content-length'] = len(dct)

        LOGGER.warning(dct)

        try:
            self.requestManager.put("/rendernodes/%s/sysinfos" % self.computerName, dct, headers)
        # ``except ... as`` is valid on Python 2.6+ and Python 3, unlike the
        # legacy comma form.
        except RequestManager.RequestError as err:
            if err.status == 404:
                # the dispatcher doesn't know the worker
                # it may have been launched before the dispatcher itself
                # and not be mentioned in the tree.description file
                self.registerWorker()
            else:
                raise
        except httplib.BadStatusLine:
            LOGGER.exception('Sending sys infos has failed with a BadStatusLine error')
class CommandWatcher(object):

    intervalTimeExec = 3
    intervalTimePostExec = 3
    threadList = {}

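    ## Creates a new CommandWatcher and immediately runs the command.
    #
    # @param workerPort the port used to reach the worker on 127.0.0.1
    # @param id the id of the command
    # @param runner the runner type name
    # @param validationExpression the validation expression stored on the watcher
    # @param arguments the arguments of the command
    #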
    def __init__(self, workerPort, id, runner, validationExpression, arguments):
        """Set up the watcher, load the runner and run the command right away.

        The process exits with status 1 (after reporting ``CMD_ERROR``) when
        the runner cannot be imported, or when instantiating or running it
        raises.
        """
        # Command identity and the channel used to report back.
        self.id = id
        self.arguments = arguments
        self.commandValidationExpression = validationExpression
        self.workerPort = workerPort
        self.requestManager = RequestManager("127.0.0.1", workerPort)

        # Progress reported until the runner says otherwise.
        self.completion = 0.0
        self.message = "loading command script"
        self.finalState = CMD_DONE

        self.runnerErrorInExec = None
        self.runnerErrorInPostExec = None
        self.logger = logging.getLogger("cmdwatcher")

        # The runner name must resolve to a registered job type.
        from puliclient.jobs import loadCommandRunner
        try:
            runnerClass = loadCommandRunner(runner)
        except ImportError:
            self.logger.exception("Command runner loading failed.")
            self.updateCommandStatus(CMD_ERROR)
            sys.exit(1)

        # Instantiate the job type script, then hand over to mainActions().
        try:
            self.job = job = runnerClass()
            job.associatedWatcher = self
            self.mainActions()
        except Exception:
            self.updateCommandStatus(CMD_ERROR)
            self.logger.exception("CommandWatcher failed. This is a bug, please report it.")
            sys.exit(1)

    ## The main actions.
    #
    def mainActions(self):

        try:
            self.job.validate(self.arguments)
        except Exception:
            self.logger.exception("Caught some unexpected exception while validating command %d." % (self.id))
            self.finalState = CMD_ERROR
            self.updateCommandStatusAndCompletion(self.finalState, True)
            return

        try:
            self.executeScript()
        except Exception:
            self.logger.exception("Caught some unexpected exception (%s) while executing command %d." % (self.id))
            self.finalState = CMD_ERROR
            self.updateCommandStatusAndCompletion(self.finalState, True)
            return

        self.execScriptChecker()

    ## Creates a thread for the script corresponding to the provided action name.
    # @param action the name of the action to thread (jobtype script method)
    #
    def threadAction(self, action):
        tmpThread = CmdThreader(self.job, action, self.arguments, self.updateCompletionCallback, self.updateMessageCallback)
        tmpThread.setName('jobMain')
        # add this thread to the list
        self.threadList[action] = tmpThread
        # launch it
        tmpThread.start()

    ## Updates the status of the command.
    # @param status
    #
    def updateCommandStatus(self, status):
        self.logger.debug('Updating status: %s' % status)
        dct = json.dumps({"id": self.id, "status": status})
        headers = {}
        headers['Content-Length'] = len(dct)
        try:
            self.requestManager.put("/commands/%d/" % self.id, dct, headers)
        except http.BadStatusLine:
            self.logger.debug('Updating status has failed with a BadStatusLine error')

    def updateValidatorResult(self, msg, errorInfos):
        self.logger.debug('Updating msg and errorInfos : %s,%s' % (msg, str(errorInfos)))
        dct = json.dumps({"id": self.id, "validatorMessage": msg, "errorInfos": errorInfos})
        headers = {}
        headers['Content-Length'] = len(dct)
        try:
            self.requestManager.put("/commands/%d/" % self.id, dct, headers)
        except http.BadStatusLine:
            self.logger.debug('Updating  msg and errorInfos has failed with a BadStatusLine error')

    def updateCommandStatusAndCompletion(self, status, retry=False):
        self.logger.debug('Updating status: %s' % status)
        completion = self.completion
        self.logger.debug('Updating completion: %s' % completion)

        body = json.dumps({"id": self.id, "status": status, "completion": completion, "message": self.message})
        headers = {}
        headers['Content-Length'] = len(body)
        headers['Content-Type'] = 'application/json'

        import httplib

        def onResponse(request, response):
            request.done = True
            if response.status == 202:
                return
            elif response.status == 404:
                self.logger.debug("Command is not registered anymore on the worker")
            else:
                self.logger("Unexpected response to status update request: %d %s" % (response.status, response.reason))

        def onError(request, error):
            self.logger.debug("Update request failed: %s", error)

        delay = 0.5
        request = Request('PUT', '/commands/%d/' % self.id, headers, body)
        request.done = False
        conn = httplib.HTTPConnection('127.0.0.1', self.workerPort)
        while retry and not request.done:
            request.call(conn, onResponse, onError)
            conn.close()
            time.sleep(delay)
            delay = max(2.0 * delay, 30.0)

    ## Updates the completion of the command.
    #
    def updateCommandCompletion(self):
        completion = self.completion
        self.logger.debug('Updating completion: %s' % completion)
        dct = json.dumps({"id": self.id,
                          "completion": completion,
                          "message": self.message})
        headers = {}
        headers['Content-Length'] = len(dct)
        try:
            self.requestManager.put("/commands/%d/" % self.id, dct, headers)
        except http.BadStatusLine:
            self.logger.debug('Updating completion has failed with a BadStatusLine error')

    ## Threads the execution (EXEC action) of the corresponding runner.
    #
    def executeScript(self):
        logger.debug("Starting execution...")
        self.threadAction(EXEC)

    ## Controls the execution of the main command.
    #
    def execScriptChecker(self):
        self.logger.debug("Checking Execution...")

        timeOut = self.job.scriptTimeOut

        while not(self.threadList[EXEC].stopped):
            tmpTime = time.time()
            self.updateCommandCompletion()

            if timeOut is not None:
                if timeOut < 0:
                    self.logger.error("execute Script timeout reached !")
                    self.finalState = CMD_ERROR

            if self.finalState == CMD_ERROR or self.finalState == CMD_CANCELED:
                self.killCommand()
                break

            time.sleep(self.intervalTimeExec)

            if timeOut is not None:
                timeOut -= time.time() - tmpTime

        if self.threadList[EXEC].stopped == COMMAND_FAILED:
            self.finalState = CMD_ERROR
            self.logger.error("Error: %s", self.threadList[EXEC].errorInfo)
            self.runnerErrorInExec = str(self.threadList[EXEC].errorInfo)
        elif self.threadList[EXEC].stopped == COMMAND_CRASHED:
            self.logger.error("Job script raised some unexpected exception :")
            error = str(self.threadList[EXEC].errorInfo) or ("None")
            for line in error.strip().split("\n"):
                self.logger.error(line)
            self.finalState = CMD_ERROR
            self.runnerErrorInExec = str(self.threadList[EXEC].errorInfo)

        self.updateCommandStatusAndCompletion(self.finalState, True)

    ## Kills all processes launched by the command.
    #
    def killCommand(self):
        # FIXME: maybe we ought to provide a way to ask the command to stop itself
        self.threadList[EXEC].stop()

    def updateCompletionCallback(self, completion):
        self.completion = completion

    def updateMessageCallback(self, message):
        self.message = message