Example #1
0
    def unstickTask(self):
        """Clear out a task still assigned to this node at startup.

        If ``self.thisNode.task_id`` is set (e.g. the node crashed or was
        restarted mid-render), the task and its job are fetched, the task is
        killed with status CRASHED without contacting the node over TCP,
        progress is updated, and the node is detached from the task.

        If no task is assigned but the node status is STARTED or PENDING,
        only the node status is corrected.

        In both cases a STARTED node becomes IDLE and any other status becomes
        OFFLINE. Nothing in this method writes ``self.thisNode`` back to the
        database; persisting it is left to the caller.
        """
        if not self.thisNode.task_id:
            #No task attached, only a stale status may need fixing
            if self.thisNode.status in [STARTED, PENDING]:
                logger.warning("Reseting bad status, node set %s but no task found!", self.thisNode.status)
                if self.thisNode.status == STARTED:
                    self.thisNode.status = IDLE
                else:
                    self.thisNode.status = OFFLINE
            return

        logger.info("Rouge task discovered. Unsticking...")
        taskColumns = ["id", "job_id", "renderLayer",
                       "status", "exitCode",
                       "endTime", "host",
                       "currentFrame"]
        self.HydraTask = hydra_taskboard.fetch("WHERE id = %s",
                                               (self.thisNode.task_id,),
                                               cols=taskColumns)
        jobColumns = ["jobType", "renderLayerTracker"]
        self.HydraJob = hydra_jobboard.fetch("WHERE id = %s",
                                             (self.HydraTask.job_id,),
                                             cols=jobColumns)
        self.logPath = self.HydraTask.getLogPath()

        #Second argument is TCPKill=False, the task is marked directly
        self.HydraTask.kill(CRASHED, False)

        self.progressUpdate()

        #Status is read only after the kill and progress update have run
        if self.thisNode.status == STARTED:
            self.thisNode.status = IDLE
        else:
            self.thisNode.status = OFFLINE
        self.thisNode.task_id = None
Example #2
0
    def kill(self, statusAfterDeath="K", TCPKill=True):
        """Kill this task, asking its render node first when requested.

        statusAfterDeath -- status written to the task once it is dead.
        TCPKill -- when true, ask the node to kill the task via
                   sendKillQuestion before falling back to marking the
                   task dead directly in the database.

        Returns True when the task was not STARTED (nothing to do), when the
        node reported a positive kill code, or when the task was marked dead
        here. Returns False when the node answered with a negative code.
        """
        if self.status != STARTED:
            logger.debug("Task Kill is skipping task %s because of status %s", self.id, self.status)
            return True

        node = hydra_rendernode.fetch("WHERE host = %s", (self.host,),
                                        cols=["status", "task_id"])
        releaseNode = True

        if TCPKill:
            if node.task_id == self.id:
                answer = self.sendKillQuestion(statusAfterDeath)
                #A falsy answer (None or 0) falls through to the manual path
                if answer:
                    return bool(answer > 0)
            else:
                logger.warning("Node is not running the given task! Marking as dead.")
                releaseNode = False

        #Reaching this point means the node did not kill the task itself,
        #so the task is marked as dead here instead
        logger.debug("TCPKill recived None, marking task as killed")
        self.status = statusAfterDeath
        self.exitCode = 1
        self.endTime = datetime.datetime.now()
        with transaction() as t:
            self.update(t)
            if releaseNode:
                if node.status == STARTED:
                    node.status = IDLE
                else:
                    node.status = OFFLINE
                node.task_id = None
                node.update(t)
        return True
Example #3
0
def softwareUpdater():
    """Switch the HYDRA environment to the newest "dist_*" directory, if any.

    The directory named by the HYDRA environment variable is split into its
    parent folder and its own name; the text after the last underscore in
    that name is taken as the running version (falling back to
    Constants.VERSION when it is not a number). Every sibling entry whose
    name starts with "dist_" is then considered a candidate version.

    Returns the result of changeHydraEnviron when a newer version exists,
    otherwise False (HYDRA unset, no candidates, or already up to date).
    """
    hydraPath = os.getenv("HYDRA")

    if not hydraPath:
        logger.error("HYDRA enviromental variable does not exit!")
        return False

    hydraPath, thisVersion = os.path.split(hydraPath)
    try:
        currentVersion = float(thisVersion.split("_")[-1])
    except ValueError:
        logger.warning("Unable to obtain version number from file path. Assuming version number from Constants")
        currentVersion = Constants.VERSION

    # Remember the real directory name for each parsed version. Rebuilding
    # the name from the float would turn "dist_2" into "dist_2.0" and
    # "dist_1.10" into "dist_1.1", neither of which exists on disk.
    versionDirs = {}
    for entry in os.listdir(hydraPath):
        if not entry.startswith("dist_"):
            continue
        try:
            versionDirs[float(entry.split("_")[-1])] = entry
        except ValueError:
            # One oddly named entry (e.g. "dist_backup") must not abort
            # the whole update check.
            logger.debug("Skipping %s, no version number could be parsed from it", entry)

    if not versionDirs:
        return False

    highestVersion = max(versionDirs)
    logger.debug("Comparing versions. Env: %s Latest: %s", currentVersion, highestVersion)
    if highestVersion <= currentVersion:
        return False

    logger.info("Update found! Current Version is %s / New Version is %s", currentVersion, highestVersion)
    newPath = os.path.join(hydraPath, versionDirs[highestVersion])
    response = changeHydraEnviron(newPath)
    if not response:
        logger.critical("Could not update to newest environ for some reason!")
    return response
Example #4
0
 def openLogFile(task_id):
     """Open the log file belonging to the task with the given id.

     The task is fetched from the taskboard and its log path is handed to
     webbrowser.open when the file exists; otherwise a warning is logged
     and nothing is opened.
     """
     task = hydra_taskboard.fetch("WHERE id =  %s", (task_id,),
                                  cols=["id", "host"])
     taskLog = task.getLogPath()
     if not os.path.isfile(taskLog):
         logger.warning("Log file does not exist or is unreachable.")
         return
     webbrowser.open(taskLog)
Example #5
0
    def sendKillQuestion(self, newStatus):
        """Ask the render host in self.host to kill its current task.

        newStatus -- status passed along in the KillCurrentTaskQuestion.

        Returns whatever the connection answered, unmodified. None is
        logged as the host being offline or unresponsive; a negative
        answer is logged as a failed kill attempt.
        """
        logger.debug('Kill task on %s', self.host)
        reply = TCPConnection(hostname=self.host).getAnswer(KillCurrentTaskQuestion(newStatus))

        if reply is not None:
            logger.debug("Child killed return code '%s' for node '%s'", reply, self.host)
            if reply < 0:
                logger.warning("%s tried to kill its job but failed for some reason.", self.host)
        else:
            logger.debug("%s appears to be offline or unresponsive. Treating as dead.", self.host)

        return reply
Example #6
0
    def __init__(self):
        """Prepare this render node and start its server thread.

        Steps, in order:
          1. Initialise the per-render state attributes.
          2. Load this node's database record; exit the process with
             code -1 if there is none.
          3. Read the Redshift "SelectedCudaDevices" preference and derive
             the GPU list and GPU ids from it.
          4. Make sure Constants.RENDERLOGDIR exists.
          5. Clean up any task left assigned to the node (unstickTask),
             record the software version and write the node back to the
             database.
          6. Start the server thread on the port from the "network"/"port"
             config entry.
        """
        #Setup Class Variables
        self.renderThread = None
        self.childProcess = None
        self.PSUtilProc = None
        self.statusAfterDeath = None
        self.childKilled = 0
        self.HydraJob = None
        self.HydraTask = None
        self.logPath = None
        #Always defined, so nodes without Redshift GPUs still have the
        #attribute instead of raising AttributeError on first access
        self.rsGPUids = []

        #Get this node data from the database and make sure it exists
        self.thisNode = getThisNodeOBJ()
        logger.debug(self.thisNode)
        if not self.thisNode:
            logger.critical(
                "This node does not exist in the database! Please Register this node and try again."
            )
            #sys.exit raises SystemExit, nothing after it would run
            sys.exit(-1)

        #Detect RedShift GPUs
        self.rsGPUs = Utils.getRedshiftPreference("SelectedCudaDevices")
        if self.rsGPUs:
            #The last comma separated element is dropped
            #NOTE(review): presumably the preference ends with a trailing
            #comma -- confirm against a real preferences file
            self.rsGPUs = self.rsGPUs.split(",")[:-1]
            self.rsGPUids = [x.split(":")[0] for x in self.rsGPUs]
            #NOTE(review): both lists are built one-to-one, so this check
            #cannot fire as written; kept to preserve existing behaviour
            if len(self.rsGPUs) != len(self.rsGPUids):
                logger.warning("Problems parsing Redshift Preferences")
            logger.info("%s Redshift Enabled GPU(s) found on this node",
                        len(self.rsGPUs))
            logger.debug("GPUs available for rendering are %s", self.rsGPUs)
        else:
            logger.warning("Could not find available Redshift GPUs")

        #Create RenderLog Directory if it doesn't exit
        if not os.path.isdir(Constants.RENDERLOGDIR):
            os.makedirs(Constants.RENDERLOGDIR)

        #unstickTask only edits self.thisNode in memory, the update below
        #is what writes those changes (and the version) to the database
        self.unstickTask()
        self.thisNode.software_version = Constants.VERSION

        with transaction() as t:
            self.thisNode.update(t)

        #Run The Server
        port = int(Utils.getInfoFromCFG("network", "port"))
        self.startServerThread(port)
    def remove(self):
        """Release this lock and delete its temp file (best effort).

        Does nothing unless self.locked is truthy.

        On Windows the descriptor in self.tempFile is closed and the file
        at self.tempFilePath is deleted; a warning is logged when the
        instance has no tempFile attribute. On other platforms the fcntl
        lock on self.tempFile is released and the file is deleted if it
        still exists.

        Failures are logged through logger.error and never raised.
        """
        if not self.locked:
            return

        if sys.platform == "win32":
            if hasattr(self, "tempFile"):
                try:
                    os.close(self.tempFile)
                    os.unlink(self.tempFilePath)
                except Exception as e:
                    logger.error(e)
            else:
                logger.warning("No temp file found for %s", self.name)
        else:
            try:
                #Was misspelled "fnctl", which raised a NameError that the
                #except below swallowed, so the lock was never released and
                #the temp file was never deleted
                fcntl.lockf(self.tempFile, fcntl.LOCK_UN)
                if os.path.isfile(self.tempFilePath):
                    os.unlink(self.tempFilePath)
            except Exception as e:
                logger.error(e)
Example #8
0
    def reset(self, resetData):
        """Apply reset data to this job.

        resetData -- indexable with three entries: the render layers to
                     reset, the frame to restart them from, and a flag
                     asking for failures/attempts to be cleared.

        Returns 0 on success (or when resetData is empty), -1 when the
        requested frame lies past self.endFrame, and -2 when any
        updateAttr call returned a falsy value.
        """
        if not resetData:
            logger.debug("No reset data recieved")
            return 0

        layersToReset = resetData[0]
        restartFrame = resetData[1]
        clearNodeData = resetData[2]
        results = []

        if clearNodeData:
            results.append(self.updateAttr("failures", ""))
            results.append(self.updateAttr("attempts", 0))

        if layersToReset:
            if restartFrame > self.endFrame:
                logger.error("New start frame is higher than the end frame! Aboring!")
                return -1

            #A frame at or before the start frame is stored as 0
            if restartFrame < self.startFrame:
                logger.warning("New start frame is lower than original start frame, resetting to default.")
                restartFrame = 0
            elif restartFrame == self.startFrame:
                restartFrame = 0

            #Positions are resolved first, then written into the tracker
            layerNames = self.renderLayers.split(",")
            positions = [layerNames.index(layer) for layer in layersToReset]
            tracker = self.renderLayerTracker.split(",")
            frameText = str(restartFrame)
            for position in positions:
                tracker[position] = frameText

            results.append(self.updateAttr("renderLayerTracker", ",".join(tracker)))
            if self.status in [KILLED, FINISHED]:
                results.append(self.updateAttr("status", PAUSED))

        if all(results):
            return 0
        return -2