def unstickTask(self):
    """Clean up a task that is still assigned to this node at startup.

    Covers two situations: the node comes up with a task recorded against
    it (for example after a crash or restart), or the node status claims
    work is in progress although no task is assigned.
    """
    #Changes to self.thisNode are only made in memory here, __init__ writes
    #them to the database after calling this method
    if not self.thisNode.task_id:
        if self.thisNode.status in [STARTED, PENDING]:
            logger.warning("Reseting bad status, node set %s but no task found!", self.thisNode.status)
            if self.thisNode.status == STARTED:
                self.thisNode.status = IDLE
            else:
                self.thisNode.status = OFFLINE
        return

    logger.info("Rouge task discovered. Unsticking...")
    taskCols = ["id", "job_id", "renderLayer", "status", "exitCode",
                "endTime", "host", "currentFrame"]
    self.HydraTask = hydra_taskboard.fetch("WHERE id = %s",
                                           (self.thisNode.task_id,),
                                           cols=taskCols)
    self.HydraJob = hydra_jobboard.fetch("WHERE id = %s",
                                         (self.HydraTask.job_id,),
                                         cols=["jobType", "renderLayerTracker"])
    self.logPath = self.HydraTask.getLogPath()

    #Mark the stale task as crashed without asking the node (this process)
    #to kill anything over TCP
    self.HydraTask.kill(CRASHED, False)
    self.progressUpdate()

    if self.thisNode.status == STARTED:
        self.thisNode.status = IDLE
    else:
        self.thisNode.status = OFFLINE
    self.thisNode.task_id = None
def kill(self, statusAfterDeath="K", TCPKill=True):
    """Kill this task, through its render node or directly in the database.

    Only a task whose status is STARTED is touched; any other status is
    logged and skipped.

    statusAfterDeath -- status written to the task once it is dead, also
                        handed to sendKillQuestion()
    TCPKill          -- when True the node recorded for self.host is asked
                        to kill the task via sendKillQuestion(); when False
                        the task is marked dead here without contacting it

    Returns False only when the node answered the kill request with a
    truthy value that is not greater than zero, True in every other case.
    """
    if self.status != STARTED:
        logger.debug("Task Kill is skipping task %s because of status %s", self.id, self.status)
        return True

    node = hydra_rendernode.fetch("WHERE host = %s", (self.host,),
                                  cols=["status", "task_id"])
    releaseNode = True
    if TCPKill:
        if node.task_id != self.id:
            #The node has moved on to something else, leave its record alone
            logger.warning("Node is not running the given task! Marking as dead.")
            releaseNode = False
        else:
            nodeAnswer = self.sendKillQuestion(statusAfterDeath)
            #A falsy answer (None for instance) means the node did not
            #handle the kill, so fall through and mark the task dead below
            if nodeAnswer:
                return bool(nodeAnswer > 0)

    #Getting here means the node did not kill the task itself
    logger.debug("TCPKill recived None, marking task as killed")
    self.status = statusAfterDeath
    self.exitCode = 1
    self.endTime = datetime.datetime.now()
    with transaction() as txn:
        self.update(txn)
        if releaseNode:
            if node.status == STARTED:
                node.status = IDLE
            else:
                node.status = OFFLINE
            node.task_id = None
            node.update(txn)
    return True
def softwareUpdater():
    """Switch to the newest installed Hydra distribution, if there is one.

    The HYDRA environment variable is split into a parent directory and a
    final component; the current version number is parsed from the text
    after the last "_" of that component (falling back to
    Constants.VERSION when that fails). Entries of the parent directory
    whose names start with "dist_" are then compared against it.

    Returns:
        The result of changeHydraEnviron() when a higher version was found,
        otherwise False (HYDRA not set, no usable "dist_" entry, or nothing
        newer than the current version).
    """
    hydraPath = os.getenv("HYDRA")
    if not hydraPath:
        logger.error("HYDRA enviromental variable does not exit!")
        return False

    hydraPath, thisVersion = os.path.split(hydraPath)
    try:
        currentVersion = float(thisVersion.split("_")[-1])
    except ValueError:
        logger.warning("Unable to obtain version number from file path. Assuming version number from Constants")
        currentVersion = Constants.VERSION

    #Remember which directory each version number was parsed from. Building
    #the name back from the float would turn "dist_2" into "dist_2.0" and
    #"dist_1.10" into "dist_1.1", neither of which exists on disk.
    installed = {}
    for entry in os.listdir(hydraPath):
        if not entry.startswith("dist_"):
            continue
        try:
            version = float(entry.split("_")[-1])
        except ValueError:
            #Something like "dist_old" or "dist_1.2.zip", not a version
            logger.debug("Skipping %s, no version number could be parsed from it", entry)
            continue
        #First entry wins if two names parse to the same number
        installed.setdefault(version, entry)

    if not installed:
        return False

    highestVersion = max(installed)
    logger.debug("Comparing versions. Env: %s Latest: %s", currentVersion, highestVersion)
    if highestVersion > currentVersion:
        logger.info("Update found! Current Version is %s / New Version is %s", currentVersion, highestVersion)
        newPath = os.path.join(hydraPath, installed[highestVersion])
        response = changeHydraEnviron(newPath)
        if not response:
            logger.critical("Could not update to newest environ for some reason!")
        return response
    else:
        return False
def openLogFile(task_id):
    """Open the log file belonging to the given task_id.

    The task is fetched from the taskboard and asked for its log path. If
    that path is an existing file it is handed to webbrowser.open(),
    otherwise a warning is logged and nothing is opened.
    """
    task = hydra_taskboard.fetch("WHERE id = %s", (task_id,),
                                 cols=["id", "host"])
    logFile = task.getLogPath()
    if not os.path.isfile(logFile):
        logger.warning("Log file does not exist or is unreachable.")
        return
    webbrowser.open(logFile)
def sendKillQuestion(self, newStatus):
    """Ask the render host of this task to kill what it is running.

    A KillCurrentTaskQuestion carrying newStatus is sent to self.host over
    a TCPConnection and the answer is handed back to the caller unchanged:
    None when no answer was received, otherwise the return code reported by
    the host (a negative code is logged as a failed kill).
    """
    logger.debug('Kill task on %s', self.host)
    question = KillCurrentTaskQuestion(newStatus)
    reply = TCPConnection(hostname=self.host).getAnswer(question)

    if reply is not None:
        logger.debug("Child killed return code '%s' for node '%s'", reply, self.host)
        if reply < 0:
            logger.warning("%s tried to kill its job but failed for some reason.", self.host)
    else:
        logger.debug("%s appears to be offline or unresponsive. Treating as dead.", self.host)

    return reply
def __init__(self):
    """Set up the render node: state, database record, GPUs, logs, server.

    Exits the process with code -1 when this node has no record in the
    database. Otherwise any task left over from a previous run is cleaned
    up, the node record is saved with the current software version and the
    server thread is started on the configured port.
    """
    #Render/process bookkeeping, nothing is running yet
    self.renderThread = None
    self.childProcess = None
    self.PSUtilProc = None
    self.statusAfterDeath = None
    self.childKilled = 0

    #Filled in once a task is picked up (or unstuck below)
    self.HydraJob = None
    self.HydraTask = None
    self.logPath = None

    #This node must already be registered in the database
    self.thisNode = getThisNodeOBJ()
    logger.debug(self.thisNode)
    if not self.thisNode:
        logger.critical(
            "This node does not exist in the database! Please Register this node and try again."
        )
        sys.exit(-1)
        #Not reached while sys.exit raises SystemExit
        return

    #Look up the GPUs selected in the Redshift preferences
    selectedDevices = Utils.getRedshiftPreference("SelectedCudaDevices")
    self.rsGPUs = selectedDevices
    if not selectedDevices:
        logger.warning("Could not find available Redshift GPUs")
    else:
        #The last comma separated field is dropped, the id of each
        #remaining entry is the text in front of its first ":"
        self.rsGPUs = selectedDevices.split(",")[:-1]
        self.rsGPUids = [gpu.split(":")[0] for gpu in self.rsGPUs]
        if len(self.rsGPUs) != len(self.rsGPUids):
            logger.warning("Problems parsing Redshift Preferences")
        logger.info("%s Redshift Enabled GPU(s) found on this node", len(self.rsGPUs))
        logger.debug("GPUs available for rendering are %s", self.rsGPUs)

    #Make sure there is a directory for the render logs
    logDir = Constants.RENDERLOGDIR
    if not os.path.isdir(logDir):
        os.makedirs(logDir)

    #Clear out leftovers from a previous run, then save the node record
    self.unstickTask()
    self.thisNode.software_version = Constants.VERSION
    with transaction() as txn:
        self.thisNode.update(txn)

    #Start listening on the port from the config file
    self.startServerThread(int(Utils.getInfoFromCFG("network", "port")))
def remove(self):
    """Release this lock and delete its temp file.

    Does nothing unless self.locked is set. On win32 the temp file
    descriptor is closed and the file is deleted; on every other platform
    the lockf() lock on the descriptor is released and the file is deleted
    if it still exists. Failures are logged, never raised.
    """
    if not self.locked:
        return

    if sys.platform == "win32":
        if hasattr(self, "tempFile"):
            try:
                os.close(self.tempFile)
                os.unlink(self.tempFilePath)
            except Exception as e:
                logger.error(e)
        else:
            logger.warning("No temp file found for %s", self.name)
    else:
        try:
            #This used to be spelled "fnctl", the resulting NameError was
            #swallowed below so the lock was never released and the temp
            #file never deleted
            fcntl.lockf(self.tempFile, fcntl.LOCK_UN)
            if os.path.isfile(self.tempFilePath):
                os.unlink(self.tempFilePath)
        except Exception as e:
            logger.error(e)
def reset(self, resetData):
    """Reset parts of this job according to resetData.

    resetData is indexed as (render layers to reset, new start frame,
    reset node data flag). An empty/None resetData is a no-op.

    When the flag is set, "failures" and "attempts" are cleared. When render
    layers are given, their entries in the render layer tracker are set to
    the new start frame (0 when that frame is at or below the job's start
    frame) and a KILLED or FINISHED job is set to PAUSED.

    Returns 0 on success or when there was nothing to do, -1 when the new
    start frame lies beyond the end frame, -2 when any update reported
    failure.
    """
    if not resetData:
        logger.debug("No reset data recieved")
        return 0

    layersToReset = resetData[0]
    newStartFrame = resetData[1]
    resetNodeData = resetData[2]
    results = []

    if resetNodeData:
        for attr, cleared in (("failures", ""), ("attempts", 0)):
            results.append(self.updateAttr(attr, cleared))

    if layersToReset:
        if newStartFrame > self.endFrame:
            logger.error("New start frame is higher than the end frame! Aboring!")
            return -1

        #A tracker value of 0 stands for "start from the job's own start frame"
        if newStartFrame < self.startFrame:
            logger.warning("New start frame is lower than original start frame, resetting to default.")
            newStartFrame = 0
        elif newStartFrame == self.startFrame:
            newStartFrame = 0

        #Tracker entries are matched to render layers by position
        layerNames = self.renderLayers.split(",")
        positions = [layerNames.index(layer) for layer in layersToReset]
        tracker = self.renderLayerTracker.split(",")
        for pos in positions:
            tracker[pos] = str(newStartFrame)
        results.append(self.updateAttr("renderLayerTracker", ",".join(tracker)))

        if self.status in [KILLED, FINISHED]:
            results.append(self.updateAttr("status", PAUSED))

    if all(results):
        return 0
    return -2