def __init__(self, name):
    """Try to take the single-instance lock for the application `name`.

    The lock is a file called "<name>.lock" inside BASEDIR. After this
    returns, self.locked is True only if this process obtained the lock.
    Failures to obtain it are logged, not raised.
    """
    self.locked = False
    self.name = name
    self.tempFilePath = os.path.abspath(
        os.path.join(BASEDIR, "{}.lock".format(self.name)))
    logger.info("Temp File: %s", self.tempFilePath)

    if sys.platform == "win32":
        #Windows
        try:
            #Remove a stale lock file first so the exclusive create can succeed
            if os.path.exists(self.tempFilePath):
                os.unlink(self.tempFilePath)
                logger.debug("Unlink %s", self.tempFilePath)
            self.tempFile = os.open(self.tempFilePath,
                                    os.O_CREAT | os.O_EXCL | os.O_RDWR)
            self.locked = True
        except Exception as e:
            #Not every exception has an errno attribute, reading it blindly
            #would raise AttributeError from inside this handler.
            #errno 13 is EACCES (permission denied)
            if getattr(e, "errno", None) == 13:
                logger.error("Another Instance of %s is already running!", self.name)
            else:
                logger.error(e)
    else:
        #Linux
        import fcntl
        self.tempFile = open(self.tempFilePath, "w")
        self.tempFile.flush()
        try:
            #Non-blocking exclusive lock, raises IOError if it is already held
            fcntl.lockf(self.tempFile, fcntl.LOCK_EX | fcntl.LOCK_NB)
            self.locked = True
        except IOError:
            logger.error("Another Instance of %s is already running", self.name)
def checkOnTimeouts(self, runningNodes):
    """Call self.getOffNode on every node in runningNodes whose `pulse`
    is more than three hours older than the current time."""
    logger.debug("Checking on running Nodes for timeouts")
    maxSilence = datetime.timedelta(hours=3)
    checkTime = datetime.datetime.now()
    for runningNode in runningNodes:
        #Nodes that pulsed recently enough are left alone
        if (checkTime - runningNode.pulse) <= maxSilence:
            continue
        logger.debug("Node %s is timed out!", runningNode.host)
        self.getOffNode(runningNode)
def softwareUpdater():
    """Look for a newer "dist_<version>" directory next to the one named by
    the HYDRA environment variable and switch to it.

    Returns False when HYDRA is unset, when no usable dist_ directory exists
    or when nothing newer was found. Otherwise returns the response of
    changeHydraEnviron for the newest directory.
    """
    hydraPath = os.getenv("HYDRA")
    if not hydraPath:
        logger.error("HYDRA environmental variable does not exist!")
        return False

    hydraPath, thisVersion = os.path.split(hydraPath)
    try:
        currentVersion = float(thisVersion.split("_")[-1])
    except ValueError:
        logger.warning("Unable to obtain version number from file path. Assuming version number from Constants")
        currentVersion = Constants.VERSION

    #Map each parsed version to the directory name it came from. The name
    #is kept because rebuilding it from the float loses formatting
    #(dist_0.20 would become dist_0.2 and point at nothing).
    versions = {}
    for dirName in os.listdir(hydraPath):
        if not dirName.startswith("dist_"):
            continue
        try:
            versions[float(dirName.split("_")[-1])] = dirName
        except ValueError:
            #One oddly named directory should not abort the whole check
            logger.debug("Ignoring %s, could not parse a version number from it", dirName)
    if not versions:
        return False

    highestVersion = max(versions)
    logger.debug("Comparing versions. Env: %s Latest: %s", currentVersion, highestVersion)
    if highestVersion > currentVersion:
        logger.info("Update found! Current Version is %s / New Version is %s", currentVersion, highestVersion)
        newPath = os.path.join(hydraPath, versions[highestVersion])
        response = changeHydraEnviron(newPath)
        if not response:
            logger.critical("Could not update to newest environ for some reason!")
        return response
    else:
        return False
def getTotalRenderTime(self, returnDateTime=True):
    """Find the single "total time for N frames:" line in the filtered log
    lines and return its duration.

    Returns a datetime.timedelta, or its str() form when returnDateTime is
    False. Returns None when there are no filtered lines, when zero or more
    than one total-time line is found, or when the hour count exceeds 24.
    """
    #Filter on demand, then give up if there is still nothing to search
    if not self.filteredLines:
        self.filterLines()
    if not self.filteredLines:
        return None

    totalTimePattern = re.compile(r"total time for \d+ frames:.*")
    found = [hit
             for logLine in self.filteredLines
             for hit in totalTimePattern.findall(logLine)]
    if len(found) > 1:
        logger.critical("More than one total time found in %s", self.fp)
        return None
    if not found:
        logger.debug("No total frame time found in %s", self.fp)
        return None

    hours, minutes, seconds = self.getRsRenderTimeMatch(found[0].strip())
    if hours > 24:
        logger.critical("Critical! Log Parser is not setup to handle times longer than 24 hours yet...")
        return None

    elapsed = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
    if not returnDateTime:
        return str(elapsed)
    return elapsed
def getDatabaseInfo():
    """Collect the database connection settings and login credentials.

    Settings are read through Utils.getInfoFromCFG. The password comes from
    PasswordStorage.loadCredentials when autologin is enabled and a password
    is stored, otherwise from PasswordStorage.qtPrompt. Exits the process
    with status 1 if the prompt yields no username or password.

    Returns the tuple (host, username, password, databaseName, port).
    """
    logger.debug("Finding login information...")

    #Get database information
    host = Utils.getInfoFromCFG("database", "host")
    domain = Utils.getInfoFromCFG("network", "dnsDomainExtension").replace(" ", "")
    if domain != "" and host != "localhost":
        host += ".{}".format(domain)
    databaseName = Utils.getInfoFromCFG("database", "db")
    port = int(Utils.getInfoFromCFG("database", "port"))
    db_username = Utils.getInfoFromCFG("database", "username")

    #Get login information
    #startswith instead of [0] so an empty config value means "no autologin"
    #rather than raising IndexError
    autoLogin = str(Utils.getInfoFromCFG("database", "autologin")).lower().startswith("t")

    if autoLogin:
        _db_password = PasswordStorage.loadCredentials(db_username)
        if not _db_password:
            #Nothing stored for this user, fall back to the prompt
            autoLogin = False

    if not autoLogin:
        returnValues = PasswordStorage.qtPrompt()
        if not returnValues or not returnValues[0] or not returnValues[1]:
            logger.error("Could not login!")
            sys.exit(1)
        else:
            db_username = returnValues[0]
            _db_password = returnValues[1]

    return host, db_username, _db_password, databaseName, port
def kill(self, statusAfterDeath="K", TCPKill=True):
    """Kill this task if its status is STARTED.

    With TCPKill the node running the task is asked to kill it via
    self.sendKillQuestion. If the node gives a truthy answer the result is
    True for a positive answer and False otherwise. In every other case
    (TCPKill disabled, node busy with another task, falsy answer) the task
    is marked as dead in the database here, and the node record is reset
    too unless it was found to be running a different task.

    Returns True when the task was not STARTED or was marked dead here.
    """
    #Guard: only started tasks can be killed
    if self.status != STARTED:
        logger.debug("Task Kill is skipping task %s because of status %s", self.id, self.status)
        return True

    killed = False
    updateNode = True
    node = hydra_rendernode.fetch("WHERE host = %s", (self.host,),
                                  cols=["status", "task_id"])

    if TCPKill:
        if node.task_id == self.id:
            killed = self.sendKillQuestion(statusAfterDeath)
        else:
            logger.warning("Node is not running the given task! Marking as dead.")
            updateNode = False

    #A truthy answer means the node handled the request itself
    if killed:
        return bool(killed > 0)

    #If it was not killed by the node then we need to mark it as dead here instead
    logger.debug("TCPKill recived None, marking task as killed")
    self.status = statusAfterDeath
    self.exitCode = 1
    self.endTime = datetime.datetime.now()
    with transaction() as t:
        self.update(t)
        if updateNode:
            if node.status == STARTED:
                node.status = IDLE
            else:
                node.status = OFFLINE
            node.task_id = None
            node.update(t)
    return True
def online(self):
    """Switch status "O" to "I" or status "P" to "S" through
    self.updateAttr and return its result. Any other status is left
    untouched and True is returned."""
    #Maps the current status to the status it should become
    nextStatus = {"O": "I", "P": "S"}.get(self.status)
    if nextStatus is None:
        logger.debug("No status changes made to %s", self.host)
        return True
    return self.updateAttr("status", nextStatus)
def assignTask(node, task, job):
    """Send a StartRenderQuestion for `job`/`task` to `node.host` over TCP
    and return the answer. A falsy answer is logged as a declined task."""
    logger.debug("Assigning task with id %d to node %s", task.id, node.host)
    accepted = TCPConnection(hostname=node.host).getAnswer(StartRenderQuestion(job, task))
    if not accepted:
        logger.error("Task %d was declined on %s", task.id, node.host)
    else:
        logger.debug("Task %d was accepted on %s", task.id, node.host)
    return accepted
def killTask(self, statusAfterDeath=KILLED):
    """Kill the task this node is currently running, if any.

    When the node status is "S" and it has a task_id, the task is fetched
    and its kill() result is returned. Otherwise nothing is done and True
    is returned.
    """
    if not (self.status == "S" and self.task_id):
        logger.info("No task to kill on %s", self.host)
        return True

    #fetch() concatenates whereTuple onto a tuple, so the id has to be
    #wrapped in a tuple. A bare value raises TypeError there.
    taskOBJ = hydra_taskboard.fetch("WHERE id = %s", (self.task_id,),
                                    cols=["host", "status", "exitCode", "endTime"])
    logger.debug("Killing task %d on %s", self.task_id, self.host)
    return taskOBJ.kill(statusAfterDeath, True)
def checkNodeStatus(idleNodes):
    """Send an IsAliveQuestion to every node in idleNodes and return the
    list of nodes that gave a truthy answer, in their original order."""
    logger.debug("Checking Node Status on Idle Nodes")
    reachable = []
    for idleNode in idleNodes:
        alive = TCPConnection(hostname=idleNode.host).getAnswer(IsAliveQuestion())
        if alive:
            reachable.append(idleNode)
            continue
        logger.debug("%s could not be reached! Removing from Idle Nodes.", idleNode.host)
    return reachable
def resetThisNode(self):
    """Resets node after render, sets current task to None and updates node status."""
    with transaction() as t:
        #Re-read the node inside the transaction before deciding its new status
        refreshed = hydra_rendernode.fetch("WHERE host = %s", (self.thisNode.host,),
                                           explicitTransaction=t)
        self.thisNode = refreshed
        if refreshed.status == STARTED:
            newStatus = IDLE
        else:
            newStatus = OFFLINE
        refreshed.status = newStatus
        logger.debug("New Node Status: %s", refreshed.status)
        refreshed.task_id = None
        refreshed.update(t)
def update(self, trans):
    """Write every attribute named in self.__dirty__ to this object's row
    using the cursor of the given transaction.

    Returns True after issuing the UPDATE, or None when there is nothing
    to update.
    """
    dirtyCols = list(self.__dirty__)
    if not dirtyCols:
        logger.info("Nothing to update on %s", self.tableName())
        return

    #One bound value per dirty column, plus the primary key for the WHERE
    params = [getattr(self, col) for col in dirtyCols]
    params.append(getattr(self, self.primaryKey))
    setClause = ", ".join("{} = %s".format(col) for col in dirtyCols)
    query = "UPDATE {0} SET {1} WHERE {2} = %s".format(self.tableName(),
                                                       setClause,
                                                       self.primaryKey)
    logger.debug((query, params))
    trans.cur.executemany(query, [params])
    return True
def shutdown(self, event=None):
    """Stop the running threads and servers, hide the tray icon and exit
    the process with status 0.

    event is optional. When given, its accept() is called before exiting.
    The old body called accept() on an `event` name that is neither a
    parameter nor a local of this method, which raises NameError before
    sys.exit can run.
    """
    logger.info("Shutting down...")
    if self.schedThreadStatus:
        self.schedThread.terminate()
    if self.autoUpdateStatus:
        self.autoUpdateThread.terminate()
    if self.renderServerStatus:
        self.renderServer.shutdown()
    logger.debug("All Servers Shutdown")
    self.trayIcon.hide()
    if event is not None:
        event.accept()
    sys.exit(0)
def selectByHostHandler(self):
    """Selects hosts in the nodeTree via host name"""
    reply = strBox(self, "Select By Host Name", "Host (using * as wildcard):")
    #reply[1] is falsy when there is nothing to search for
    if not reply[1]:
        return

    lastColumn = self.renderNodeTree.columnCount() - 1
    pattern = str(reply[0])
    for row in range(self.renderNodeTree.rowCount()):
        hostName = str(self.renderNodeTree.item(row, 0).text())
        if not fnmatch.fnmatch(hostName, pattern):
            continue
        #Select the whole row of the matching host
        wholeRow = QTableWidgetSelectionRange(row, 0, row, lastColumn)
        self.renderNodeTree.setRangeSelected(wholeRow, True)
        logger.debug("Selecting %s matched with %s", hostName, pattern)
def setupGlobals(self):
    """Read the scene path and the -flag options from sys.argv into
    self.scene and self.settingsDict.

    An invalid option is reported through aboutBox and exits the process
    with status 2.
    """
    #Scene file should be first sys.argv
    rawScene = sys.argv[1] if len(sys.argv) > 1 else ""
    self.scene = rawScene.replace('\\', '/')

    #Get the -flag args
    try:
        opts, _ = getopt.getopt(sys.argv[2:], "s:e:n:p:l:x:m:d:c:q:t:")
    except getopt.GetoptError:
        logger.error("Bad Opt!")
        aboutBox(self, "Bad Opt!",
                 "One of the command line options you entered was invalid.\n"
                 "\nPlease remove any unkown opts and try again.")
        sys.exit(2)

    #Defaults
    settings = {
        "-s": 101,                          #Start Frame (Int)
        "-e": 101,                          #End Frame (Int)
        "-n": self.scene.split("/")[-1],    #Nice Name (Str)
        "-p": "",                           #Proj (Str)
        "-l": "",                           #Render Layers (Str,Sep,By,Comma)
        "-x": "",                           #Executable (Str)
        "-m": "",                           #CMD (Str)
        "-d": "",                           #RenderDirectory (Str)
        "-c": "",                           #Compatibilities (Str,Sep,By,Comma)
        "-q": "",                           #Project Name (Str)
        "-t": "",                           #Job Type (Str)
    }
    self.settingsDict = settings

    #Apply the -flag args
    for flag, value in dict(opts).items():
        settings[flag] = value
        logger.debug("Setting Key '%s' with opt: '%s'", flag, str(value))

    #Fix paths
    settings["-p"] = settings["-p"].replace('\\', '/')
    #Compatibilities become a list
    settings["-c"] = settings["-c"].split(",")
    #Add underscores to niceName
    settings["-n"] = settings["-n"].replace(" ", "_")

    #Move RenderDir to Base CMD
    renderDir = settings["-d"]
    if renderDir != "":
        renderDir = renderDir.replace('\\', '/')
        settings["-d"] = renderDir
        settings["-m"] += " -rd \"{0}\"".format(renderDir)
def main():
    """Entry point: take the "HydraRenderManager" instance lock, exit with
    status -1 if it is not held, otherwise create the render management
    server and its task processing idle loop."""
    logger.info("Starting in %s", os.getcwd())
    logger.info("arglist %s", sys.argv)

    #Check for other RenderManager instances
    #The lock object is kept in a local so it stays referenced while main runs
    instanceLock = InstanceLock("HydraRenderManager")
    haveLock = instanceLock.isLocked()
    logger.debug("Lock File Status: %s", haveLock)
    if not haveLock:
        logger.critical("Only one RenderManager is allowed to run at a time! Exiting...")
        sys.exit(-1)

    server = RenderManagementServer()
    server.createIdleLoop("Process_Render_Tasks_Thread",
                          server.processRenderTasks,
                          interval=15)
def findNextEvent(now, dbData):
    """Take the current datetime and the decoded schedule data from the DB
    and find the next scheduling event.

    dbData is a comma separated list of "day-HH:MM-action" items, where day
    is compared against now.isoweekday(). Returns [dayOfWeek, time, action]
    for the next event, or None when dbData holds fewer than two items or
    no schedule could be found.
    """
    nowDayOfWeek = now.isoweekday()
    nowTime = now.time()

    dataList = dbData.split(",")
    if len(dataList) < 2:
        return None

    #Group the [time, action] pairs by day of week
    dataDict = {}
    for actionItem in dataList:
        actionItemList = actionItem.split("-")
        dayOfWeek = int(actionItemList[0])
        action = int(actionItemList[2])
        timeList = [int(t) for t in actionItemList[1].split(":")]
        timeObject = datetime.time(timeList[0], timeList[1])
        dataDict.setdefault(dayOfWeek, []).append([timeObject, action])

    #schedule is a nested list like [[time, action], [time, action]]
    todaySchedule, newDayOfWeek = findSchedule(nowDayOfWeek, dataDict)
    if not todaySchedule:
        return None

    def activationTime(schedItem):
        return schedItem[0]

    #The next event is the EARLIEST item that is still ahead of now. The
    #old loop had no break, so it kept the last match instead, and it
    #logged the previous value of sched rather than the item found.
    #NOTE(review): this assumes findSchedule returned today's schedule,
    #confirm what it returns when today has no entries.
    sched = None
    upcoming = [item for item in todaySchedule if item[0] > nowTime]
    if upcoming:
        sched = min(upcoming, key=activationTime)
        logger.debug("Schedule Found: %s", sched)
    else:
        #Nothing left today, so the next item is on a later day of the week.
        #Move to the next day and look again.
        newDayOfWeek += 1
        todaySchedule, newDayOfWeek = findSchedule(newDayOfWeek, dataDict)
        #Check before indexing, an empty result used to raise here
        if todaySchedule:
            sched = min(todaySchedule, key=activationTime)

    if not sched:
        logger.error("Could not find schedule")
        return None

    return [newDayOfWeek] + sched
def __init__(self):
    """Set up the render server state for this node and start serving.

    Loads this node's database record (exiting with status -1 if it does
    not exist), reads the Redshift GPU preference, makes sure the render
    log directory exists, calls self.unstickTask, stores the software
    version on the node record and starts the server thread on the
    configured network port.
    """
    #Setup Class Variables
    self.renderThread = None
    self.childProcess = None
    self.PSUtilProc = None
    self.statusAfterDeath = None
    self.childKilled = 0
    self.HydraJob = None
    self.HydraTask = None
    self.logPath = None
    #Always defined, so code reading it does not fail on nodes without GPUs
    self.rsGPUids = []

    #Get this node data from the database and make sure it exists
    self.thisNode = getThisNodeOBJ()
    logger.debug(self.thisNode)
    if not self.thisNode:
        logger.critical(
            "This node does not exist in the database! Please Register this node and try again."
        )
        sys.exit(-1)

    #Detect RedShift GPUs
    self.rsGPUs = Utils.getRedshiftPreference("SelectedCudaDevices")
    if self.rsGPUs:
        #The last comma separated field is dropped
        #NOTE(review): presumably the preference ends with a trailing comma, confirm
        self.rsGPUs = self.rsGPUs.split(",")[:-1]
        #The id is the part of each entry before the first colon
        self.rsGPUids = [x.split(":")[0] for x in self.rsGPUs]
        logger.info("%s Redshift Enabled GPU(s) found on this node", len(self.rsGPUs))
        logger.debug("GPUs available for rendering are %s", self.rsGPUs)
    else:
        logger.warning("Could not find available Redshift GPUs")

    #Create RenderLog Directory if it doesn't exist
    if not os.path.isdir(Constants.RENDERLOGDIR):
        os.makedirs(Constants.RENDERLOGDIR)

    self.unstickTask()
    self.thisNode.software_version = Constants.VERSION
    with transaction() as t:
        self.thisNode.update(t)

    #Run The Server
    port = int(Utils.getInfoFromCFG("network", "port"))
    self.startServerThread(port)
def fetch(cls, whereClause="", whereTuple=None, cols=None, orderTuples=None, limit=None, multiReturn=False, explicitTransaction=None):
    """A fetch function with parameter binding.
    ie. thisNode = hydra_rendernode.fetch("WHERE host = %s", ("test",))
    idAttr = thisNode.id

    whereTuple holds the values bound to the placeholders of whereClause. A
    single bare value is accepted and treated as a one item tuple.
    cols limits the selected columns, the primary key is always included.
    orderTuples is a sequence of (column, direction) pairs, direction being
    "ASC" or "DESC". Raises ValueError for anything else, because these are
    written into the statement directly.
    limit is bound as the LIMIT value.
    The query runs in explicitTransaction when given, otherwise in a new
    transaction, and the result of cls.doFetch is returned."""
    #Column Clause
    colStatement = "*"
    if cols:
        cols = [str(x) for x in cols]
        if cls.primaryKey not in cols:
            cols.append(cls.primaryKey)
        colStatement = ",".join(cols)

    queryTuple = tuple()

    #Where Clause
    if whereClause and whereTuple is not None:
        #Accept a bare value as well as a tuple/list so callers passing a
        #single id do not hit a TypeError on the concatenation
        if not isinstance(whereTuple, (tuple, list)):
            whereTuple = (whereTuple,)
        queryTuple += tuple(whereTuple)

    #Order Clause
    #Column names and directions are identifiers/keywords. Binding them as
    #%s values turns them into quoted string literals, so no real ordering
    #happens. They are validated and written into the statement instead.
    orderClause = ""
    if orderTuples:
        orderParts = []
        for column, direction in orderTuples:
            column = str(column)
            direction = str(direction).upper()
            plainName = column.replace("_", "").replace(".", "")
            if not plainName.isalnum() or direction not in ("ASC", "DESC"):
                raise ValueError("Invalid ORDER BY term: {0} {1}".format(column, direction))
            orderParts.append("{0} {1}".format(column, direction))
        orderClause = "ORDER BY " + ", ".join(orderParts)

    #Limit Clause
    limitClause = ""
    if limit:
        limitClause = "LIMIT %s "
        queryTuple += (limit,)

    #Build Select Statement
    select = "SELECT {0} FROM {1} {2} {3} {4}"
    select = select.format(colStatement, cls.tableName(), whereClause, orderClause, limitClause)
    #pylint: disable=W1201
    logger.debug(select % queryTuple)

    #Fetch the data
    if explicitTransaction:
        return cls.doFetch(explicitTransaction, select, queryTuple, multiReturn)
    else:
        with transaction() as t:
            return cls.doFetch(t, select, queryTuple, multiReturn)
def progressUpdate(self, commit=True):
    """Parse the render log file and update the database with the currently
    rendering frame, MPF (minutes per frame) and the renderLayerTracker.
    Optional commit can stop the data from being updated on the database
    if set to False."""
    if not all([self.HydraTask, self.HydraJob, self.logPath]):
        logger.debug("Could not update progress")
        return

    #Get Log Parser and find the highest rendered frame
    HydraLogObject = LogParsers.getLog(self.HydraJob, self.logPath)
    if not HydraLogObject:
        logger.debug("Log for job %s could not be parsed", self.HydraJob.id)
        return

    newCurrentFrame = HydraLogObject.getNewCurrentFrame()
    #Compare against None explicitly: frame 0 is a valid rendered frame and
    #must not be mistaken for "no frame found"
    #NOTE(review): assumes every log parser reports "nothing found" as None, confirm
    if newCurrentFrame is None:
        newCurrentFrame = self.HydraTask.currentFrame
    else:
        #If we have a valid new currentFrame add one since it's now on the next frame
        newCurrentFrame += 1

    mpf = HydraLogObject.getAverageRenderTime()

    #currentFrame, renderLayerTracker
    self.HydraTask.currentFrame = newCurrentFrame
    self.HydraJob.renderLayerTracker = self.getNewRLTracker(
        self.HydraJob, self.HydraTask)

    #MinutesPerFrame
    if mpf:
        self.HydraTask.mpf = mpf
        if self.HydraJob.mpf:
            #Average the job's stored value with the new one, in whole seconds
            tSecs = int(
                (self.HydraJob.mpf.total_seconds() + mpf.total_seconds()) / 2)
            self.HydraJob.mpf = datetime.timedelta(seconds=tSecs)
        else:
            self.HydraJob.mpf = mpf

    if commit:
        with transaction() as t:
            self.HydraJob.update(t)
            self.HydraTask.update(t)
def sendQuestion(self, question):
    """Send question without waiting for a response.

    Returns True when the pickled question was sent, False when a socket
    error occurred (it is logged) and None when self.hostname is None.
    The socket is closed on every path.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    returnVal = False
    try:
        #Connect to the server
        logger.debug("TCP Connect to %s %d", self.hostname, self.port)
        if self.hostname is None:
            return None
        sock.connect((self.hostname, self.port))
        sock.sendall(pickle.dumps(question))
        #Close the sending half so the other side knows we're done sending
        sock.shutdown(socket.SHUT_WR)
        returnVal = True
    except socket.error as err:
        logger.error(err)
    finally:
        #Close here so the socket is not leaked on the early return above
        #or when connecting/sending fails
        sock.close()
    logger.debug("TCP Connection to %s %d was closed.", self.hostname, self.port)
    return returnVal
def getAnswer(self, question):
    """Send the question to a remote server and get an answer back.

    Returns the unpickled answer, or None when self.hostname is None, when
    a socket error occurs or when the response cannot be unpickled because
    it ended early (EOFError).
    """
    #Create a TCP socket
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        #Connect to the server
        logger.debug("TCP Connect to %s %d", self.hostname, self.port)
        if self.hostname is None:
            return None

        try:
            sock.connect((self.hostname, self.port))
            #Pickle the question and send it
            sock.sendall(pickle.dumps(question))
            #Close the sending half of the connection so the other side
            #knows we're done sending
            sock.shutdown(socket.SHUT_WR)

            #Read the response until the other side closes the connection.
            #A single recv may return only part of the data on a stream
            #socket, which would hand pickle a truncated answer.
            chunks = []
            while True:
                chunk = sock.recv(Constants.MANYBYTES)
                if not chunk:
                    break
                chunks.append(chunk)
            answerBytes = b"".join(chunks)

            #Convert the response to an object
            #NOTE(review): pickle.loads runs on bytes received from the
            #network, only safe while every peer is trusted
            try:
                answer = pickle.loads(answerBytes)
            except EOFError:
                logger.error("EOF Error on Connections.TCPConnection.getAnswer()")
                logger.error("answerBytes = %s", str(answerBytes))
                answer = None
        except socket.error as err:
            logger.debug(err)
            answer = None
    finally:
        sock.close()
        logger.debug("TCP Connection to %s %d was closed.", self.hostname, self.port)

    return answer
def startupServers(self):
    """Start the render server, the pulse loop and the auto update thread,
    set the matching status flags and pixmaps, then start the schedule
    thread through self.startScheduleThread."""
    logger.debug("Firing up main threads")

    #Render Server
    renderServer = RenderNode.RenderTCPServer()
    self.renderServer = renderServer
    self.renderServerStatus = True
    self.renderServerPixmap.setPixmap(self.donePixmap)
    logger.info("Render Server Started!")

    #Pulse Thread, run as an idle loop of the render server
    renderServer.createIdleLoop("Pulse_Thread", pulse, 60)
    self.pulseThreadStatus = True
    self.pulseThreadPixmap.setPixmap(self.donePixmap)
    logger.info("Pulse Thread started!")

    #Auto Update Thread, driven by the updateThisNodeInfo signal
    SIGNAL("updateThisNodeInfo")
    QObject.connect(self, SIGNAL("updateThisNodeInfo"), self.updateThisNodeInfo)
    self.autoUpdateStatus = True
    self.autoUpdateThread = stoppableThread(self.updateThisNodeInfoSignaler,
                                            15,
                                            "AutoUpdate_Thread")

    self.startScheduleThread()
def reset(self, resetData):
    """Reset parts of this job according to resetData.

    resetData is indexed as [resetRLs, currentFrame, nodeReset]:
    resetRLs is the list of render layer names whose tracker entry is set
    to currentFrame, nodeReset clears the failures and attempts fields.

    Returns 0 on success (or when resetData is empty), -1 when currentFrame
    is past the end frame and -2 when any attribute update reported failure.
    Raises ValueError if a name in resetRLs is not one of the job's render
    layers.
    """
    if not resetData:
        logger.debug("No reset data recieved")
        return 0

    resetRLs = resetData[0]
    currentFrame = resetData[1]
    nodeReset = resetData[2]

    #Validate the render layer request BEFORE writing anything, so an
    #aborted reset does not leave failures/attempts already cleared
    idxList = []
    if resetRLs:
        if currentFrame > self.endFrame:
            logger.error("New start frame is higher than the end frame! Aboring!")
            return -1
        if currentFrame < self.startFrame:
            logger.warning("New start frame is lower than original start frame, resetting to default.")
            currentFrame = 0
        if currentFrame == self.startFrame:
            #The start frame itself is stored as 0 in the tracker
            currentFrame = 0
        layerNames = self.renderLayers.split(",")
        idxList = [layerNames.index(x) for x in resetRLs]

    responses = []
    if nodeReset:
        responses.append(self.updateAttr("failures", ""))
        responses.append(self.updateAttr("attempts", 0))

    if resetRLs:
        rlTracker = self.renderLayerTracker.split(",")
        for i in idxList:
            rlTracker[i] = str(currentFrame)
        responses.append(self.updateAttr("renderLayerTracker", ",".join(rlTracker)))
        #A killed or finished job is switched to paused after its layers are reset
        if self.status in [KILLED, FINISHED]:
            responses.append(self.updateAttr("status", PAUSED))

    return 0 if all(responses) else -2
def getNewCurrentFrame(self):
    """Return the highest frame number reported by
    self.getSavedFrameNumbers, or None when it reports no frames."""
    savedFrames = self.getSavedFrameNumbers()
    if savedFrames:
        logger.debug(savedFrames)
        highestFrame = max(savedFrames)
        logger.debug("New currentFrame is: %s", highestFrame)
        return highestFrame
    logger.debug("No renderedFrames found")
    return None
def softwareUpdaterLoop():
    """Checks for a new version in the HYDRA environ, if one is found it
    starts a batch process to start the new version and kills the current
    one running."""
    logger.debug("Checking for updates...")
    if not Utils.softwareUpdater():
        logger.debug("No updates found")
        return
    logger.debug("Update found!")
    #Launch the replacement first, then stop this instance
    Utils.launchHydraApp("RenderNodeConsole", 10)
    socketServer.shutdown()
    sys.exit(0)
def sendKillQuestion(self, newStatus):
    """Ask the render host to kill its current task.

    Returns the answer received for the KillCurrentTaskQuestion unchanged:
    None when no answer came back, otherwise the node's return code (a
    negative code is logged as a failed kill)."""
    logger.debug('Kill task on %s', self.host)
    answer = TCPConnection(hostname=self.host).getAnswer(KillCurrentTaskQuestion(newStatus))
    if answer is not None:
        logger.debug("Child killed return code '%s' for node '%s'", answer, self.host)
        if answer < 0:
            logger.warning("%s tried to kill its job but failed for some reason.", self.host)
    else:
        logger.debug("%s appears to be offline or unresponsive. Treating as dead.", self.host)
    return answer
def shuffleQueue(jobList, taskList):
    """Compare running tasks against waiting jobs by priority and log which
    tasks would be killed to make room. Nothing is actually killed here.

    Tasks and jobs are each walked in ascending priority order. The walk
    stops at the first task/job pair where the task's priority is not below
    75% of the job's.
    """
    ########################################################################
    #TODO: UNTESTED!
    ########################################################################
    logger.debug("Shuffling Render Tasks")
    sortedTasks = sorted(taskList, key=attrgetter("priority"))
    sortedJobs = sorted(jobList, key=attrgetter("priority"))
    for task in sortedTasks:
        for job in sortedJobs:
            #Log the ratio only when it can be computed. float() keeps the
            #division from flooring when both priorities are ints.
            if job.priority:
                logger.debug(float(task.priority) / job.priority)
            #If the job priority is 25% or more higher than the task's,
            #   kill the task to make room for the new job.
            #Written as a multiplication so integer priorities are not
            #floor-divided and a job priority of 0 cannot raise
            #ZeroDivisionError.
            #NOTE(review): assumes priorities are never negative, confirm
            if task.priority < 0.75 * job.priority:
                logger.debug("Killing task %s", task.id)
                break
            else:
                return
def filterTask(job, node):
    """Decide whether `job` may run on `node`.

    Returns False when the job priority is below the node's minPriority,
    when the node's host is listed in the job's comma separated failures,
    or when any of the job's comma separated requirements is missing from
    the node's space separated capabilities. Returns True otherwise.
    """
    if job.priority < node.minPriority:
        logger.debug("Skipping job %d because it does not meet %s's minPriority requirement", job.id, node.host)
        return False

    #Hosts this job already failed on
    if job.failures:
        if node.host in job.failures.split(","):
            logger.debug("Skipping job %d because it has failed on node %s in the past", job.id, node.host)
            return False

    #Every requirement has to be among the node's capabilities
    if job.requirements:
        nodeCaps = node.capabilities.split(" ")
        missing = [req for req in job.requirements.split(",") if req not in nodeCaps]
        if missing:
            logger.debug("Skipping job %d because node %s cannot meet its feature requirements", job.id, node.host)
            return False

    #If all of the above tests pass
    return True
def assignRenderJobs(self, renderJobs, idleNodes, allJobs):
    """Try to hand one ready render job to each idle node.

    renderJobs holds (jobID, renderLayer) entries and is modified in place:
    an entry is removed once its task was accepted by a node. allJobs maps
    jobID to the job object. For each node the first job that passes
    self.filterTask gets a task record inserted and is offered to the node.
    If the node declines, the task record is marked as killed with exit
    code 101. Either way the node is not offered another job in this pass.

    Returns True when there was nothing to assign, otherwise None.
    """
    if len(renderJobs) < 1 or len(idleNodes) < 1:
        logger.debug("No Idle Nodes or Ready Jobs found. Skipping assignment...")
        return True

    logger.debug("Assigning Render Tasks")
    for nodeOBJ in idleNodes:
        for jobEntry in renderJobs:
            jobID, renderLayer = jobEntry
            jobOBJ = allJobs[jobID]
            if not self.filterTask(jobOBJ, nodeOBJ):
                continue

            startFrame = self.getStartFrame(jobOBJ, renderLayer)
            taskOBJ = hydra_taskboard(job_id=jobID,
                                      status="S",
                                      startTime=datetime.datetime.now(),
                                      host=nodeOBJ.host,
                                      renderLayer=renderLayer,
                                      startFrame=startFrame,
                                      endFrame=jobOBJ.endFrame,
                                      currentFrame=startFrame)
            with transaction() as t:
                taskOBJ.insert(t)

            result = self.assignTask(nodeOBJ, taskOBJ, jobOBJ)
            #pylint: disable=E1101
            if result:
                with transaction() as t:
                    nodeOBJ.status = STARTED
                    nodeOBJ.task_id = taskOBJ.id
                    nodeOBJ.update(t)
                #Remove the entry object itself. Rebuilding it as a list
                #raises ValueError when the entries are tuples.
                renderJobs.remove(jobEntry)
            else:
                logger.debug("Cleaning up task %s", taskOBJ.id)
                taskOBJ.status = KILLED
                taskOBJ.endTime = datetime.datetime.now()
                taskOBJ.exitCode = 101
                #Mark job failure? Offline node?
                with transaction() as t:
                    taskOBJ.update(t)
            #One attempt per node, and renderJobs must not be iterated
            #further after it was modified
            break