def monitorJobTracker(self):
    """Check whether the cluster is idle and notify the registered handler.

    This method is periodically called to monitor the JobTracker of the
    cluster. When ``self.__isIdle()`` reports an idle cluster and an idle
    handler is registered, an info message is logged and the handler's
    ``handleIdleJobTracker()`` callback is invoked.

    Monitoring is best-effort: any ``Exception`` raised by the idle check or
    by the callback is logged at debug level and not propagated.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed,
    so an interrupt or interpreter shutdown is never hidden by this method.

    Returns:
        None.
    """
    try:
        # Short-circuit evaluation keeps the original ordering: the idle
        # check runs first, and the handler is only consulted when idle.
        if self.__isIdle() and self.__idleJobTrackerHandler:
            self.__log.info('Detected cluster as idle. Calling registered callback handler.')
            self.__idleJobTrackerHandler.handleIdleJobTracker()
    except Exception:
        # Narrowed from a bare ``except:`` so that only ordinary errors are
        # treated as a failed monitoring pass.
        self.__log.debug('Exception while monitoring job tracker. %s' % get_exception_string())
def monitorJobTracker(self):
    """Check whether the cluster is idle and notify the registered handler.

    This method is periodically called to monitor the JobTracker of the
    cluster. When ``self.__isIdle()`` reports an idle cluster and an idle
    handler is registered, an info message is logged and the handler's
    ``handleIdleJobTracker()`` callback is invoked.

    Monitoring is best-effort: any ``Exception`` raised by the idle check or
    by the callback is logged at debug level and not propagated.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed,
    so an interrupt or interpreter shutdown is never hidden by this method.

    Returns:
        None.
    """
    try:
        # Short-circuit evaluation keeps the original ordering: the idle
        # check runs first, and the handler is only consulted when idle.
        if self.__isIdle() and self.__idleJobTrackerHandler:
            self.__log.info('Detected cluster as idle. Calling registered callback handler.')
            self.__idleJobTrackerHandler.handleIdleJobTracker()
    except Exception:
        # Narrowed from a bare ``except:`` so that only ordinary errors are
        # treated as a failed monitoring pass.
        self.__log.debug('Exception while monitoring job tracker. %s' % get_exception_string())
def getJobsStatus(self):
    """Return the status of all jobs run on the HOD allocated hadoop cluster.

    The job list is obtained by running the status command built by
    ``self.__initStatusCommand(option='-list all')`` and passing each line of
    its output through ``self.__extractJobStatus``; lines for which that
    helper returns ``None`` are skipped.

    The command is only run when ``self.__isCompatibleHadoopVersion``
    accepts version 0.16; otherwise, or when the command exits with a
    non-zero code, an empty list is returned.

    Collection is best-effort: any ``Exception`` is logged at debug level and
    whatever statuses were gathered before the failure are still returned.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed.

    Returns:
        list: the non-``None`` values produced by ``__extractJobStatus``, in
        output order (possibly empty).
    """
    jobStatusList = []
    try:
        hadoop16Version = {'major': '0', 'minor': '16'}
        if not self.__isCompatibleHadoopVersion(hadoop16Version):
            return jobStatusList

        jtStatusCommand = self.__initStatusCommand(option='-list all')
        jtStatusCommand.start()
        jtStatusCommand.wait()
        jtStatusCommand.join()
        if jtStatusCommand.exit_code() != 0:
            return jobStatusList

        # Append one at a time (rather than building a new list) so that
        # statuses parsed before a mid-loop failure are still returned.
        for line in jtStatusCommand.output():
            jobStatus = self.__extractJobStatus(line)
            if jobStatus is not None:
                jobStatusList.append(jobStatus)
    except Exception:
        # Narrowed from a bare ``except:`` so interrupts and interpreter
        # exit requests are not mistaken for a failed status query.
        self.__log.debug('Exception while getting job statuses. %s' % get_exception_string())
    return jobStatusList
def getJobsStatus(self):
    """Return the status of all jobs run on the HOD allocated hadoop cluster.

    The job list is obtained by running the status command built by
    ``self.__initStatusCommand(option='-list all')`` and passing each line of
    its output through ``self.__extractJobStatus``; lines for which that
    helper returns ``None`` are skipped.

    The command is only run when ``self.__isCompatibleHadoopVersion``
    accepts version 0.16; otherwise, or when the command exits with a
    non-zero code, an empty list is returned.

    Collection is best-effort: any ``Exception`` is logged at debug level and
    whatever statuses were gathered before the failure are still returned.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT swallowed.

    Returns:
        list: the non-``None`` values produced by ``__extractJobStatus``, in
        output order (possibly empty).
    """
    jobStatusList = []
    try:
        hadoop16Version = {'major': '0', 'minor': '16'}
        if not self.__isCompatibleHadoopVersion(hadoop16Version):
            return jobStatusList

        jtStatusCommand = self.__initStatusCommand(option='-list all')
        jtStatusCommand.start()
        jtStatusCommand.wait()
        jtStatusCommand.join()
        if jtStatusCommand.exit_code() != 0:
            return jobStatusList

        # Append one at a time (rather than building a new list) so that
        # statuses parsed before a mid-loop failure are still returned.
        for line in jtStatusCommand.output():
            jobStatus = self.__extractJobStatus(line)
            if jobStatus is not None:
                jobStatusList.append(jobStatus)
    except Exception:
        # Narrowed from a bare ``except:`` so interrupts and interpreter
        # exit requests are not mistaken for a failed status query.
        self.__log.debug('Exception while getting job statuses. %s' % get_exception_string())
    return jobStatusList
opList = self.__check_operation(operation) if self.__opCode == 0: if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)): self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ self.__userState.get_state_file()) self.__opCode = 1 return self.__opCode getattr(self, "_op_%s" % opList[0])(opList) except HodInterruptException, h: self.__log.critical("op: %s failed because of a process interrupt." \ % operation) self.__opCode = HOD_INTERRUPTED_CODE except: self.__log.critical("op: %s failed: %s" % (operation, get_exception_error_string())) self.__log.debug(get_exception_string()) self.__cleanup() self.__log.debug("return code: %s" % self.__opCode) return self.__opCode def script(self): errorFlag = False errorMsgs = [] scriptRet = 0 # return from the script, if run script = self.__cfg['hod']['script'] nodes = self.__cfg['hod']['nodecount'] clusterDir = self.__cfg['hod']['clusterdir']
if self.__opCode == 0: if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK, )): self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ self.__userState.get_state_file()) self.__opCode = 1 return self.__opCode getattr(self, "_op_%s" % opList[0])(opList) except HodInterruptException, h: self.__log.critical("op: %s failed because of a process interrupt." \ % operation) self.__opCode = HOD_INTERRUPTED_CODE except: self.__log.critical("op: %s failed: %s" % (operation, get_exception_error_string())) self.__log.debug(get_exception_string()) self.__cleanup() self.__log.debug("return code: %s" % self.__opCode) return self.__opCode def script(self): errorFlag = False errorMsgs = [] scriptRet = 0 # return from the script, if run script = self.__cfg['hod']['script'] nodes = self.__cfg['hod']['nodecount'] clusterDir = self.__cfg['hod']['clusterdir']