def __copy_tarball(self, destDir):
    """Copy the hadoop tar ball from a remote location to the specified
    destination directory. Based on the URL it executes an appropriate
    copy command. Throws an exception if the command returns a non-zero
    exit code."""
    # for backwards compatibility, treat the default case as file://
    url = ''
    if self.tarSrcLoc.startswith('/'):
        url = 'file:/'
    src = '%s%s' % (url, self.tarSrcLoc)
    if src.startswith('file://'):
        # strip 'file:/', keeping the leading slash of the local path
        src = src[len('file://') - 1:]
        cpCmd = '/bin/cp'
        cmd = '%s %s %s' % (cpCmd, src, destDir)
        self.log.debug('Command to execute: %s' % cmd)
        copyProc = simpleCommand('remote copy', cmd)
        copyProc.start()
        copyProc.wait()
        copyProc.join()
        ret = copyProc.exit_code()
        self.log.debug('Completed command execution. Exit Code: %s.' % ret)
        if ret != 0:
            output = copyProc.output()
            raise Exception('Could not copy tarball using command %s. '
                            'Exit code: %s. Output: %s' % (cmd, ret, output))
    else:
        raise Exception('Unsupported URL for file: %s' % src)
def removeMRSystemDirectory(self):
    jtActive = isProcessRunning(self.__jtPid)
    count = 0  # try for a max of a minute for the process to end
    while jtActive and (count < self.__retries):
        time.sleep(0.5)
        jtActive = isProcessRunning(self.__jtPid)
        count += 1

    if count == self.__retries:
        self.__log.warn('Job Tracker did not exit even after a minute. '
                        'Not going to try and cleanup the system directory')
        return

    self.__log.debug('jt is now inactive')

    cmd = "%s dfs -fs hdfs://%s -rmr %s" % (self.__hadoopPath, self.__fsName,
                                            self.__mrSysDir)
    self.__log.debug('Command to run to remove system directory: %s' % (cmd))
    try:
        hadoopCommand = simpleCommand('mr-sys-dir-cleaner', cmd)
        hadoopCommand.start()
        hadoopCommand.wait()
        hadoopCommand.join()
        ret = hadoopCommand.exit_code()
        if ret != 0:
            self.__log.warn("Error in removing MapReduce system directory "
                            "'%s' from '%s' using path '%s'"
                            % (self.__mrSysDir, self.__fsName, self.__hadoopPath))
            self.__log.warn(pprint.pformat(hadoopCommand.output()))
        else:
            self.__log.info("Removed MapReduce system directory successfully.")
    except:
        self.__log.error('Exception while cleaning up MapReduce system directory. '
                         'May not be cleaned up. %s', get_exception_error_string())
        self.__log.debug(get_exception_string())
def qsub(self, argList, stdinList):
    jobID = False
    exitCode = 0

    qsubCommand = "%s %s" % (self.__qsub, args_to_string(argList))
    self.__log.debug("qsub -> %s" % qsubCommand)

    qsubProcess = simpleCommand('qsub', qsubCommand, env=self.__env)
    qsubProcess.start()

    # wait until the child's stdin pipe is available
    while qsubProcess.stdin == None:
        time.sleep(.2)

    try:
        for line in stdinList:
            self.__log.debug("qsub stdin: %s" % line)
            print >>qsubProcess.stdin, line
        qsubProcess.stdin.close()
    except IOError, i:
        # If torque's qsub is given invalid params, it fails & returns
        # immediately. Check for such errors here.
        # Wait for command execution to finish
        qsubProcess.wait()
        qsubProcess.join()
        output = qsubProcess.output()
        if output != []:
            self.__log.critical("qsub Failure : %s " % output[0].strip())
            self.__log.critical("qsub Command : %s" % qsubCommand)
        return None, qsubProcess.exit_code()

    qsubProcess.wait()
    qsubProcess.join()

    # on success, torque's qsub prints the job id as its first line of output
    if qsubProcess.exit_code() == 0:
        buffer = qsubProcess.output()
        jobID = buffer[0].rstrip('\n')
        exitCode = False

    return jobID, exitCode
def testProcessStatus(self):
    sc = simpleCommand('testsleep', 'sleep 60')
    sc.start()
    # busy-wait until the child has started and a pid is available
    pid = sc.getPid()
    while pid is None:
        pid = sc.getPid()
    self.assertTrue(isProcessRunning(pid))
    sc.kill()
    sc.wait()
    sc.join()
    self.assertFalse(isProcessRunning(pid))
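# The test above exercises the simpleCommand lifecycle used throughout this
# code: start() launches the child, wait() blocks until it exits, join()
# reaps the worker thread, and exit_code()/output() expose the result.
# A minimal sketch of that pattern as a reusable helper (runAndCollect is
# hypothetical, not part of the original module; it assumes only the
# simpleCommand API shown in this section):
def runAndCollect(name, commandLine, log):
    proc = simpleCommand(name, commandLine)
    proc.start()
    proc.wait()    # block until the child process exits
    proc.join()    # reap the thread driving the child
    ret = proc.exit_code()
    if ret != 0:
        log.warn('%s exited with code %s' % (name, ret))
    return ret, proc.output()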
def untar(file, targetDir):
    status = False
    command = 'tar -C %s -zxf %s' % (targetDir, file)
    commandObj = simpleCommand('untar', command)
    commandObj.start()
    commandObj.wait()
    commandObj.join()
    if commandObj.exit_code() == 0:
        status = True
    return status
def __initStatusCommand(self, option="-list"): """This method initializes the command to run to check the JT status""" cmd = None hadoopPath = os.path.join(self.__hadoopDir, 'bin', 'hadoop') cmdStr = "%s job -jt %s" % (hadoopPath, self.__jobTrackerURL) cmdStr = "%s %s" % (cmdStr, option) self.__log.debug('cmd str %s' % cmdStr) env = os.environ env['JAVA_HOME'] = self.__javaHome cmd = simpleCommand('HadoopStatus', cmdStr, env) return cmd
def qalter(self, fieldName, fieldValue, jobId):
    """Update the job field fieldName with fieldValue. The field must be
    modifiable after the job is submitted."""
    # E.g. to alter comment: qalter -W notes='value' jobId
    qalterCmd = '%s -W %s="%s" %s' % (self.__qalter, fieldName, fieldValue, jobId)
    self.__log.debug("qalter command: %s" % qalterCmd)
    qalterProcess = simpleCommand('qalter', qalterCmd, env=self.__env)
    qalterProcess.start()
    qalterProcess.wait()
    qalterProcess.join()
    exitCode = qalterProcess.exit_code()
    return exitCode
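# Example use of qalter() above, mirroring the qalter -W notes='value' form
# from its comment (scheduler and the job id are illustrative placeholders):
exitCode = scheduler.qalter('notes', 'cluster allocated', '42.example.com')
if exitCode != 0:
    print 'qalter failed with exit code %s' % exitCode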
def testUnresponsiveJobTracker(self):
    # Simulate an unresponsive job tracker by running a command that outlives
    # the retry window, and verify that the right error message is logged.
    sc = simpleCommand("sleep", "sleep 300")
    sc.start()
    pid = sc.getPid()
    while pid is None:
        pid = sc.getPid()
    sysDirMgr = MRSystemDirectoryManager(pid,
                                         '/user/yhemanth/mapredsystem/hoduser.123.abc.com',
                                         'def.com:5678', '/usr/bin/hadoop',
                                         self.log, retries=3)
    sysDirMgr.removeMRSystemDirectory()
    self.log.hasMessage('Job Tracker did not exit even after a minute. '
                        'Not going to try and cleanup the system directory', 'warn')
    sc.kill()
    sc.wait()
    sc.join()
def testRedirectedStderr(self):
    self.testFile = tempfile.NamedTemporaryFile(dir=self.rootDir,
                                                prefix=self.prefix)
    cmd = simpleCommand('helper', '%s %s 2 2>%s' %
                        (sys.executable,
                         os.path.join(rootDirectory, "testing", "helper.py"),
                         self.testFile.name))
    cmd.start()
    cmd.join()
    self.testFile.seek(0)
    stderror = self.testFile.read()
    assert stderror == sampleText
def qdel(self, jobId, force=False):
    exitCode = 0
    qdel = self.__qdel
    if force:
        # -p purges the job from the server even if it cannot be killed
        qdel = "%s -p %s" % (qdel, jobId)
    else:
        qdel = "%s %s" % (qdel, jobId)
    self.__log.debug(qdel)
    qdelProcess = simpleCommand('qdel', qdel, env=self.__env)
    qdelProcess.start()
    qdelProcess.wait()
    qdelProcess.join()
    exitCode = qdelProcess.exit_code()
    return exitCode
def pbsdsh(self, arguments):
    status = None
    pbsdshCommand = "%s %s" % (self.__pbsdsh, args_to_string(arguments))
    self.__log.debug("pbsdsh command: %s" % pbsdshCommand)
    pbsdsh = simpleCommand('pbsdsh', pbsdshCommand, env=self.__env)
    pbsdsh.start()
    # poll the exit code a bounded number of times to catch early failures
    for i in range(0, 30):
        status = pbsdsh.exit_code()
        if status:
            self.__log.error("pbsdsh failed: %s" % pbsdsh.exit_status_string())
            break
    if not status:
        status = 0
    return status
def tar(tarFile, tarDirectory, tarList):
    currentDir = os.getcwd()
    os.chdir(tarDirectory)
    status = False
    command = 'tar -czf %s ' % (tarFile)
    for file in tarList:
        command = "%s%s " % (command, file)
    commandObj = simpleCommand('tar', command)
    commandObj.start()
    commandObj.wait()
    commandObj.join()
    if commandObj.exit_code() == 0:
        status = True
    else:
        status = commandObj.exit_status_string()
    os.chdir(currentDir)
    return status
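# A possible round trip through tar() and untar() above (paths are
# illustrative). Note the asymmetric return types: tar() returns True on
# success and tar's error string on failure, while untar() returns a bool.
status = tar('pkg.tar.gz', '/tmp/build', ['bin', 'conf'])
if status == True:
    if untar('/tmp/build/pkg.tar.gz', '/tmp/unpacked'):
        print 'round trip succeeded'
else:
    print 'tar failed: %s' % status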
def hadoopVersion(hadoopDir, java_home, log):
    # Determine the version of hadoop being used by executing the
    # hadoop version command. Code earlier in idleTracker.py
    hadoopVersion = {'major': None, 'minor': None}
    hadoopPath = os.path.join(hadoopDir, 'bin', 'hadoop')
    cmd = "%s version" % hadoopPath
    log.debug('Executing command %s to find hadoop version' % cmd)
    env = os.environ
    env['JAVA_HOME'] = java_home
    hadoopVerCmd = simpleCommand('HadoopVersion', cmd, env)
    hadoopVerCmd.start()
    hadoopVerCmd.wait()
    hadoopVerCmd.join()
    if hadoopVerCmd.exit_code() == 0:
        # e.g. "Hadoop 0.20.2" -> major '0', minor '20'
        verLine = hadoopVerCmd.output()[0]
        log.debug('Version from hadoop command: %s' % verLine)
        hadoopVerRegExp = re.compile(r"Hadoop ([0-9]+)\.([0-9]+).*")
        verMatch = hadoopVerRegExp.match(verLine)
        if verMatch != None:
            hadoopVersion['major'] = verMatch.group(1)
            hadoopVersion['minor'] = verMatch.group(2)
    return hadoopVersion
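# Example of calling hadoopVersion() (the paths and log object are
# illustrative). The version components come back as strings, so convert
# them before numeric comparison:
ver = hadoopVersion('/usr/local/hadoop', '/usr/lib/jvm/java', log)
if ver['major'] is not None and int(ver['minor']) >= 20:
    log.info('Hadoop %s.%s detected' % (ver['major'], ver['minor']))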
def qstat(self, jobID):
    qstatInfo = None
    qstatCommand = "%s -f -1 %s" % (self.__qstat, jobID)
    self.__log.debug(qstatCommand)
    qstatProcess = simpleCommand('qstat', qstatCommand, env=self.__env)
    qstatProcess.start()
    qstatProcess.wait()
    qstatProcess.join()
    exitCode = qstatProcess.exit_code()
    if exitCode > 0:
        self.__log.warn('qstat error: %s' % qstatProcess.exit_status_string())
    else:
        # parse the 'key = value' lines of qstat -f output into a dict
        qstatInfo = {}
        for line in qstatProcess.output():
            line = line.rstrip()
            if line.find('=') != -1:
                qstatMatch = reQstatLine.match(line)
                if qstatMatch:
                    key = qstatMatch.group(1)
                    value = qstatMatch.group(2)
                    qstatInfo[key] = value
        if 'exec_host' in qstatInfo:
            # exec_host looks like 'node1/0+node2/0'; keep just the host names
            hosts = qstatInfo['exec_host'].split('+')
            addrList = []
            for item in hosts:
                [head, end] = item.split('/', 1)
                addrList.append(head)
            qstatInfo['exec_host'] = addrList
    return qstatInfo, exitCode
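# Sketch of consuming qstat()'s return value (scheduler and the job id are
# illustrative). Parsed attributes keep their Torque names, e.g. 'job_state';
# 'exec_host' has already been reduced to a list of host names above:
qstatInfo, exitCode = scheduler.qstat('42.torque-server.example.com')
if exitCode == 0 and qstatInfo is not None:
    state = qstatInfo.get('job_state')   # 'R' while the job is running
    hosts = qstatInfo.get('exec_host', [])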
def run(self, dir):
    status = True
    args = []
    desc = self.desc
    self.log.debug(pprint.pformat(desc.dict))
    self.log.debug("Got package dir of %s" % dir)
    self.path = os.path.join(dir, self.program)
    self.log.debug("path: %s" % self.path)
    args.append(self.path)
    args.extend(desc.getArgv())
    envs = desc.getEnvs()
    fenvs = os.environ
    for k, v in envs.iteritems():
        fenvs[k] = v
    if envs.has_key('HADOOP_OPTS'):
        fenvs['HADOOP_OPTS'] = envs['HADOOP_OPTS']
        self.log.debug("HADOOP_OPTS : %s" % fenvs['HADOOP_OPTS'])
    fenvs['JAVA_HOME'] = self.javahome
    fenvs['HADOOP_CONF_DIR'] = self.confdir
    fenvs['HADOOP_LOG_DIR'] = self.logdir
    self.log.info(pprint.pformat(fenvs))

    hadoopCommand = ''
    for item in args:
        hadoopCommand = "%s%s " % (hadoopCommand, item)

    # Redirecting output and error to self.out and self.err
    hadoopCommand = hadoopCommand + ' 1>%s 2>%s ' % (self.out, self.err)
    self.log.debug('running command: %s' % (hadoopCommand))
    self.log.debug('hadoop env: %s' % fenvs)
    self.log.debug('Command stdout will be redirected to %s ' % self.out +
                   'and command stderr to %s' % self.err)

    self.__hadoopThread = simpleCommand('hadoop', hadoopCommand, env=fenvs)
    self.__hadoopThread.start()

    # wait until the child's stdin pipe is available
    while self.__hadoopThread.stdin == None:
        time.sleep(.2)
        self.log.debug("hadoopThread still == None ...")

    input = desc.getStdin()
    self.log.debug("hadoop input: %s" % input)
    if input:
        if self.__hadoopThread.is_running():
            print >>self.__hadoopThread.stdin, input
        else:
            self.log.error("hadoop command failed to start")
    self.__hadoopThread.stdin.close()

    self.log.debug("isForeground: %s" % desc.isForeground())
    if desc.isForeground():
        self.log.debug("Waiting on hadoop to finish...")
        self.__hadoopThread.wait()
        self.log.debug("Joining hadoop thread...")
        self.__hadoopThread.join()
        if self.__hadoopThread.exit_code() != 0:
            status = False
    else:
        status = self.getCommandStatus()

    self.log.debug("hadoop run status: %s" % status)
    if status == False:
        self.handleFailedCommand()

    if (status == True) or (not desc.isIgnoreFailures()):
        return status
    else:
        self.log.error("Ignoring Failure")
        return True