def execute_job(self, job, node=None, verbose=True): """Execute the given Spark job in the specified node. Args: job (SparkJob): The job object. node (Host, optional): The host were the command should be executed. If not provided, self.master is chosen. verbose (bool, optional): If True stdout and stderr of remote process is displayed. Returns (tuple of str): A tuple with the standard and error outputs of the process executing the job. """ if not self.running: logger.warn("The cluster was stopped. Starting it automatically") self.start() if node is None: node = self.master exec_dir = "/tmp" # Copy necessary files to cluster files_to_copy = job.get_files_to_copy() action = Put([node], files_to_copy, exec_dir) action.run() # Get command command = job.get_command(exec_dir) # Execute logger.info("Executing spark job. Command = {" + self.bin_dir + "/spark-submit " + command + "} in " + str(node)) proc = SshProcess(self.bin_dir + "/spark-submit " + command, node) if verbose: red_color = '\033[01;31m' proc.stdout_handlers.append(sys.stdout) proc.stderr_handlers.append( ColorDecorator(sys.stderr, red_color)) proc.start() proc.wait() # Get job info job.stdout = proc.stdout job.stderr = proc.stderr job.success = (proc.exit_code == 0) return proc.stdout, proc.stderr
def execute_job(self, job, node=None, verbose=True): """Execute the given Spark job in the specified node. Args: job (SparkJob): The job object. node (Host, optional): The host were the command should be executed. If not provided, self.master is chosen. verbose (bool, optional): If True stdout and stderr of remote process is displayed. Returns (tuple of str): A tuple with the standard and error outputs of the process executing the job. """ if not self.running: logger.warn("The cluster was stopped. Starting it automatically") self.start() if node is None: node = self.master exec_dir = "/tmp" # Copy necessary files to cluster files_to_copy = job.get_files_to_copy() action = Put([node], files_to_copy, exec_dir) action.run() # Get command command = job.get_command(exec_dir) # Execute logger.info("Executing spark job. Command = {" + self.bin_dir + "/spark-submit " + command + "} in " + str(node)) proc = SshProcess(self.bin_dir + "/spark-submit " + command, node) if verbose: red_color = '\033[01;31m' proc.stdout_handlers.append(sys.stdout) proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color)) proc.start() proc.wait() # Get job info job.stdout = proc.stdout job.stderr = proc.stderr job.success = (proc.exit_code == 0) return proc.stdout, proc.stderr
def execute(self, command, node=None, should_be_running=True, verbose=True):
    """Execute the given Hadoop command in the given node.

    Args:
      command (str):
        The command to be executed.
      node (Host, optional):
        The host where the command should be executed. If not provided,
        self.master is chosen.
      should_be_running (bool, optional):
        True if the cluster needs to be running in order to execute the
        command. If so, and it is not running, it is automatically started.
      verbose (bool, optional):
        If True, the stdout and stderr of the remote process are displayed.

    Returns (tuple of str):
      A tuple with the standard and error outputs of the process executing
      the command.
    """

    self._check_initialization()

    if should_be_running and not self.running:
        logger.warn("The cluster was stopped. Starting it automatically")
        self.start()

    if not node:
        node = self.master

    if verbose:
        logger.info("Executing {" + self.bin_dir + "/hadoop " + command +
                    "} in " + str(node))

    proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

    if verbose:
        red_color = '\033[01;31m'
        proc.stdout_handlers.append(sys.stdout)
        proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

    proc.start()
    proc.wait()

    return proc.stdout, proc.stderr

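# A hedged usage sketch for execute: any argument string accepted by the
# 'hadoop' CLI is passed through verbatim. 'cluster' and 'worker' are
# illustrative names for an initialized instance of this class and a Host,
# not objects defined in this section.

# List the HDFS root on the master; the cluster is started automatically
# if it is stopped, because should_be_running defaults to True.
out, err = cluster.execute("fs -ls /")

# Print the Hadoop version on a specific node; no live daemons are needed,
# so should_be_running is disabled, and output is captured silently.
out, err = cluster.execute("version", node=worker,
                           should_be_running=False, verbose=False)
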
def execute(self, command, node=None, verbose=True):
    """Execute the given Mahout command in the given node.

    Args:
      command (str):
        The command to be executed.
      node (Host, optional):
        The host where the command should be executed. If not provided,
        the master of the underlying Hadoop cluster is chosen.
      verbose (bool, optional):
        If True, the stdout and stderr of the remote process are displayed.

    Returns (tuple of str):
      A tuple with the standard and error outputs of the process executing
      the command.
    """

    if not node:
        node = self.hc.master

    if verbose:
        logger.info("Executing {" + self.bin_dir + "/mahout " + command +
                    "} in " + str(node))

    # Mahout needs JAVA_HOME and HADOOP_HOME from the underlying Hadoop
    # cluster, so they are exported in the remote shell before the command
    proc = SshProcess("export JAVA_HOME='" + self.hc.java_home + "';" +
                      "export HADOOP_HOME='" + self.hc.base_dir + "';" +
                      self.bin_dir + "/mahout " + command, node)

    if verbose:
        red_color = '\033[01;31m'
        proc.stdout_handlers.append(sys.stdout)
        proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

    proc.start()
    proc.wait()

    return proc.stdout, proc.stderr

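# Unlike the Hadoop variant above, this method prepends export statements,
# since the shell spawned by SshProcess does not inherit JAVA_HOME or
# HADOOP_HOME from the cluster configuration. A sketch of the string it
# builds; all paths and the Mahout command are illustrative assumptions.
java_home = "/usr/lib/jvm/java-8-openjdk-amd64"
base_dir = "/opt/hadoop"
bin_dir = "/opt/mahout/bin"
command = "seqdirectory -i /data/text -o /data/seq"

remote_cmd = ("export JAVA_HOME='" + java_home + "';" +
              "export HADOOP_HOME='" + base_dir + "';" +
              bin_dir + "/mahout " + command)
# remote_cmd is what gets passed to SshProcess(remote_cmd, node)
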
def execute_job(self, job, node=None, verbose=True): """Execute the given MapReduce job in the specified node. Args: job (HadoopJarJob): The job object. node (Host, optional): The host were the command should be executed. If not provided, self.master is chosen. verbose (bool, optional): If True stdout and stderr of remote process is displayed. Returns (tuple of str): A tuple with the standard and error outputs of the process executing the job. """ self._check_initialization() if not self.running: logger.warn("The cluster was stopped. Starting it automatically") self.start() if not node: node = self.master exec_dir = "/tmp" # Copy necessary files to cluster files_to_copy = job.get_files_to_copy() action = Put([node], files_to_copy, exec_dir) action.run() # Get command command = job.get_command(exec_dir) # Execute logger.info("Executing jar job. Command = {" + self.bin_dir + "/hadoop " + command + "} in " + str(node)) proc = SshProcess(self.bin_dir + "/hadoop " + command, node) if verbose: red_color = '\033[01;31m' proc.stdout_handlers.append(sys.stdout) proc.stderr_handlers.append( ColorDecorator(sys.stderr, red_color)) proc.start() proc.wait() # Get job info job.stdout = proc.stdout job.stderr = proc.stderr job.success = (proc.exit_code == 0) for line in job.stdout.splitlines(): if "Running job" in line: if "mapred.JobClient" in line or "mapreduce.Job" in line: # TODO: more possible formats? try: match = re.match('.*Running job: (.*)', line) job.job_id = match.group(1) break except: pass return (proc.stdout, proc.stderr)