예제 #1
0
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given Spark job in the specified node.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if node is None:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing spark job. Command = {" + self.bin_dir +
                    "/spark-submit " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        return proc.stdout, proc.stderr
예제 #2
0
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given Spark job in the specified node.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if node is None:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing spark job. Command = {" + self.bin_dir +
                    "/spark-submit " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        return proc.stdout, proc.stderr
예제 #3
0
    def execute(self,
                command,
                node=None,
                should_be_running=True,
                verbose=True):
        """Execute the given Hadoop command in the given node.

        Args:
          command (str):
            The command to be executed.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          should_be_running (bool, optional):
            True if the cluster needs to be running in order to execute the
            command. If so, and it is not running, it is automatically started.
          verbose: (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the command.
        """

        self._check_initialization()

        if should_be_running and not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if not node:
            node = self.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/hadoop " + command +
                        "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        return proc.stdout, proc.stderr
예제 #4
0
    def execute(self, command, node=None, should_be_running=True,
                verbose=True):
        """Execute the given Hadoop command in the given node.

        Args:
          command (str):
            The command to be executed.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          should_be_running (bool, optional):
            True if the cluster needs to be running in order to execute the
            command. If so, and it is not running, it is automatically started.
          verbose: (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the command.
        """

        self._check_initialization()

        if should_be_running and not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if not node:
            node = self.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/hadoop " +
                        command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        return (proc.stdout, proc.stderr)
예제 #5
0
    def execute(self, command, node=None, verbose=True):

        if not node:
            node = self.hc.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/mahout " +
                        command + "} in " + str(node))

        proc = SshProcess("export JAVA_HOME='" + self.hc.java_home + "';" +
                          "export HADOOP_HOME='" + self.hc.base_dir + "';" +
                          self.bin_dir + "/mahout " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        return proc.stdout, proc.stderr
예제 #6
0
    def execute(self, command, node=None, verbose=True):

        if not node:
            node = self.hc.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/mahout " +
                        command + "} in " + str(node))

        proc = SshProcess("export JAVA_HOME='" + self.hc.java_home + "';" +
                          "export HADOOP_HOME='" + self.hc.base_dir + "';" +
                          self.bin_dir + "/mahout " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        return proc.stdout, proc.stderr
예제 #7
0
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given MapReduce job in the specified node.
        
        Args:
          job (HadoopJarJob):
            The job object.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        self._check_initialization()

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if not node:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing jar job. Command = {" + self.bin_dir +
                    "/hadoop " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        for line in job.stdout.splitlines():
            if "Running job" in line:
                if "mapred.JobClient" in line or "mapreduce.Job" in line:
                    # TODO: more possible formats?
                    try:
                        match = re.match('.*Running job: (.*)', line)
                        job.job_id = match.group(1)
                        break
                    except:
                        pass

        return (proc.stdout, proc.stderr)
예제 #8
0
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given MapReduce job in the specified node.
        
        Args:
          job (HadoopJarJob):
            The job object.
          node (Host, optional):
            The host were the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        self._check_initialization()

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if not node:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing jar job. Command = {" + self.bin_dir +
                    "/hadoop " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        for line in job.stdout.splitlines():
            if "Running job" in line:
                if "mapred.JobClient" in line or "mapreduce.Job" in line:
                    # TODO: more possible formats?
                    try:
                        match = re.match('.*Running job: (.*)', line)
                        job.job_id = match.group(1)
                        break
                    except:
                        pass

        return (proc.stdout, proc.stderr)