Example #1
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given Spark job in the specified node.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True, the stdout and stderr of the remote process are displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if node is None:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing spark job. Command = {" + self.bin_dir +
                    "/spark-submit " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        return proc.stdout, proc.stderr
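A minimal standalone sketch of the output-mirroring pattern used above, built only from the execo primitives that appear in the example; the host name and command are assumptions, and the ColorDecorator wrapper is omitted:

    import sys
    from execo import SshProcess, Host

    # Run a remote command and mirror its stdout locally, as execute_job does
    # when verbose=True (host and command are placeholders).
    node = Host("node-1.example.org")
    proc = SshProcess("echo hello from $(hostname)", node)
    proc.stdout_handlers.append(sys.stdout)
    proc.start()
    proc.wait()
    print("exit code: %d" % proc.exit_code)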
Example #2
    def change_conf(self, params):
        """Create a new properties file from configuration and transfer it to
        the host.

        Args:
          params (dict): The parameters of the test.
        """

        # Create a local temporary file with the params
        (_, temp_file) = tempfile.mkstemp("", "div_p2p-conf-", "/tmp")
        props = open(temp_file, "w")
        for key in params:
            props.write(str(key) + "=" + str(params[key]) + "\n")
        props.close()

        # Copy the file to the remote location
        copy_props = Put([self.host], [temp_file], self.props_path)
        copy_props.run()

        # Remove temporary file
        os.remove(temp_file)
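A self-contained sketch of the same properties-file pattern using only the standard library; the keys, values, and file name prefix are illustrative:

    import os
    import tempfile

    # Dump a dict as key=value lines into a temporary file, then clean it up.
    # In change_conf the file is pushed with Put([self.host], [temp_file],
    # self.props_path) before being removed.
    params = {"latency": "50", "bandwidth": "1024"}
    fd, temp_file = tempfile.mkstemp(prefix="conf-", dir="/tmp")
    with os.fdopen(fd, "w") as props:
        for key, value in params.items():
            props.write("%s=%s\n" % (key, value))
    os.remove(temp_file)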
Example #3
    def bootstrap(self, tar_file):

        # 1. Remove used dirs if they exist
        action = Remote("rm -rf " + self.base_dir, self.hc.hosts)
        action.run()
        action = Remote("rm -rf " + self.conf_dir, self.hc.hosts)
        action.run()

        # 2. Copy Mahout tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        action = Put(self.hc.hosts, [tar_file], "/tmp")
        action.run()
        action = Remote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hc.hosts)
        action.run()

        # 3. Move installation to base dir
        logger.info("Create installation directories")
        action = Remote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hc.hosts)
        action.run()

        # 4. Create other dirs
        action = Remote("mkdir -p " + self.conf_dir, self.hc.hosts)
        action.run()

        # 5. Include libraries in Hadoop's classpath
        list_dirs = SshProcess("ls -1 " + self.base_dir + "/*.jar",
                               self.hc.master)
        list_dirs.run()
        libs = " ".join(list_dirs.stdout.splitlines())
        action = Remote("cp " + libs + " " + self.hc.base_dir + "/lib",
                        self.hc.hosts)
        action.run()

        self.initialized = True  # No need to call initialize()
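A condensed sketch of the push-and-unpack pattern that bootstrap relies on, reusing the Put and Remote calls shown above; the host names and tarball path are assumptions:

    import os
    from execo import Put, Remote, Host

    hosts = [Host("node-1.example.org"), Host("node-2.example.org")]
    tar_file = "/home/user/mahout-distribution.tar.gz"

    # Copy the tarball to every host, then unpack it remotely.
    Put(hosts, [tar_file], "/tmp").run()
    Remote("tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
           hosts).run()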
Example #4
        def copy_function(host, files_to_copy, collector=None):
            action = Put([host], files_to_copy, tmp_dir)
            action.run()

            local_final_size = 0

            for f in files_to_copy:
                src_file = os.path.join(tmp_dir, os.path.basename(f))
                if self.pre_load_function:
                    src_file = self.pre_load_function(src_file, host)

                    action = SshProcess("du -b " + src_file + "| cut -f1", host)
                    action.run()

                    local_final_size += int(action.stdout.strip())

                hc.execute("fs -put " + src_file + " " +
                           os.path.join(dest, os.path.basename(src_file)),
                           host, True, False)

            if collector:
                collector.increment(local_final_size)
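The remote size check in copy_function can be exercised on its own; a small sketch, with the host and file path as assumptions:

    from execo import SshProcess, Host

    # "du -b" prints "<size in bytes>\t<file>"; cut -f1 keeps only the size.
    host = Host("node-1.example.org")
    action = SshProcess("du -b /tmp/dataset.csv | cut -f1", host)
    action.run()
    size_in_bytes = int(action.stdout.strip())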
Example #5
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given MapReduce job in the specified node.
        
        Args:
          job (HadoopJarJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True, the stdout and stderr of the remote process are displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        self._check_initialization()

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if not node:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing jar job. Command = {" + self.bin_dir +
                    "/hadoop " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/hadoop " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        for line in job.stdout.splitlines():
            if "Running job" in line:
                if "mapred.JobClient" in line or "mapreduce.Job" in line:
                    # TODO: more possible formats?
                    try:
                        match = re.match('.*Running job: (.*)', line)
                        job.job_id = match.group(1)
                        break
                    except:
                        pass

        return (proc.stdout, proc.stderr)
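The job-id extraction at the end of this example can be tried in isolation; a standalone sketch on a sample "Running job:" line of the kind printed by mapred.JobClient or mapreduce.Job (the numeric id is illustrative):

    import re

    # Sample Hadoop log line; the regex keeps everything after "Running job: ".
    line = "INFO mapreduce.Job: Running job: job_1400000000000_0042"
    match = re.match('.*Running job: (.*)', line)
    if match:
        job_id = match.group(1)  # "job_1400000000000_0042"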
Example #6
    jobs_specs = get_jobs_specs(wanted, name=job_name)
    for sub, frontend in jobs_specs:
        sub.walltime = walltime
        sub.job_type = "deploy"
    job = oarsub(jobs_specs)[0]

nodes = get_oar_job_nodes(job[0], job[1])
logger.info('Deploying host %s', nodes[0].address)
deployed, undeployed = deploy(Deployment(nodes, env_name="jessie-x64-base"))

execware_host = list(deployed)[0]
logger.info('Installing required packages %s', style.emph(packages))
install_packages = SshProcess(
    'apt-get update && apt-get install -y ' + packages, execware_host).run()
logger.info('Copying files to host')
put_files = Put(execware_host, [source_code], remote_location="/tmp").run()

xml_file = """
<settings>
     <proxies>
      <proxy>
         <id>g5k-proxy</id>
         <active>true</active>
         <protocol>http</protocol>
         <host>proxy</host>
         <port>3128</port>
       </proxy>
      <proxy>
         <id>g5k-proxy-https</id>
         <active>true</active>
         <protocol>https</protocol>