def __force_clean(self): """Stop previous Hive processes (if any) and remove all remote files created by it.""" hive_processes = [] force_kill = False for h in self.hosts: proc = SshProcess("jps", self.master) proc.run() ids_to_kill = [] for line in proc.stdout.splitlines(): field = line.split() if field[1] in hive_processes: ids_to_kill.append(field[0]) if ids_to_kill: force_kill = True ids_to_kill_str = "" for pid in ids_to_kill: ids_to_kill_str += " " + pid proc = SshProcess("kill -9" + ids_to_kill_str, h) proc.run() if force_kill: logger.info( "Processes from previous hadoop deployments had to be killed") self.clean_logs()
def copy_history(self, dest, job_ids=None): """Copy history logs from master. Args: dest (str): The path of the local dir where the logs will be copied. job_ids (list of str, optional): A list with the ids of the jobs for which the history should be copied. If nothing is passed, the history of all jobs is copied. """ if not os.path.exists(dest): logger.warning("Destination directory " + dest + " does not exist. It will be created") os.makedirs(dest) history_dir = os.path.join(self.logs_dir, "history") if job_ids: pattern = " -o ".join("-name " + jid + "*" for jid in job_ids) list_dirs = SshProcess("find " + history_dir + " " + pattern, self.master) list_dirs.run() else: list_dirs = SshProcess("find " + history_dir + " -name job_*", self.master) list_dirs.run() remote_files = [] for line in list_dirs.stdout.splitlines(): remote_files.append(line) action = Get([self.master], remote_files, dest) action.run()
def bootstrap(self, tar_file): # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess( 'echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy Hive tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote( "rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.warehouse_dir + " " + self.logs_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote( "mkdir -p " + self.conf_dir + " && mkdir -p " + self.warehouse_dir, self.hosts) chmods = TaktukRemote( "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.warehouse_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 3. Specify environment variables command = "cat >> " + self.conf_dir + "/hive-env.sh << EOF\n" command += "JAVA_HOME=" + self.java_home + "\n" command += "HIVE_HOME=" + self.base_dir + "\n" command += "HIVE_CONF_DIR=" + self.conf_dir + "\n" command += "HADOOP_HOME=" + self.hc.base_dir + "\n" command += "EOF\n" command += "chmod +x " + self.conf_dir + "/hive-env.sh" action = Remote(command, self.hosts) action.run()
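# Sketch (assumption): the here-document pattern used in step 3 above, factored
# into a small helper. `Remote` is the same execo action class used throughout
# this file; the helper itself is illustrative and not part of the original API.
def write_env_file(env_vars, dest_file, hosts):
    """Append the given variables to a remote env file and make it executable."""
    command = "cat >> " + dest_file + " << EOF\n"
    for name, value in env_vars.items():
        command += name + "=" + value + "\n"
    command += "EOF\n"
    command += "chmod +x " + dest_file
    Remote(command, hosts).run()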
def __force_clean(self): """Stop previous Spark processes (if any) and remove all remote files created by it.""" spark_processes = ["Master", "Worker"] force_kill = False for h in self.hosts: proc = SshProcess("jps", h) proc.run() ids_to_kill = [] for line in proc.stdout.splitlines(): field = line.split() if field[1] in spark_processes: ids_to_kill.append(field[0]) if ids_to_kill: force_kill = True ids_to_kill_str = "" for pid in ids_to_kill: ids_to_kill_str += " " + pid logger.warn("Killing running Spark processes in host %s" % style.host(h.address.split('.')[0])) proc = SshProcess("kill -9" + ids_to_kill_str, h) proc.run() if force_kill: logger.info( "Processes from previous hadoop deployments had to be killed") self.clean_logs()
def _copy_xp_output(self): """Copy experiment's output.""" if self.output_path: remote_path = self.macro_manager.test_macros["xp.output"] # TODO: what happens if not specified? local_path = os.path.join(self.output_path, str(self.comb_id)) logger.info("Copying output to " + local_path) tmp_dir = "/tmp" # Remove file in tmp dir if exists proc = SshProcess("rm -rf " + os.path.join(tmp_dir, os.path.basename(remote_path)), self.hc.master) proc.run() # Get files in master self.hc.execute("fs -get " + remote_path + " " + tmp_dir, verbose=False) # Copy files from master action = Get([self.hc.master], [os.path.join(tmp_dir, os.path.basename(remote_path))], local_path) action.run()
def _copy_xp_output(self): """Copy experiment's output.""" if self.output_path: remote_path = self.macro_manager.test_macros[ "xp.output"] # TODO: what happens if not specified? local_path = os.path.join(self.output_path, str(self.comb_id)) logger.info("Copying output to " + local_path) tmp_dir = "/tmp" # Remove file in tmp dir if exists proc = SshProcess( "rm -rf " + os.path.join(tmp_dir, os.path.basename(remote_path)), self.hc.master) proc.run() # Get files in master self.hc.execute("fs -get " + remote_path + " " + tmp_dir, verbose=False) # Copy files from master action = Get( [self.hc.master], [os.path.join(tmp_dir, os.path.basename(remote_path))], local_path) action.run()
def start_spark(self): """Start spark processes. In STANDALONE mode it starts the master and slaves. In YARN mode it just checks that Hadoop is running, and starts it if not. """ logger.info("Starting Spark") if self.running: logger.warn("Spark was already started") return if self.mode == STANDALONE_MODE: proc = SshProcess( self.sbin_dir + "/start-master.sh;" + self.sbin_dir + "/start-slaves.sh;", self.master) proc.run() if not proc.finished_ok: logger.warn("Error while starting Spark") return elif self.mode == YARN_MODE: if not self.hc.running: logger.warn("YARN services must be started first") self.hc.start_and_wait() self.running = True
def bootstrap(self, tar_file): # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir, self.hosts) chmods = TaktukRemote("chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 3. Specify environment variables command = "cat >> " + self.conf_dir + "/spark-env.sh << EOF\n" command += "JAVA_HOME=" + self.java_home + "\n" command += "SPARK_LOG_DIR=" + self.logs_dir + "\n" if self.hc: command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n" if self.mode == YARN_MODE: command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n" command += "EOF\n" command += "chmod +x " + self.conf_dir + "/spark-env.sh" action = Remote(command, self.hosts) action.run()
def start_spark(self): """Start spark processes. In STANDALONE mode it starts the master and slaves. In YARN mode it just checks that Hadoop is running, and starts it if not. """ logger.info("Starting Spark") if self.running: logger.warn("Spark was already started") return if self.mode == STANDALONE_MODE: proc = SshProcess(self.sbin_dir + "/start-master.sh;" + self.sbin_dir + "/start-slaves.sh;", self.master) proc.run() if not proc.finished_ok: logger.warn("Error while starting Spark") return elif self.mode == YARN_MODE: if not self.hc.running: logger.warn("YARN services must be started first") self.hc.start_and_wait() self.running = True
def get_version(self):
    """Return the Hadoop version.

    Returns (str):
      The version used by the Hadoop cluster.
    """

    proc = SshProcess("export JAVA_HOME=" + self.java_home + ";" +
                      self.bin_dir + "/hadoop version",
                      self.master)
    proc.run()
    version = proc.stdout.splitlines()[0]
    return version
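# Sketch (assumption): extracting the numeric version from the string returned
# by get_version(), whose first line typically looks like "Hadoop 2.6.0".
# The parsing below is illustrative and tolerant of unexpected output.
def example_major_version(cluster):
    """Return the major Hadoop version as an int, or None if it cannot be parsed."""
    version = cluster.get_version()          # e.g. "Hadoop 2.6.0"
    parts = version.split()
    if len(parts) >= 2 and parts[1][:1].isdigit():
        return int(parts[1].split(".")[0])
    return None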
def format_dfs(self):
    """Format the distributed filesystem."""

    logger.info("Formatting HDFS")

    proc = SshProcess(self.bin_dir + "/hadoop namenode -format",
                      self.master)
    proc.run()

    if proc.finished_ok:
        logger.info("HDFS formatted successfully")
    else:
        logger.warn("Error while formatting HDFS")
def stop_yarn(self):
    """Stop the YARN ResourceManager and NodeManagers."""

    self._check_initialization()

    logger.info("Stopping YARN")

    proc = SshProcess(self.sbin_dir + "/stop-yarn.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while stopping YARN")
    else:
        self.running_yarn = False
def stop_dfs(self):
    """Stop the NameNode and DataNodes."""

    self._check_initialization()

    logger.info("Stopping HDFS")

    proc = SshProcess(self.sbin_dir + "/stop-dfs.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while stopping HDFS")
    else:
        self.running_dfs = False
def stop_map_reduce(self):
    """Stop the JobTracker and TaskTrackers."""

    self._check_initialization()

    logger.info("Stopping MapReduce")

    proc = SshProcess(self.sbin_dir + "/stop-mapred.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while stopping MapReduce")
    else:
        self.running_map_reduce = False
def stop_spark(self): """Stop Spark processes.""" logger.info("Stopping Spark") if self.mode == STANDALONE_MODE: proc = SshProcess(self.sbin_dir + "/stop-slaves.sh;" + self.sbin_dir + "/stop-master.sh;", self.master) proc.run() if not proc.finished_ok: logger.warn("Error while stopping Spark") return self.running = False
def stop_spark(self): """Stop Spark processes.""" logger.info("Stopping Spark") if self.mode == STANDALONE_MODE: proc = SshProcess( self.sbin_dir + "/stop-slaves.sh;" + self.sbin_dir + "/stop-master.sh;", self.master) proc.run() if not proc.finished_ok: logger.warn("Error while stopping Spark") return self.running = False
def start_dfs_and_wait(self):
    """Start the NameNode and DataNodes and wait for exiting safemode."""

    self._check_initialization()

    self.start_dfs()

    logger.info("Waiting for safe mode to be off")
    proc = SshProcess(self.bin_dir + "/hadoop dfsadmin -safemode wait",
                      self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while starting HDFS")
    else:
        self.running_dfs = True
def start_yarn(self):
    """Start the YARN ResourceManager and NodeManagers."""

    logger.info("Starting YARN")

    self._check_initialization()

    proc = SshProcess(self.sbin_dir + "/start-yarn.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while starting YARN")
    else:
        # TODO: get success or not from super.
        self.running_yarn = True
        if self.running_dfs:
            self.running = True
def start_map_reduce(self):
    """Start the JobTracker and TaskTrackers."""

    self._check_initialization()

    logger.info("Starting MapReduce")

    if self.running_map_reduce:
        logger.warn("MapReduce was already started")
        return

    proc = SshProcess(self.sbin_dir + "/start-mapred.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while starting MapReduce")
    else:
        self.running_map_reduce = True
def execute(self):
    """Execute a single test.

    Return:
      str: Local path of the file containing the process output.
    """

    test = SshProcess("java -jar " + self.jar_path +
                      " -p " + self.props_path,
                      self.host)

    # Output is stored in a local temporary file
    (_, temp_file) = tempfile.mkstemp("", "div_p2p-out-", "/tmp")
    test.stdout_handlers.append(temp_file)

    test.run()

    return temp_file
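# Sketch (assumption): consuming the temporary file returned by execute().
# The caller is responsible for removing the file; `test` stands for any object
# exposing the execute() defined above, and the helper relies on the
# module-level os import used elsewhere in this file.
def example_run_and_read(test):
    """Run the test and return its output as a string, cleaning up the file."""
    out_path = test.execute()
    try:
        with open(out_path) as f:
            return f.read()
    finally:
        os.remove(out_path)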
def start_dfs(self):
    """Start the NameNode and DataNodes."""

    self._check_initialization()

    logger.info("Starting HDFS")

    if self.running_dfs:
        logger.warn("Dfs was already started")
        return

    proc = SshProcess(self.sbin_dir + "/start-dfs.sh", self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while starting HDFS")
    else:
        self.running_dfs = True
def _copy_base_conf(self): """Copy base configuration files to tmp dir.""" self.temp_conf_dir = tempfile.mkdtemp("", "hive-", "/tmp") if os.path.exists(self.local_base_conf_dir): base_conf_files = [ os.path.join(self.local_base_conf_dir, f) for f in os.listdir(self.local_base_conf_dir) ] for f in base_conf_files: shutil.copy(f, self.temp_conf_dir) else: logger.warn( "Local conf dir does not exist. Using default configuration") base_conf_files = [] mandatory_files = ["hive-site.xml"] missing_conf_files = mandatory_files for f in base_conf_files: f_base_name = os.path.basename(f) if f_base_name in missing_conf_files: missing_conf_files.remove(f_base_name) # Copy or create mandatory files action = SshProcess("ls -1 " + self.conf_dir, self.master) action.run() files_in_conf_dir = action.stdout remote_missing_files = [] for f in missing_conf_files: if f in files_in_conf_dir: remote_missing_files.append(os.path.join(self.conf_dir, f)) else: create_xml_file(os.path.join(self.temp_conf_dir, f)) if remote_missing_files: logger.info("Copying missing conf files from master: " + str(remote_missing_files)) action = Get([self.master], remote_missing_files, self.temp_conf_dir) action.run()
def copy_history(self, dest, job_ids=None): """Copy history logs from dfs. Args: dest (str): The path of the local dir where the logs will be copied. job_ids (list of str, optional): A list with the ids of the jobs for which the history should be copied. If nothing is passed, the history of all jobs is copied. """ if not os.path.exists(dest): logger.warning("Destination directory " + dest + " does not exist. It will be created") os.makedirs(dest) # Dirs used user_login = getpass.getuser() hist_dfs_dir = "/tmp/hadoop-yarn/staging/history/done_intermediate/" + \ user_login hist_tmp_dir = "/tmp/hadoop_hist" # Remove file in tmp dir if exists proc = SshProcess("rm -rf " + hist_tmp_dir, self.master) proc.run() # Get files in master if job_ids: proc = SshProcess("mkdir " + hist_tmp_dir, self.master) proc.run() for jid in job_ids: self.execute("fs -get " + hist_dfs_dir + "/" + jid + "* " + hist_tmp_dir, verbose=False) else: self.execute("fs -get " + hist_dfs_dir + " " + hist_tmp_dir, verbose=False) # Copy files from master action = Get([self.master], [hist_tmp_dir], dest) action.run()
def _copy_base_conf(self): """Copy base configuration files to tmp dir.""" self.temp_conf_dir = tempfile.mkdtemp("", "hive-", "/tmp") if os.path.exists(self.local_base_conf_dir): base_conf_files = [os.path.join(self.local_base_conf_dir, f) for f in os.listdir(self.local_base_conf_dir)] for f in base_conf_files: shutil.copy(f, self.temp_conf_dir) else: logger.warn( "Local conf dir does not exist. Using default configuration") base_conf_files = [] mandatory_files = ["hive-site.xml"] missing_conf_files = mandatory_files for f in base_conf_files: f_base_name = os.path.basename(f) if f_base_name in missing_conf_files: missing_conf_files.remove(f_base_name) # Copy or create mandatory files action = SshProcess("ls -1 " + self.conf_dir, self.master) action.run() files_in_conf_dir = action.stdout remote_missing_files = [] for f in missing_conf_files: if f in files_in_conf_dir: remote_missing_files.append(os.path.join(self.conf_dir, f)) else: create_xml_file(os.path.join(self.temp_conf_dir, f)) if remote_missing_files: logger.info("Copying missing conf files from master: " + str( remote_missing_files)) action = Get([self.master], remote_missing_files, self.temp_conf_dir) action.run()
def bootstrap(self, tar_file):

    # 0. Remove used dirs if existing
    action = Remote("rm -rf " + self.base_dir, self.hc.hosts)
    action.run()
    action = Remote("rm -rf " + self.conf_dir, self.hc.hosts)
    action.run()

    # 1. Copy Mahout tar file and uncompress
    logger.info("Copy " + tar_file + " to hosts and uncompress")
    action = Put(self.hc.hosts, [tar_file], "/tmp")
    action.run()
    action = Remote(
        "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
        self.hc.hosts)
    action.run()

    # 2. Move installation to base dir
    logger.info("Create installation directories")
    action = Remote(
        "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
        " " + self.base_dir,
        self.hc.hosts)
    action.run()

    # 3. Create other dirs
    action = Remote("mkdir -p " + self.conf_dir, self.hc.hosts)
    action.run()

    # 4. Include libraries in Hadoop's classpath
    list_dirs = SshProcess("ls -1 " + self.base_dir + "/*.jar",
                           self.hc.master)
    list_dirs.run()
    libs = " ".join(list_dirs.stdout.splitlines())
    action = Remote("cp " + libs + " " + self.hc.base_dir + "/lib",
                    self.hc.hosts)
    action.run()

    self.initialized = True  # No need to call initialize()
def __force_clean(self): """Stop previous Spark processes (if any) and remove all remote files created by it.""" spark_processes = [ "Master", "Worker" ] force_kill = False for h in self.hosts: proc = SshProcess("jps", h) proc.run() ids_to_kill = [] for line in proc.stdout.splitlines(): field = line.split() if field[1] in spark_processes: ids_to_kill.append(field[0]) if ids_to_kill: force_kill = True ids_to_kill_str = "" for pid in ids_to_kill: ids_to_kill_str += " " + pid logger.warn( "Killing running Spark processes in host %s" % style.host(h.address.split('.')[0])) proc = SshProcess("kill -9" + ids_to_kill_str, h) proc.run() if force_kill: logger.info( "Processes from previous hadoop deployments had to be killed") self.clean_logs()
def bootstrap(self, tar_file): """Install Hadoop in all cluster nodes from the specified tar.gz file. Args: tar_file (str): The file containing Hadoop binaries. """ # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = Remote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir + " " + self.hadoop_temp_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir and create other dirs logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir + " && mkdir -p " + self.hadoop_temp_dir, self.hosts) chmods = TaktukRemote("chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir + " && chmod g+w " + self.hadoop_temp_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 4. Specify environment variables command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n" command += "export JAVA_HOME=" + self.java_home + "\n" command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n" command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n" command += "EOF" action = Remote(command, self.hosts) action.run() # 5. Check version return self._check_version_compliance()