def _copy_xp_output(self):
    """Copy experiment's output."""
    if not self.output_path:
        return

    # TODO: what happens if "xp.output" is not specified?
    remote_path = self.macro_manager.test_macros["xp.output"]
    local_path = os.path.join(self.output_path, str(self.comb_id))
    logger.info("Copying output to " + local_path)

    staging_dir = "/tmp"
    staged_copy = os.path.join(staging_dir, os.path.basename(remote_path))

    # Remove a stale staged copy on the master, if any
    cleanup = SshProcess("rm -rf " + staged_copy, self.hc.master)
    cleanup.run()

    # Fetch the output into the master's local staging dir
    self.hc.execute("fs -get " + remote_path + " " + staging_dir,
                    verbose=False)

    # Bring the staged files from the master to the local machine
    fetch = Get([self.hc.master], [staged_copy], local_path)
    fetch.run()
def _copy_base_conf(self):
    """Copy base configuration files to a temporary directory.

    Locally-provided configuration files are staged into a fresh temp
    dir; any mandatory file not provided locally is fetched from the
    master's configuration directory.
    """
    self.temp_conf_dir = tempfile.mkdtemp("", "hadoop-", "/tmp")
    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.temp_conf_dir)
    else:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    mandatory_files = [CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE]
    # Work on a copy: removing from an alias would also mutate
    # mandatory_files.
    missing_conf_files = list(mandatory_files)
    for f in base_conf_files:
        f_base_name = os.path.basename(f)
        if f_base_name in missing_conf_files:
            missing_conf_files.remove(f_base_name)

    logger.info("Copying missing conf files from master: " +
                str(missing_conf_files))

    remote_missing_files = [os.path.join(self.conf_dir, f)
                            for f in missing_conf_files]

    action = Get([self.master], remote_missing_files, self.temp_conf_dir)
    action.run()
def copy_history(self, dest, job_ids=None):
    """Copy history logs from master.

    Args:
      dest (str):
        The path of the local dir where the logs will be copied.
      job_ids (list of str, optional):
        A list with the ids of the jobs for which the history should be
        copied. If nothing is passed, the history of all jobs is copied.
    """
    if not os.path.exists(dest):
        logger.warning("Destination directory " + dest +
                       " does not exist. It will be created")
        os.makedirs(dest)

    history_dir = os.path.join(self.logs_dir, "history")

    # Quote the -name patterns so the remote shell does not glob-expand
    # them before find sees them.
    if job_ids:
        pattern = " -o ".join("-name '" + jid + "*'" for jid in job_ids)
    else:
        pattern = "-name 'job_*'"

    list_dirs = SshProcess("find " + history_dir + " " + pattern,
                           self.master)
    list_dirs.run()

    # One remote path per line of find's output
    remote_files = list_dirs.stdout.splitlines()

    action = Get([self.master], remote_files, dest)
    action.run()
def _initialize_conf(self):
    """Merge locally-specified configuration files with default files
    from the distribution.

    Files present in the local base conf dir are copied to the init conf
    dir; mandatory files not provided locally are fetched from the
    master's conf dir.
    """
    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.init_conf_dir)
    else:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    # Work on a copy: removing from self.conf_mandatory_files directly
    # would corrupt the attribute for any subsequent call.
    missing_conf_files = list(self.conf_mandatory_files)
    for f in base_conf_files:
        f_base_name = os.path.basename(f)
        if f_base_name in missing_conf_files:
            missing_conf_files.remove(f_base_name)

    logger.info("Copying missing conf files from master: " +
                str(missing_conf_files))

    remote_missing_files = [os.path.join(self.conf_dir, f)
                            for f in missing_conf_files]

    action = Get([self.master], remote_missing_files, self.init_conf_dir)
    action.run()
def _copy_xp_output(self):
    """Copy experiment's output."""
    if self.output_path:
        # TODO: what happens if "xp.output" is not specified?
        hdfs_output = self.macro_manager.test_macros["xp.output"]
        comb_dir = os.path.join(self.output_path, str(self.comb_id))
        logger.info("Copying output to " + comb_dir)

        tmp_dir = "/tmp"
        tmp_copy = os.path.join(tmp_dir, os.path.basename(hdfs_output))

        # Clean any previous copy left in the master's tmp dir
        SshProcess("rm -rf " + tmp_copy, self.hc.master).run()

        # Download the output to the master's tmp dir
        self.hc.execute("fs -get " + hdfs_output + " " + tmp_dir,
                        verbose=False)

        # Transfer the downloaded files from the master to this machine
        Get([self.hc.master], [tmp_copy], comb_dir).run()
def _copy_base_conf(self):
    """Copy base configuration files to a temporary directory.

    Locally-provided configuration files are staged into a fresh temp
    dir; mandatory files not provided locally would be fetched from the
    master (no mandatory files are currently defined here).
    """
    self.temp_conf_dir = tempfile.mkdtemp("", "spark-", "/tmp")
    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.temp_conf_dir)
    else:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    mandatory_files = []
    # Work on a copy so the mandatory list is never mutated
    missing_conf_files = list(mandatory_files)
    for f in base_conf_files:
        f_base_name = os.path.basename(f)
        if f_base_name in missing_conf_files:
            missing_conf_files.remove(f_base_name)

    # Only launch the remote copy when there is something to fetch;
    # previously a Get was always run, even with an empty file list.
    if missing_conf_files:
        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [os.path.join(self.conf_dir, f)
                                for f in missing_conf_files]

        action = Get([self.master], remote_missing_files,
                     self.temp_conf_dir)
        action.run()
def _initialize_conf(self):
    """Merge locally-specified configuration files with default files
    from the distribution.

    Files present in the local base conf dir are copied to the init conf
    dir; mandatory files not provided locally are fetched from the
    master's conf dir.
    """
    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.init_conf_dir)
    else:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    # Work on a copy: removing from self.conf_mandatory_files directly
    # would corrupt the attribute for any subsequent call.
    missing_conf_files = list(self.conf_mandatory_files)
    for f in base_conf_files:
        f_base_name = os.path.basename(f)
        if f_base_name in missing_conf_files:
            missing_conf_files.remove(f_base_name)

    logger.info("Copying missing conf files from master: " +
                str(missing_conf_files))

    remote_missing_files = [os.path.join(self.conf_dir, f)
                            for f in missing_conf_files]

    action = Get([self.master], remote_missing_files, self.init_conf_dir)
    action.run()
def change_conf(self, params, conf_file=None, default_file=MR_CONF_FILE):
    """Modify Hadoop configuration. This method copies the configuration
    files from the first host of each g5k cluster conf dir into a local
    temporary dir, does all the changes in place and broadcasts the new
    configuration files to all hosts.

    Args:
      params (dict of str:str):
        The parameters to be changed in the form key:value.
      conf_file (str, optional):
        The file where parameters should be set. If not specified, all
        files are checked for the parameter name and the parameter is set
        in the file where the property is found. If not found, the
        parameter is set in the default file.
      default_file (str, optional):
        The default conf file where to set the parameter if not found.
        Only applies when conf_file is not set.
    """
    for cluster in self.hw.get_clusters():
        hosts = cluster.get_hosts()

        # Copy conf files from first host in the cluster
        action = Remote("ls " + self.conf_dir + "/*.xml", [hosts[0]])
        action.run()
        output = action.processes[0].stdout

        # "ls" on a globbed path yields absolute paths; os.path.join
        # leaves an absolute second argument intact.
        remote_conf_files = [os.path.join(self.conf_dir, f)
                             for f in output.split()]

        tmp_dir = "/tmp/mliroz_temp_hadoop/"
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)

        action = Get([hosts[0]], remote_conf_files, tmp_dir)
        action.run()

        # Do replacements in temp files. Use items() instead of the
        # Python-2-only iteritems() so this also runs under Python 3.
        if conf_file:
            f = os.path.join(tmp_dir, conf_file)
            for name, value in params.items():
                replace_in_xml_file(f, name, value, True)
        else:
            temp_conf_files = [os.path.join(tmp_dir, f)
                               for f in os.listdir(tmp_dir)]
            for name, value in params.items():
                for f in temp_conf_files:
                    if replace_in_xml_file(f, name, value):
                        break
                else:
                    # Property not found - add it in the default file
                    logger.info("Parameter with name " + name + " has not "
                                "been found in any conf file. Setting it "
                                "in " + default_file)
                    f = os.path.join(tmp_dir, default_file)
                    replace_in_xml_file(f, name, value, True)

        # Copy back the files to all hosts
        self._copy_conf(tmp_dir, hosts)
def change_conf(self, params):
    """Modify Hadoop configuration. This method copies the configuration
    files from the first host of each g5k cluster conf dir into a local
    temporary dir, does all the changes in place and broadcasts the new
    configuration files to all hosts.

    Args:
      params (dict of str:str):
        The parameters to be changed in the form key:value.
    """
    for g5k_cluster in self.host_clusters:
        hosts = self.host_clusters[g5k_cluster]

        # Copy conf files from first host in the cluster
        action = Remote("ls " + self.conf_dir + "/*.xml", [hosts[0]])
        action.run()
        output = action.processes[0].stdout

        # "ls" on a globbed path yields absolute paths; os.path.join
        # leaves an absolute second argument intact.
        remote_conf_files = [os.path.join(self.conf_dir, f)
                             for f in output.split()]

        tmp_dir = "/tmp/mliroz_temp_hadoop/"
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)

        action = Get([hosts[0]], remote_conf_files, tmp_dir)
        action.run()

        # Do replacements in temp files. Use items() instead of the
        # Python-2-only iteritems() so this also runs under Python 3.
        temp_conf_files = [os.path.join(tmp_dir, f)
                           for f in os.listdir(tmp_dir)]

        for name, value in params.items():
            for f in temp_conf_files:
                if replace_in_xml_file(f, name, value):
                    break
            else:
                # Property not found - provisionally add it in MR_CONF_FILE
                f = os.path.join(tmp_dir, MR_CONF_FILE)
                replace_in_xml_file(f, name, value, True)

        # Copy back the files to all hosts
        self._copy_conf(tmp_dir, hosts)
def _copy_base_conf(self):
    """Copy base configuration files to a temporary directory.

    Locally-provided configuration files are staged into a fresh temp
    dir; mandatory files not provided locally are fetched from the master
    if present there, or created empty otherwise.
    """
    self.temp_conf_dir = tempfile.mkdtemp("", "hive-", "/tmp")
    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.temp_conf_dir)
    else:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    mandatory_files = ["hive-site.xml"]
    # Work on a copy so the mandatory list is never mutated
    missing_conf_files = list(mandatory_files)
    for f in base_conf_files:
        f_base_name = os.path.basename(f)
        if f_base_name in missing_conf_files:
            missing_conf_files.remove(f_base_name)

    # Copy or create mandatory files
    action = SshProcess("ls -1 " + self.conf_dir, self.master)
    action.run()
    # Compare against whole file names instead of a substring of the raw
    # "ls" output: a substring test would false-positive when a longer
    # name (e.g. "hive-site.xml.bak") contains the wanted one.
    files_in_conf_dir = [line.strip()
                         for line in action.stdout.splitlines()]

    remote_missing_files = []
    for f in missing_conf_files:
        if f in files_in_conf_dir:
            remote_missing_files.append(os.path.join(self.conf_dir, f))
        else:
            create_xml_file(os.path.join(self.temp_conf_dir, f))

    if remote_missing_files:
        logger.info("Copying missing conf files from master: " +
                    str(remote_missing_files))

        action = Get([self.master], remote_missing_files,
                     self.temp_conf_dir)
        action.run()
def _get_conf_files(self, host):
    """Fetch all XML conf files from *host* into a local temp dir and
    return the list of local paths."""
    listing = Remote("ls " + self.conf_dir + "/*.xml", [host])
    listing.run()
    stdout = listing.processes[0].stdout

    remote_conf_files = [os.path.join(self.conf_dir, name)
                         for name in stdout.split()]

    tmp_dir = "/tmp/mliroz_temp_hadoop/"
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    fetch = Get([host], remote_conf_files, tmp_dir)
    fetch.run()

    return [os.path.join(tmp_dir, name) for name in os.listdir(tmp_dir)]