def get_xml_params(f, param_names):
    if not param_names:
        return {}

    local_param_names = param_names[:]
    params = {}
    for name in local_param_names:
        params[name] = None

    with open(f) as inf:
        line = inf.readline()
        while line != "":
            for name in local_param_names:
                if "<name>" + name + "</name>" in line:
                    if "<value>" in line:
                        match = re.match('.*<value>([^<]*)</value>.*', line)
                        params[name] = match.group(1)
                    else:
                        line = inf.readline()
                        if line != "":
                            match = re.match('.*<value>([^<]*)</value>.*',
                                             line)
                            params[name] = match.group(1)
                        else:
                            logger.error("Configuration file " + f +
                                         " is not correctly formatted")
                    # Stop searching for this parameter once it has been found
                    local_param_names.remove(name)
                    break
            line = inf.readline()

    return params
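# A minimal usage sketch for get_xml_params, assuming a Hadoop-style XML
# configuration file; the file path and property names below are hypothetical.
hdfs_params = get_xml_params("/tmp/hadoop/conf/core-site.xml",
                             ["fs.defaultFS", "hadoop.tmp.dir"])
if hdfs_params.get("fs.defaultFS") is None:
    logger.warn("fs.defaultFS is not set in the configuration file")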
def _exec_on_node(self, command, machine, log):
    logger.info(log)
    rem = ex.action.Remote(command, machine,
                           connection_params={'user': '******'}).run()
    if rem.ok:
        logger.info("Success")
    else:
        logger.error("Failure")
def load(self):
    """Load the configuration file"""
    try:
        with open(self.config_path) as config_file:
            config = yaml.load(config_file)
    except:
        logger.error("Error reading configuration file %s" %
                     self.config_path)
        t, value, tb = sys.exc_info()
        print("%s %s" % (str(t), str(value)))
        sys.exit(23)

    # Load g5k networks
    with open(NETWORK_FILE) as network_file:
        self.networks = yaml.load(network_file)

    self.config = {}
    self.config.update(DEFAULT_CONFIG)
    self.config.update(config)
    logger.info("Configuration file loaded : %s" % self.config_path)
    logger.info(pf(self.config))

    return self.config
def bootstrap(self, tar_file): # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess( 'echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy Hive tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote( "rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.warehouse_dir + " " + self.logs_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote( "mkdir -p " + self.conf_dir + " && mkdir -p " + self.warehouse_dir, self.hosts) chmods = TaktukRemote( "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.warehouse_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 3. Specify environment variables command = "cat >> " + self.conf_dir + "/hive-env.sh << EOF\n" command += "JAVA_HOME=" + self.java_home + "\n" command += "HIVE_HOME=" + self.base_dir + "\n" command += "HIVE_CONF_DIR=" + self.conf_dir + "\n" command += "HADOOP_HOME=" + self.hc.base_dir + "\n" command += "EOF\n" command += "chmod +x " + self.conf_dir + "/hive-env.sh" action = Remote(command, self.hosts) action.run()
def get_host(self):
    """Return a host from an existing reservation (if any), or from a new
    reservation."""

    # Look if there is a running job
    self.site = get_cluster_site(self.config['cluster'])
    jobs = EX5.get_current_oar_jobs([self.site])

    self.job_id = None
    for t in jobs:
        if EX5.get_oar_job_info(t[0],
                                self.site)['name'] == self.options.job_name:
            self.job_id = t[0]
            break

    if self.job_id:
        logger.info('Using job %s' % style.emph(self.job_id))
    else:
        logger.info('Making a new reservation')
        self._make_reservation(self.site)

    if not self.job_id:
        logger.error("Could not get a reservation for the job")
        exit(6)

    EX5.wait_oar_job_start(self.job_id, self.site)
    pp(EX5.get_oar_job_nodes(self.job_id, self.site))
    return EX5.get_oar_job_nodes(self.job_id, self.site)[0]
def deploy(self):
    # We put the nodes in the first vlan we have
    vlan = self._get_primary_vlan()

    # Deploy all the nodes
    logger.info("Deploying %s on %d nodes %s" %
                (self.config['env_name'], len(self.nodes),
                 '(forced)' if self.force_deploy else ''))
    deployed, undeployed = EX5.deploy(
        EX5.Deployment(self.nodes,
                       env_name=self.config['env_name'],
                       vlan=vlan[1]),
        check_deployed_command=not self.force_deploy)

    # Check the deployment
    if len(undeployed) > 0:
        logger.error("%d nodes were not deployed correctly:" %
                     len(undeployed))
        for n in undeployed:
            logger.error(style.emph(n))

    # Update node names with the vlan
    self.nodes = sorted(translate_to_vlan(self.nodes, vlan[1]),
                        key=lambda n: n.address)
    logger.info(self.nodes)
    self.deployed_nodes = sorted(
        translate_to_vlan(map(lambda n: EX.Host(n), deployed), vlan[1]),
        key=lambda n: n.address)
    logger.info(self.deployed_nodes)
    check_nodes(nodes=self.deployed_nodes,
                resources=self.config['resources'],
                mode=self.config['role_distribution'])

    return deployed, undeployed
def download_file_sdk(self, service, drive_file, pathFile):
    """Download a file's content.

    Args:
      service: Drive API service instance.
      drive_file: Drive File instance.
      pathFile: Path of the local file where the content is written.

    Returns:
      File if successful, None otherwise.
    """
    if drive_file:
        download_url = drive_file.get('downloadUrl')
    else:
        download_url = self.retrieve_file_metadata(service,
                                                   pathFile.split('/')[-1])
    if download_url:
        try:
            resp, content = service._http.request(download_url)
            if resp.status == 200:
                out = open(pathFile, 'wb')
                out.write(content)
                out.close()
                return out
        except errors.HttpError as e:
            error = simplejson.loads(e.content)
            logger.error('Error in Download ' + str(error.get('code')) +
                         ' ' + error.get('message'))
    else:
        # The file doesn't have any content stored on Drive.
        return None
def upload_file_sdk(self, service, filePath, fileName, fileType):
    """Upload a file's content.

    Args:
      service: Drive API service instance.
      filePath: Path to the file you want to upload.
      fileName: Name of the new file in the drive.
      fileType: Type of the file (text, ...).

    Returns:
      File uploaded.
    """
    media_body = apiclient.http.MediaFileUpload(filePath,
                                                mimetype=fileType,
                                                resumable=True)
    body = {
        'title': fileName,
        'description': 'Temporary file',
    }
    new_file = None
    try:
        new_file = service.files().insert(body=body,
                                          media_body=media_body).execute()
    except errors.HttpError as e:
        error = simplejson.loads(e.content)
        logger.error('Error in Upload ' + str(error.get('code')) + ' ' +
                     error.get('message'))
    return new_file
def __define_ds_parameters(self, config):
    ds_parameters_names = config.options("ds_parameters")
    self.ds_parameters = {}
    ds_class_parameters = {}
    ds_classes = []
    for pn in ds_parameters_names:
        pv = config.get("ds_parameters", pn).split(",")
        if pn.startswith("ds.class."):
            ds_class_parameters[pn[len("ds.class."):]] = \
                [v.strip() for v in pv]
        elif pn == "ds.class":
            ds_classes = [v.strip() for v in pv]
        else:
            self.ds_parameters[pn] = [v.strip() for v in pv]

    # Create ds configurations
    self.ds_config = []
    for (idx, ds_class) in enumerate(ds_classes):
        this_ds_params = {}
        for pn, pv in ds_class_parameters.iteritems():
            if len(pv) == len(ds_classes):
                if pv[idx]:
                    this_ds_params[pn] = pv[idx]
            elif len(pv) == 1:
                this_ds_params[pn] = pv[0]
            else:
                logger.error("Number of ds_class does not match number "
                             "of " + pn)
                raise ParameterException("Number of ds_class does not "
                                         "match number of " + pn)
        self.ds_config.append((ds_class, this_ds_params))

    self.ds_parameters["ds.config"] = range(0, len(self.ds_config))
def __init__(self, jar_path, params=None, lib_paths=None):
    """Creates a new Hadoop MapReduce jar job with the given parameters.

    Args:
      jar_path (str):
        The local path of the jar containing the job.
      params (list of str, optional):
        The list of parameters of the job.
      lib_paths (list of str, optional):
        The list of local paths to the libraries used by the job.
    """
    if not params:
        params = []
    if not lib_paths:
        lib_paths = []

    # Check if the jar file exists
    if not os.path.exists(jar_path):
        logger.error("Jar file " + jar_path + " does not exist")
        raise HadoopJobException("Jar file " + jar_path + " does not exist")

    # Check if the libraries exist
    for lp in lib_paths:
        if not os.path.exists(lp):
            logger.warn("Lib file " + lp + " does not exist")
            return  # TODO - exception

    self.jar_path = jar_path
    self.params = params
    self.lib_paths = lib_paths
def bootstrap(self, tar_file): # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir, self.hosts) chmods = TaktukRemote("chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 3. Specify environment variables command = "cat >> " + self.conf_dir + "/spark-env.sh << EOF\n" command += "JAVA_HOME=" + self.java_home + "\n" command += "SPARK_LOG_DIR=" + self.logs_dir + "\n" if self.hc: command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n" if self.mode == YARN_MODE: command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n" command += "EOF\n" command += "chmod +x " + self.conf_dir + "/spark-env.sh" action = Remote(command, self.hosts) action.run()
def __init__(self, params):
    """Create a static dataset with the given params.

    Args:
      params (dict):
        A dictionary with the parameters. This dataset needs the following
        parameters:
        - local_path: The path to the directory where the dataset is stored
          locally.
        - pre_load_function: A function to be applied after transfers and
          before loading to dfs (usually decompression).
    """
    super(StaticDataset, self).__init__(params)

    local_path = params["local_path"]
    if not os.path.exists(local_path):
        logger.error("The dataset local dir does not exist")

    if "pre_load_function" in params:
        pre_load_function_name = params["pre_load_function"]
        self.pre_load_function = import_function(pre_load_function_name)
    else:
        self.pre_load_function = None

    self.local_path = local_path
def make_reservation(self):
    """Perform a reservation of the required number of nodes"""
    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    endtime = int(starttime +
                  timedelta_to_seconds(datetime.timedelta(days=3,
                                                          minutes=1)))
    startdate, n_nodes = self._get_nodes(starttime, endtime)
    while not n_nodes:
        logger.info('Not enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime +
                      timedelta_to_seconds(datetime.timedelta(days=3,
                                                              minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)
        if starttime > int(time.time() + timedelta_to_seconds(
                datetime.timedelta(weeks=6))):
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()

    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    sub = jobs_specs[0][0]
    sub.walltime = self.options.walltime
    sub.additional_options = '-t deploy'
    sub.reservation_date = startdate
    (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s', format_date(startdate),
                str(n_nodes))
def make_reservation(self):
    """Perform a reservation of the required number of nodes"""
    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    endtime = int(starttime +
                  timedelta_to_seconds(datetime.timedelta(days=3,
                                                          minutes=1)))
    startdate, n_nodes = self._get_nodes(starttime, endtime)
    while not n_nodes:
        logger.info('Not enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime +
                      timedelta_to_seconds(datetime.timedelta(days=3,
                                                              minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)
        if starttime > int(time.time() + timedelta_to_seconds(
                datetime.timedelta(weeks=6))):
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            exit()

    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    sub = jobs_specs[0][0]
    sub.walltime = self.options.walltime
    sub.additional_options = '-t deploy'
    sub.reservation_date = startdate
    (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s', format_date(startdate),
                str(n_nodes))
def retrieve_file_metadata(self, service, fname):
    """Retrieve the metadata of a file given its title.

    Args:
      service: Drive API service instance.
      fname: Title of the file to look for.

    Returns:
      The download URL of the first matching File resource.
    """
    result = []
    page_token = None
    while True:
        try:
            param = {'maxResults': 1, 'q': "title = '" + fname + "'"}
            if page_token:
                param['pageToken'] = page_token
            files = service.files().list(**param).execute()
            result.extend(files['items'])
            page_token = files.get('nextPageToken')
            if not page_token:
                break
        except errors.HttpError as error:
            logger.error('An error occurred: ' + str(error))
            break
    print result
    return result[0]['downloadUrl']
def prepare_global_vlan(self):
    vlans = g5k.get_oar_job_kavlan(*self.globalvlan_job)
    if len(vlans) > 0:
        self.global_vlan = vlans[0]
        logger.debug("Global VLAN ID: {}".format(self.global_vlan))
    else:
        logger.error("Could not reserve global VLAN")
        sys.exit(1)
def _check_initialization(self):
    """Check whether the cluster is initialized and raise an exception if
    not."""
    if not self.initialized:
        logger.error("The cluster should be initialized")
        raise ClusterNotInitializedException(
            "The cluster should be initialized")
def delete_file(self, client, fname):
    try:
        client.file_delete(fname)
    except ErrorResponse as e:
        logger.error('Error in Delete ' + str(e.status) + ' ' +
                     e.reason + ' : ' + e.error_msg)
    return True
def _check_version_compliance(self):
    if self.get_major_version() != 2:
        logger.error("Version of HadoopCluster is not compliant with the "
                     "distribution provided in the bootstrap option. Use "
                     "the appropriate parameter for --version when "
                     "creating the cluster or use another distribution.")
        return False
    else:
        return True
def _check_version_compliance(self):
    if self.get_major_version() >= 2:
        logger.error("Version of HadoopCluster is not compliant with the "
                     "distribution provided in the bootstrap option. Use "
                     "the appropriate parameter for --version when "
                     "creating the cluster or use another distribution.")
        return False
    else:
        return True
def replace_in_xml_file(f, name, value, create_if_absent=False): """Assign the given value to variable name in xml file f. Args: f (str): The path of the file. name (str): The name of the variable. value (str): The new value to be assigned: create_if_absent (bool, optional): If True, the variable will be created at the end of the file in case it was not already present. Returns (bool): True if the assignment has been made, False otherwise. """ changed = False (_, temp_file) = tempfile.mkstemp("", "xmlf-", "/tmp") inf = open(f) outf = open(temp_file, "w") line = inf.readline() while line != "": if "<name>" + name + "</name>" in line: if "<value>" in line: outf.write(__replace_line(line, value)) changed = True else: outf.write(line) line = inf.readline() if line != "": outf.write(__replace_line(line, value)) changed = True else: logger.error("Configuration file " + f + " is not correctly formatted") else: if ("</configuration>" in line and create_if_absent and not changed): outf.write(" <property><name>" + name + "</name>" + "<value>" + str(value) + "</value></property>\n") outf.write(line) changed = True else: outf.write(line) line = inf.readline() inf.close() outf.close() if changed: shutil.copyfile(temp_file, f) os.remove(temp_file) return changed
def _check_version_compliance(self):
    version = self.get_version()
    if not version.startswith("Hadoop 2."):
        logger.error(
            "Version of HadoopCluster is not compliant with the "
            "distribution provided in the bootstrap option. Use the "
            "appropriate parameter for --version when creating the "
            "cluster or use another distribution.")
        return False
    else:
        return True
def workflow(self, comb): """ Compute one application launch using a given parameter group """ comb_ok = False try: # Generate configuration file needed by MPI processes logger.info("Generating assembly file...") py = comb['cores'] / comb['px'] prepare = Process('cd %s && python %s %d %d %d %d %d %s app.lad' % (self.workingPath, self.genLadScript, comb['datasize'], comb['datasize'], comb['datasize'], comb['px'], py, comb['transposition'])) prepare.shell = True prepare.run() # Generate the MPI host file mfile = self.generate_machine_file() # Start L2C lad = "./app.lad" logger.info("Computing...") res = Process("export OAR_JOB_KEY_FILE=~/.oar_key ; cd %s && l2c_loader -M,-machinefile,%s --mpi -c %d %s" % (self.workingPath, mfile, comb['cores'], lad)) res.shell = True res.stdout_handlers.append(os.path.join(self.result_dir, slugify(comb) + '.out')) res.stdout_handlers.append(sys.stdout) res.stderr_handlers.append(os.path.join(self.result_dir, slugify(comb) + '.err')) res.stderr_handlers.append(sys.stderr) res.run() if not res.ok: logger.error('Bad L2C termination') raise Exception('Bad L2C termination') if len(res.stderr) > 0: # WARNING: when L2C cannot find the LAD file or something strange like this logger.warning('Not empty error output') # Clean configuration files logger.info("Removing assembly files...") res = Process('cd %s && rm -f app.lad*' % self.workingPath) res.shell = True res.run() comb_ok = True except Exception: pass finally: if comb_ok: self.sweeper.done(comb) logger.info(style.host(slugify(comb)) + ' has been done') else: self.sweeper.cancel(comb) logger.warning(style.host(slugify(comb)) + ' has been canceled') logger.info(style.step('%s Remaining'), len(self.sweeper.get_remaining()))
def _check_version_compliance(self):
    version = self.get_version()
    if not (version.startswith("Hadoop 0.") or
            version.startswith("Hadoop 1.")):
        logger.error("Version of HadoopCluster is not compliant with the "
                     "distribution provided in the bootstrap option. Use "
                     "the appropriate parameter for --version when "
                     "creating the cluster or use another distribution.")
        return False
    else:
        return True
def finish_deploy_server(self, deploy_process):
    deployed = deploy_process.deployed_hosts
    if len(deployed) == 0:
        logger.error("Could not deploy server")
        sys.exit(1)
    if self.multi_site():
        logger.debug("Deployed, transforming {} into {}".format(
            self.server.address,
            g5k.get_kavlan_host_name(self.server.address,
                                     self.global_vlan)))
        self.server.address = g5k.get_kavlan_host_name(
            self.server.address, self.global_vlan)
def _check_initialization(self):
    """Check whether the cluster is initialized and raise an exception if
    not.

    Raises:
      HadoopNotInitializedException: If self.initialized = False
    """
    if not self.initialized:
        logger.error("The cluster should be initialized")
        raise HadoopNotInitializedException(
            "The cluster should be initialized")
def delete_file_sdk(self, service, file_id):
    """Permanently delete a file, skipping the trash.

    Args:
      service: Drive API service instance.
      file_id: ID of the file to delete.
    """
    try:
        service.files().delete(fileId=file_id).execute()
    except errors.HttpError as e:
        error = simplejson.loads(e.content)
        logger.error('Error in delete ' + str(error.get('code')) + ' ' +
                     error.get('message'))
def finish_deploy_vmhosts(self, deploy_process):
    deployed = deploy_process.deployed_hosts
    if len(deployed) != len(self.vm_hosts):
        logger.error(
            "Could not deploy all VM hosts, only {}/{} deployed".format(
                len(deployed), len(self.vm_hosts)))
        sys.exit(1)
    if self.multi_site():
        logger.debug("Deployed, transforming VM host names to be able to "
                     "reach them in the new VLAN")
        for host in self.vm_hosts:
            host.address = g5k.get_kavlan_host_name(host.address,
                                                    self.global_vlan)
def bootstrap(self, tar_file): """Install Cassandra in all cluster nodes from the specified tar.gz file. Args: tar_file (str): The file containing Cassandra binaries. """ # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts, ).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess("echo $(readlink -f /usr/bin/javac | " 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote("tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir and create other dirs logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts ) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir, self.hosts) chmods = TaktukRemote( "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir, self.hosts, ) SequentialActions([mv_base_dir, mkdirs, chmods]).run()
def upload_file_sdk(self, client, filePath, fileName):
    """Upload a file's content.

    Args:
      client: Dropbox client instance.
      filePath: Path to the file you want to upload.
      fileName: Name of the new file in Dropbox.
    """
    f = open(filePath, 'rb')
    try:
        client.put_file(fileName, f)
    except ErrorResponse as e:
        logger.error('Error in Upload ' + str(e.status) + ' ' +
                     e.reason + ' : ' + e.error_msg)
    return True
def workflow(self, comb, host, comb_dir): """ """ comb_ok = False thread_name = style.Thread(host.split('.')[0]) + ': ' logger.info(thread_name + 'Starting combination ' + slugify(comb)) try: logger.info(thread_name + 'Generate conf file') param_str = self.create_string(comb) Remote( "python /home/Work/sgcbntier/paasage_demo/xml_gen_execo.py --cb " + param_str, [host]).run() logger.info(thread_name + 'Run code') Remote( "cd /home/Work/sgcbntier/paasage_demo/ ; python run_all_execo.py --cb %s" % param_str, [host]).run() logger.info(thread_name + 'Get results') traceFile = "ntier_" + param_str get_results = Get([host], [ "/home/Work/sgcbntier/paasage_demo/csv/REQTASK_" + traceFile + ".csv" ], local_location=comb_dir).run() for p in get_results.processes: if not p.ok: logger.error( host + ': Unable to retrieve the files for combination %s', slugify(comb)) exit() comb_ok = True finally: if comb_ok: self.sweeper.done(comb) logger.info(thread_name + ': ' + slugify(comb) + \ ' has been done') else: self.sweeper.cancel(comb) logger.warning(thread_name + ': ' + slugify(comb) + \ ' has been canceled') logger.info(style.step('%s Remaining'), len(self.sweeper.get_remaining()))
def make_reservation(self): """Perform a reservation of the required number of nodes.""" logger.info('Performing reservation') now = int(time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1))) starttime = now endtime = int( starttime + timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))) startdate, n_nodes = self._get_nodes(starttime, endtime) search_time = 3 * 24 * 60 * 60 # 3 days walltime_seconds = get_seconds(self.options.walltime) iteration = 0 while not n_nodes: iteration += 1 logger.info( 'Not enough nodes found between %s and %s, ' + 'increasing time window', format_date(starttime), format_date(endtime)) starttime = max(now, now + iteration * search_time - walltime_seconds) endtime = int(now + (iteration + 1) * search_time) startdate, n_nodes = self._get_nodes(starttime, endtime) if starttime > int(time.time() + timedelta_to_seconds(datetime.timedelta( weeks=6))): logger.error( 'There are not enough nodes on %s for your ' + 'experiments, abort ...', self.cluster) exit() jobs_specs = get_jobs_specs({self.cluster: n_nodes}, name=self.__class__.__name__) sub = jobs_specs[0][0] sub.walltime = self.options.walltime if self.use_kadeploy: sub.additional_options = '-t deploy' else: sub.additional_options = '-t allow_classic_ssh' sub.reservation_date = startdate (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0] logger.info('Startdate: %s, n_nodes: %s, job_id: %s', format_date(startdate), str(n_nodes), str(self.oar_job_id))
def download_file_sdk(self, client, fileName, filePath):
    """Download a file's content.

    Args:
      client: Dropbox client instance.
      fileName: Name of the file you want to download.
      filePath: Name of the new local file.
    """
    try:
        f, _ = client.get_file_and_metadata(fileName)
        out = open(filePath, 'wb')
        out.write(f.read())
        out.close()
    except ErrorResponse as e:
        logger.error('Error in Download ' + str(e.status) + ' ' +
                     e.reason + ' : ' + e.error_msg)
    return True
def make_reservation(self): """Perform a reservation of the required number of nodes.""" logger.info('Performing reservation') now = int(time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1))) starttime = now endtime = int(starttime + timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))) startdate, n_nodes = self._get_nodes(starttime, endtime) search_time = 3 * 24 * 60 * 60 # 3 days walltime_seconds = get_seconds(self.options.walltime) iteration = 0 while not n_nodes: iteration += 1 logger.info('Not enough nodes found between %s and %s, ' + 'increasing time window', format_date(starttime), format_date(endtime)) starttime = max(now, now + iteration * search_time - walltime_seconds) endtime = int(now + (iteration + 1) * search_time) startdate, n_nodes = self._get_nodes(starttime, endtime) if starttime > int(time.time() + timedelta_to_seconds( datetime.timedelta(weeks=6))): logger.error('There are not enough nodes on %s for your ' + 'experiments, abort ...', self.cluster) exit() jobs_specs = get_jobs_specs({self.cluster: n_nodes}, name=self.__class__.__name__) sub = jobs_specs[0][0] sub.walltime = self.options.walltime if self.use_kadeploy: sub.additional_options = '-t deploy' else: sub.additional_options = '-t allow_classic_ssh' sub.reservation_date = startdate (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0] logger.info('Startdate: %s, n_nodes: %s, job_id: %s', format_date(startdate), str(n_nodes), str(self.oar_job_id))
def __define_test_parameters(self, config): if config.has_section("test_parameters"): test_parameters_names = config.options("test_parameters") if "test.stats_path" in test_parameters_names: self.stats_manager.stats_path = \ config.get("test_parameters", "test.stats_path") if not os.path.exists(self.stats_manager.stats_path): os.makedirs(self.stats_manager.stats_path) if "test.summary_file" in test_parameters_names: self.stats_manager.summary_file_name = \ config.get("test_parameters", "test.summary_file") if "test.ds_summary_file" in test_parameters_names: self.stats_manager.ds_summary_file_name = \ config.get("test_parameters", "test.ds_summary_file") if "test.num_repetitions" in test_parameters_names: self.comb_manager.num_repetitions = \ int(config.get("test_parameters", "test.num_repetitions")) if "test.jar_file" in test_parameters_names: self.jar_file = config.get("test_parameters", "test.jar_file") if "test.remote_dir" in test_parameters_names: self.remote_dir = config.get("test_parameters", "test.remote_dir") if "test.use_kadeploy" in test_parameters_names: self.use_kadeploy = config.getboolean("test_parameters", "test.use_kadeploy") if self.use_kadeploy: if "test.kadeploy.env_file" in test_parameters_names: self.kadeploy_env_file = \ config.get("test_parameters", "test.kadeploy.env_file") elif "test.kadeploy.env_name" in test_parameters_names: self.kadeploy_env_name = \ config.get("test_parameters", "test.kadeploy.env_name") else: logger.error("Either test.kadeploy.env_file or " "test.kadeploy.env_name should be specified") raise ParameterException("Either test.kadeploy.env_file or " "test.kadeploy.env_name should be " "specified")
def prepare_bench(self): """bench configuration and compilation, copy binaries to frontends return True if preparation is ok """ logger.info("preparation: configure and compile benchmark") # the involved sites. We will do the compilation on the first of these. sites = list(set(map(get_cluster_site, self.parameters['cluster']))) # generate the bench compilation configuration bench_list = '\n'.join([ 'lu\t%s\t%s' % (size, n_core) for n_core in self.parameters['n_core'] for size in self.parameters['size'] ]) # Reserving a node because compiling on the frontend is forbidden # and because we need mpif77 jobs = oarsub([(OarSubmission(resources = "nodes=1", job_type = 'allow_classic_ssh', walltime ='0:10:00'), sites[0])]) if jobs[0][0]: try: logger.info("copying bench archive to %s" % (sites[0],)) copy_bench = Put([sites[0]], ['NPB3.3-MPI.tar.bz2']).run() logger.info("extracting bench archive on %s" % (sites[0],)) extract_bench = Remote('tar -xjf NPB3.3-MPI.tar.bz2', [sites[0]]).run() logger.info("waiting job start %s" % (jobs[0],)) wait_oar_job_start(*jobs[0], prediction_callback = pred_cb) logger.info("getting nodes of %s" % (jobs[0],)) nodes = get_oar_job_nodes(*jobs[0]) logger.info("configure bench compilation") conf_bench = Remote('echo "%s" > ~/NPB3.3-MPI/config/suite.def' % bench_list, nodes).run() logger.info("compil bench") compilation = Remote('cd NPB3.3-MPI && make clean && make suite', nodes).run() logger.info("compil finished") except: logger.error("unable to compile bench") return False finally: oardel(jobs) # Copying binaries to all other frontends frontends = sites[1:] rsync = Remote('rsync -avuP ~/NPB3.3-MPI/ {{frontends}}:NPB3.3-MPI', [get_host_site(nodes[0])] * len(frontends)) rsync.run() return compilation.ok and rsync.ok
def _make_reservation(self, site): """Make a new reservation""" elements = {self.config['cluster']: 1} logger.info('Finding slot for the experiment ' '\nrally %s:1', style.host(self.config['cluster']).rjust(5)) planning = funk.get_planning(elements) slots = funk.compute_slots(planning, walltime=self.config['walltime'].encode( 'ascii', 'ignore'), excluded_elements=EXCLUDED_ELEMENTS) startdate, enddate, resources = funk.find_free_slot(slots, elements) resources = funk.distribute_hosts(resources, elements, EXCLUDED_ELEMENTS) if startdate is None: logger.error("Sorry, could not find the resources requested.") exit(4) jobs_specs = funk.get_jobs_specs(resources, name=self.options.job_name, excluded_elements=EXCLUDED_ELEMENTS) print jobs_specs sub, site = jobs_specs[0] sub.additional_options = "-t deploy" sub.reservation_date = startdate sub.walltime = self.config['walltime'].encode('ascii', 'ignore') sub.name = self.options.job_name if 'testing' in EX5.get_cluster_attributes( self.config['cluster'])['queues']: sub.queue = 'testing' jobs = EX5.oarsub([(sub, site)]) self.job_id = jobs[0][0] logger.info('Job %s will start at %s', style.emph(self.job_id), style.log_header(EX.time_utils.format_date(startdate)))
def start_shell(self, language="IPYTHON", node=None, exec_params=None):
    """Open a Spark shell.

    Args:
      language (str, optional):
        The language to be used in the shell.
      node (Host, optional):
        The host where the shell is to be started. If not provided,
        self.master is chosen.
      exec_params (str, optional):
        The list of parameters used in job execution (e.g., driver-memory).
    """
    if not node:
        node = self.master

    # Configure execution options
    if exec_params is None:
        exec_params = []

    if self.mode == YARN_MODE:
        exec_params.append("--master yarn-client")

    params_str = " " + " ".join(exec_params)

    # Execute shell
    if language.upper() == "IPYTHON":
        call("ssh -t " + node.address + " IPYTHON=1 " + self.bin_dir +
             "/pyspark" + params_str,
             shell=True)
    elif language.upper() == "PYTHON":
        call("ssh -t " + node.address + " " + self.bin_dir + "/pyspark" +
             params_str,
             shell=True)
    elif language.upper() == "SCALA":
        call("ssh -t " + node.address + " " + self.bin_dir + "/spark-shell" +
             params_str,
             shell=True)
    else:
        logger.error("Unknown language " + language)
        return
def __init__(self, job_path, exec_params=None, app_params=None,
             lib_paths=None):
    """Create a new Spark job with the given parameters.

    Args:
      job_path (str):
        The local path of the file containing the job binaries.
      exec_params (list of str, optional):
        The list of parameters used in job execution (e.g., driver-memory).
      app_params (list of str, optional):
        The list of parameters of the application.
      lib_paths (list of str, optional):
        The list of local paths to the libraries used by the job.
    """
    if exec_params is None:
        exec_params = []
    if app_params is None:
        app_params = []
    if lib_paths is None:
        lib_paths = []

    # Check if the job binaries file exists
    if not os.path.exists(job_path):
        logger.error("Job binaries file " + job_path + " does not exist")
        raise SparkJobException("Job binaries file " + job_path +
                                " does not exist")

    # Check if the libraries exist
    for lp in lib_paths:
        if not os.path.exists(lp):
            logger.warn("Lib file " + lp + " does not exist")
            return  # TODO - exception

    self.job_path = job_path
    self.exec_params = exec_params
    self.app_params = app_params
    self.lib_paths = lib_paths
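# A minimal usage sketch of this constructor. The SparkJob class name is
# inferred from SparkJobException above, and the paths and parameters are
# hypothetical; the exact subclass to instantiate may differ.
job = SparkJob("/home/user/jobs/wordcount.jar",
               exec_params=["--driver-memory 2g"],
               app_params=["/data/input", "/data/output"],
               lib_paths=["/home/user/libs/helpers.jar"])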
def add_dependency(self, m1, m2):
    """Include a new macro dependency: m1 -> m2. This means that to obtain
    the value of m2 we use the value of m1.

    Args:
      m1 (string): The name of the param used.
      m2 (string): The name of the param being specified.

    Raises:
      MacroException: If the order of sections (test -> ds -> xp) is not
        respected.
    """
    # Check if dependency is correct
    if m1 in self.ds_params:
        if m2 in self.test_macros:
            logger.error("Not allowed dependency: ds -> test")
            raise MacroException("Not allowed dependency: ds -> test")
    elif m1 in self.xp_params:
        if m2 in self.test_macros:
            logger.error("Not allowed dependency: xp -> test")
            raise MacroException("Not allowed dependency: xp -> test")
        elif m2 in self.ds_params:
            logger.error("Not allowed dependency: xp -> ds")
            raise MacroException("Not allowed dependency: xp -> ds")

    # Add dependency
    self.dep_graph.add_edge(m1, m2)
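# A hedged usage sketch of add_dependency: the macro_manager instance and the
# parameter names are hypothetical, but they illustrate the test -> ds -> xp
# ordering enforced above.
macro_manager.add_dependency("ds.size", "xp.input.size")     # ds -> xp: allowed
try:
    macro_manager.add_dependency("xp.combiner", "ds.size")   # xp -> ds: rejected
except MacroException as e:
    logger.error("Dependency rejected: " + str(e))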
def _make_reservation(self): """Make a new reservation.""" # Extract the list of criteria (ie, `oarsub -l # *criteria*`) in order to compute a specification for the # reservation. criteria = {} # Actual criteria are : # - Number of node per site for cluster, roles in self.config["resources"].items(): site = get_cluster_site(cluster) nb_nodes = reduce(operator.add, map(int, roles.values())) criterion = "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes) criteria.setdefault(site, []).append(criterion) for site, vlan in self.config["vlans"].items(): criteria.setdefault(site, []).append(vlan) # Compute the specification for the reservation jobs_specs = [(OarSubmission(resources = '+'.join(c), name = self.config["name"]), s) for s, c in criteria.items()] logger.info("Criteria for the reservation: %s" % pf(jobs_specs)) # Make the reservation gridjob, _ = EX5.oargridsub( jobs_specs, reservation_date=self.config['reservation'], walltime=self.config['walltime'].encode('ascii', 'ignore'), job_type='deploy' ) # TODO - move this upper to not have a side effect here if gridjob is not None: self.gridjob = gridjob logger.info("Using new oargrid job %s" % style.emph(self.gridjob)) else: logger.error("No oar job was created.") sys.exit(26)
def deploy_nodes(self, min_deployed_hosts=1, max_tries=3):
    """Deploy nodes in the cluster. If the number of deployed nodes is
    less than the specified minimum, try again.

    Args:
      min_deployed_hosts (int, optional):
        Minimum number of nodes to be deployed.
      max_tries (int, optional):
        Maximum number of tries to reach the minimum number of nodes.
    """
    logger.info("Deploying " + str(len(self.hosts)) + " nodes")

    def correct_deployment(deployed, undeployed):
        return len(deployed) >= min_deployed_hosts

    if self.kadeploy_env_file:
        deployment = Deployment(self.hosts,
                                env_file=self.kadeploy_env_file)
    elif self.kadeploy_env_name:
        deployment = Deployment(self.hosts,
                                env_name=self.kadeploy_env_name)
    else:
        logger.error("Neither env_file nor env_name are specified")
        raise ParameterException(
            "Neither env_file nor env_name are specified")

    (deployed, undeployed) = deploy(deployment,
                                    num_tries=max_tries,
                                    check_enough_func=correct_deployment,
                                    out=True)

    logger.info("%i deployed, %i undeployed" % (len(deployed),
                                                len(undeployed)))

    if not correct_deployment(deployed, undeployed):
        logger.error("It was not possible to deploy the minimum number of "
                     "hosts")

    return (deployed, undeployed)
def deploy_nodes(self, min_deployed_hosts=1, max_tries=3):
    """Deploy nodes in the cluster. If the number of deployed nodes is
    less than the specified minimum, try again.

    Args:
      min_deployed_hosts (int, optional):
        Minimum number of nodes to be deployed (default: 1).
      max_tries (int, optional):
        Maximum number of tries to reach the minimum number of nodes
        (default: 3).
    """
    logger.info("Deploying " + str(len(self.hosts)) + " nodes")

    def correct_deployment(deployed, undeployed):
        return len(deployed) >= min_deployed_hosts

    if self.kadeploy_env_file:
        deployment = Deployment(self.hosts,
                                env_file=self.kadeploy_env_file)
    elif self.kadeploy_env_name:
        deployment = Deployment(self.hosts,
                                env_name=self.kadeploy_env_name)
    else:
        logger.error("Neither env_file nor env_name are specified")
        raise ParameterException("Neither env_file nor env_name are "
                                 "specified")

    (deployed, undeployed) = deploy(deployment,
                                    num_tries=max_tries,
                                    check_enough_func=correct_deployment,
                                    out=True)

    logger.info("%i deployed, %i undeployed" % (len(deployed),
                                                len(undeployed)))

    if not correct_deployment(deployed, undeployed):
        logger.error("It was not possible to deploy the minimum number of "
                     "hosts")

    return (deployed, undeployed)
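# A minimal usage sketch, assuming a cluster object that exposes deploy_nodes
# (the variable name and host counts below are hypothetical).
deployed, undeployed = cluster.deploy_nodes(min_deployed_hosts=4,
                                            max_tries=2)
if undeployed:
    logger.warn("%i hosts will not take part in the experiment" %
                len(undeployed))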
def run(self): """Execute a test suite. The execution workflow is as follows: 1. Parse command-line arguments. 2. Define the parameters of the tests from the specified configuration file. Generate all the combination to test from the given parameters. 3. Consume the combinations. 3.1. Setup the cluster if it has not been done (first time or after a reservation ends. 3.2. Load the dataset into the Hadoop cluster. 3.3. Perform the experiments corresponding to the combinations linked to the loaded dataset. 4. Clean all resources. """ # Get parameters self.cluster = self.args[0] self.n_nodes = int(self.args[1]) self.config_file = self.args[2] self.site = get_cluster_site(self.cluster) if not os.path.exists(self.config_file): logger.error("Params file " + self.config_file + " does not exist") sys.exit(1) # Set oar job id if self.options.oar_job_id: self.oar_job_id = self.options.oar_job_id else: self.oar_job_id = None # Main try: # Creation of the main iterator used for the first control loop. self.define_parameters() job_is_dead = False # While they are combinations to treat while len(self.sweeper.get_remaining()) > 0: # SETUP # If no job, we make a reservation and prepare the hosts for the # experiments if job_is_dead or self.oar_job_id is None: self.make_reservation() success = self.setup() if not success: break else: self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend) if not self.hc: self.hc = HadoopCluster(self.hosts) # SETUP FINISHED # Getting the next combination (which requires a ds deployment) comb = self.sweeper.get_next() self.raw_comb = comb.copy() self.comb = comb self.prepare_dataset(comb) self.xp_wrapper(comb) # subloop over the combinations that use the same dataset while True: newcomb = self.sweeper.get_next( lambda r: filter(self._uses_same_ds, r)) if newcomb: self.raw_comb = newcomb.copy() try: self.xp_wrapper(newcomb) except: break else: break if get_oar_job_info(self.oar_job_id, self.frontend)['state'] == 'Error': job_is_dead = True finally: if self.oar_job_id is not None: if not self.options.keep_alive: pass logger.info('Deleting job') oardel([(self.oar_job_id, self.frontend)]) else: logger.info('Keeping job alive for debugging') # Clean cluster if self.hc: if self.hc.initialized: self.hc.clean() # Close summary files if self.summary_file: self.summary_file.close() if self.ds_summary_file: self.ds_summary_file.close()
def init_os(): # Authenticate to keystone # http://docs.openstack.org/developer/keystoneauth/using-sessions.html # http://docs.openstack.org/developer/python-glanceclient/apiv2.html keystone_addr = STATE['config']['vip'] auth = v3.Password(auth_url='http://%s:5000/v3' % keystone_addr, username='******', password='******', project_name='admin', user_domain_id='default', project_domain_id='default') sess = session.Session(auth=auth) # Install `member` role keystone = kclient.Client(session=sess) role_name = 'member' if role_name not in map(attrgetter('name'), keystone.roles.list()): logger.info("Creating role %s" % role_name) keystone.roles.create(role_name) # Install cirros with glance client if absent glance = gclient.Client('2', session=sess) cirros_name = 'cirros.uec' if cirros_name not in map(itemgetter('name'), glance.images.list()): # Download cirros image_url = 'http://download.cirros-cloud.net/0.3.4/' image_name = 'cirros-0.3.4-x86_64-disk.img' logger.info("Downloading %s at %s..." % (cirros_name, image_url)) cirros_img = requests.get(image_url + '/' + image_name) # Install cirros cirros = glance.images.create(name=cirros_name, container_format='bare', disk_format='qcow2', visibility='public') glance.images.upload(cirros.id, cirros_img.content) logger.info("%s has been created on OpenStack" % cirros_name) # Install default flavors nova = nclient.Client('2', session=sess) default_flavors = [ # name, ram, disk, vcpus ('m1.tiny', 512, 1, 1), ('m1.small', 2048, 20, 1), ('m1.medium', 4096, 40, 2), ('m1.large', 8192, 80, 4), ('m1.xlarge', 16384, 160,8) ] current_flavors = map(attrgetter('name'), nova.flavors.list()) for flavor in default_flavors: if flavor[0] not in current_flavors: nova.flavors.create(name=flavor[0], ram=flavor[1], disk=flavor[2], vcpus=flavor[3]) logger.info("%s has been created on OpenStack" % flavor[0]) # Install default network neutron = ntnclient.Client('2', session=sess) network_name = 'public1' network_id = '' networks = neutron.list_networks()['networks'] if network_name not in map(itemgetter('name'), networks): network = {'name': network_name, 'provider:network_type': 'flat', 'provider:physical_network': 'physnet1', 'router:external': True } res = neutron.create_network({'network': network}) network_id = res['network']['id'] logger.info("%s network has been created on OpenStack" % network_name) if not network_id: logger.error("no network_id for %s network" % network_name) sys.exit(32) # Install default subnet subnet_name = '1-subnet' subnets = neutron.list_subnets()['subnets'] if subnet_name not in map(itemgetter('name'), subnets): subnet = {'name': subnet_name, 'network_id': network_id, 'cidr': '10.0.2.0/24', 'ip_version': 4} neutron.create_subnet({'subnet': subnet}) logger.info("%s has been created on OpenStack" % subnet_name)
def run(self): """Inherited method, put here the code for running the engine.""" # Get parameters self.cluster = self.args[0] self.n_nodes = int(self.args[1]) self.config_file = self.args[2] self.site = get_cluster_site(self.cluster) if not os.path.exists(self.config_file): logger.error("Params file " + self.config_file + " does not exist") sys.exit(1) # Set oar job id if self.options.oar_job_id: self.oar_job_id = self.options.oar_job_id else: self.oar_job_id = None # Main try: # Creation of the main iterator used for the first control loop. self.define_parameters() job_is_dead = False # While they are combinations to treat while len(self.sweeper.get_remaining()) > 0: ## SETUP # If no job, we make a reservation and prepare the hosts for the # experiments if job_is_dead or self.oar_job_id is None: self.make_reservation() success = self.setup() if not success: break else: self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend) ## SETUP FINISHED logger.info("Setup finished in hosts " + str(self.hosts)) test_threads = [] for h in self.hosts: t = TestThread(h, self.comb_manager, self.stats_manager) test_threads.append(t) t.name = "th_" + str(h.address).split(".")[0] t.start() for t in test_threads: t.join() if get_oar_job_info(self.oar_job_id, self.frontend)['state'] == 'Error': job_is_dead = True finally: if self.oar_job_id is not None: if not self.options.keep_alive: pass logger.info('Deleting job') oardel([(self.oar_job_id, self.frontend)]) else: logger.info('Keeping job alive for debugging') # Close stats self.stats_manager.close()
def bootstrap(self, tar_file): """Install Hadoop in all cluster nodes from the specified tar.gz file. Args: tar_file (str): The file containing Hadoop binaries. """ # 0. Check that required packages are present required_packages = "openjdk-7-jre openjdk-7-jdk" check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts) for p in check_packages.processes: p.nolog_exit_code = p.nolog_error = True check_packages.run() if not check_packages.ok: logger.info("Packages not installed, trying to install") install_packages = TaktukRemote( "export DEBIAN_MASTER=noninteractive ; " + "apt-get update && apt-get install -y --force-yes " + required_packages, self.hosts).run() if not install_packages.ok: logger.error("Unable to install the packages") get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | ' 'sed "s:/bin/javac::")', self.master) get_java_home.run() self.java_home = get_java_home.stdout.strip() logger.info("All required packages are present") # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = Remote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir + " " + self.hadoop_temp_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf]).run() # 2. Move installation to base dir and create other dirs logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir + " && mkdir -p " + self.hadoop_temp_dir, self.hosts) chmods = TaktukRemote("chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir + " && chmod g+w " + self.hadoop_temp_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 4. Specify environment variables command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n" command += "export JAVA_HOME=" + self.java_home + "\n" command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n" command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n" command += "EOF" action = Remote(command, self.hosts) action.run() # 5. Check version return self._check_version_compliance()
def bootstrap(self, tar_file): """Install Hadoop in all cluster nodes from the specified tar.gz file. Args: tar_file (str): The file containing Hadoop binaries. """ # 0. Check requirements java_major_version = 7 if not check_java_version(java_major_version, self.hosts): msg = "Java 1.%d+ required" % java_major_version logger.error(msg) raise HadoopException(msg) self.java_home = get_java_home(self.master) # 1. Copy hadoop tar file and uncompress logger.info("Copy " + tar_file + " to hosts and uncompress") rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir + " " + self.hadoop_temp_dir, self.hosts) put_tar = TaktukPut(self.hosts, [tar_file], "/tmp") tar_xf = TaktukRemote( "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts) rm_tar = TaktukRemote( "rm /tmp/" + os.path.basename(tar_file), self.hosts) SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run() # 2. Move installation to base dir and create other dirs logger.info("Create installation directories") mv_base_dir = TaktukRemote( "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts) mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir + " && mkdir -p " + self.hadoop_temp_dir, self.hosts) chmods = TaktukRemote("chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir + " && chmod g+w " + self.hadoop_temp_dir, self.hosts) SequentialActions([mv_base_dir, mkdirs, chmods]).run() # 4. Specify environment variables command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n" command += "export JAVA_HOME=" + self.java_home + "\n" command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n" command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n" command += "EOF" action = Remote(command, self.hosts) action.run() # 5. Check version (cannot do it before) if not self._check_version_compliance(): return False # 6. Generate initial configuration self._initialize_conf() return True
def __define_test_parameters(self, config): if config.has_section("test_parameters"): test_parameters_names = config.options("test_parameters") if "test.stats_path" in test_parameters_names: self.stats_path = config.get("test_parameters", "test.stats_path") if not os.path.exists(self.stats_path): os.makedirs(self.stats_path) if "test.remove_output" in test_parameters_names: self.remove_output = \ bool(config.get("test_parameters", "test.remove_output")) if "test.output_path" in test_parameters_names: self.output_path = \ config.get("test_parameters", "test.output_path") if not os.path.exists(self.output_path): os.makedirs(self.output_path) if "test.summary_file" in test_parameters_names: self.summary_file_name = \ config.get("test_parameters", "test.summary_file") if "test.ds_summary_file" in test_parameters_names: self.ds_summary_file_name = \ config.get("test_parameters", "test.ds_summary_file") if "test.num_repetitions" in test_parameters_names: self.num_repetitions = \ int(config.get("test_parameters", "test.num_repetitions")) if "test.hadoop.properties" in test_parameters_names: self.hadoop_props = \ config.get("test_parameters", "test.hadoop.properties") if not os.path.exists(self.hadoop_props): logger.error("Hadoop properties file " + self.hadoop_props + " does not exist") raise ParameterException("Hadoop properties file " + self.hadoop_props + " does not exist") if "test.use_kadeploy" in test_parameters_names: self.use_kadeploy = config.getboolean("test_parameters", "test.use_kadeploy") if self.use_kadeploy: if "test.kadeploy.env_file" in test_parameters_names: self.kadeploy_env_file = \ config.get("test_parameters", "test.kadeploy.env_file") elif "test.kadeploy.env_name" in test_parameters_names: self.kadeploy_env_name = \ config.get("test_parameters", "test.kadeploy.env_name") else: logger.error("Either test.kadeploy.env_file or " "test.kadeploy.env_name should be specified") raise ParameterException("Either test.kadeploy.env_file or " "test.kadeploy.env_name should be " "specified") else: if "test.hadoop.tar_file" in test_parameters_names: self.hadoop_tar_file = \ config.get("test_parameters", "test.hadoop.tar_file") else: logger.error("test.hadoop.tar_file should be specified") raise ParameterException("test.hadoop.tar_file should be " "specified")