def action_create(self):
  group = self.group
  if not group:
    command = ['groupadd']
    Logger.info("Adding group %s" % self.resource)
  else:
    command = ['groupmod']

    for option_name, attributes in self.options.iteritems():
      if getattr(self.resource, option_name) is not None and getattr(self.resource, option_name) != attributes[0](self):
        break
    else:
      return

    Logger.info("Modifying group %s" % (self.resource.group_name))

  for option_name, attributes in self.options.iteritems():
    option_value = getattr(self.resource, option_name)
    if attributes[1] and option_value:
      command += [attributes[1], str(option_value)]

  # if trying to modify existing group, but no values to modify are provided
  if self.group and len(command) == 1:
    return

  command.append(self.resource.group_name)

  shell.checked_call(command, sudo=True)
def read_file(filename):
  tmpf = tempfile.NamedTemporaryFile()
  shell.checked_call(["cp", "-f", filename, tmpf.name], sudo=True)

  with tmpf:
    with open(tmpf.name, "rb") as fp:
      return fp.read()
def action_run(self):
  if self.resource.creates:
    if sudo.path_exists(self.resource.creates):
      Logger.info("Skipping %s due to creates" % self.resource)
      return

  env = self.resource.environment

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=env,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout, path=self.resource.path,
                         sudo=self.resource.sudo, on_new_line=self.resource.on_new_line)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = "Execution of '%s' was killed due to timeout after %d seconds" % (self.resource.command, self.resource.timeout)

      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def action_create(self):
  with Environment.get_instance_copy() as env:
    repo_file_name = self.resource.repo_file_name
    repo_dir = get_repo_dir()
    new_content = InlineTemplate(self.resource.repo_template, repo_id=self.resource.repo_id,
                                 repo_file_name=self.resource.repo_file_name,
                                 base_url=self.resource.base_url,
                                 mirror_list=self.resource.mirror_list)
    repo_file_path = format("{repo_dir}/{repo_file_name}.repo")

    if os.path.isfile(repo_file_path):
      existing_content_str = sudo.read_file(repo_file_path)
      new_content_str = new_content.get_content()
      if existing_content_str != new_content_str and OSCheck.is_suse_family():
        # We need to reset package manager's cache when we replace base urls
        # at existing repo. That is a case at least under SLES
        Logger.info("Flushing package manager cache since repo file content is about to change")
        checked_call(self.update_cmd, sudo=True)
      if self.resource.append_to_file:
        content = existing_content_str + '\n' + new_content_str
      else:
        content = new_content_str
    else:  # if repo file does not exist yet
      content = new_content

    File(repo_file_path, content=content)
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}
    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
def remove_package(self, name):
  if self._check_existence(name):
    cmd = REMOVE_CMD % (name)
    Logger.info("Removing package %s ('%s')" % (name, cmd))
    shell.checked_call(cmd)
  else:
    Logger.info("Skipping removal of non-existent package %s" % (name))
def install_package(self, name):
  if not self._check_existence(name):
    cmd = INSTALL_CMD % (name)
    Logger.info("Installing package %s ('%s')" % (name, cmd))
    shell.checked_call(cmd)
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def remove_package(self, name):
  if self._check_existence(name):
    cmd = REMOVE_CMD[self.get_logoutput()] + [name]
    Logger.info("Removing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
    shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
  else:
    Logger.info("Skipping removal of non-existing package %s" % (name))
def chmod_recursive(path, recursive_mode_flags, recursion_follow_links):
  find_flags = []
  if recursion_follow_links:
    find_flags.append('-L')

  for key, flags in recursive_mode_flags.iteritems():
    shell.checked_call(["find"] + find_flags + [path, "-type", key, "-exec", "chmod", flags, "{}", ";"])
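# Usage sketch (path and modes below are illustrative, not from the snippet above):
# recursive_mode_flags maps `find -type` keys to chmod modes, so a single call can
# give directories and plain files different permissions. Each dict entry expands
# to one `find <path> -type <key> -exec chmod <flags> {} ;` invocation.
#   chmod_recursive("/var/log/example", {"d": "0755", "f": "0644"}, False)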
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}
    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout, pid_file=self.resource.pid_file,
                         poll_after=self.resource.poll_after)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = "Execution of '%s' was killed due to timeout after %d seconds" % (self.resource.command, self.resource.timeout)

      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def select(stack_name, package, version, try_create=True, ignore_errors=False):
  """
  Selects a config version for the specified package. If this detects that the stack
  supports configuration versioning but /etc/<component>/conf is a directory, then it will
  attempt to bootstrap the conf.backup directory and change /etc/<component>/conf into a
  symlink.

  :param stack_name: the name of the stack
  :param package: the name of the package, as-used by <conf-selector-tool>
  :param version: the version number to create
  :param try_create: optional argument to attempt to create the directory before setting it
  :param ignore_errors: optional argument to ignore any error and simply log a warning
  """
  try:
    # do nothing if the stack does not support versioned configurations
    if not _valid(stack_name, package, version):
      return

    if try_create:
      create(stack_name, package, version)

    shell.checked_call(_get_cmd("set-conf-dir", package, version), logoutput=False, quiet=False, sudo=True)

    # for consistency sake, we must ensure that the /etc/<component>/conf symlink exists and
    # points to <stack-root>/current/<component>/conf - this is because some people still prefer to
    # use /etc/<component>/conf even though <stack-root> is the "future"
    package_dirs = get_package_dirs()
    if package in package_dirs:
      Logger.info("Ensuring that {0} has the correct symlink structure".format(package))

      directory_list = package_dirs[package]
      for directory_structure in directory_list:
        conf_dir = directory_structure["conf_dir"]
        current_dir = directory_structure["current_dir"]

        # if /etc/<component>/conf is missing or is not a symlink
        if not os.path.islink(conf_dir):
          # if /etc/<component>/conf is not a link and it exists, convert it to a symlink
          if os.path.exists(conf_dir):
            parent_directory = os.path.dirname(conf_dir)
            conf_backup_dir = os.path.join(parent_directory, "conf.backup")

            # create conf.backup and copy files to it (if it doesn't exist)
            Execute(("cp", "-R", "-p", conf_dir, conf_backup_dir),
                    not_if=format("test -e {conf_backup_dir}"), sudo=True)

            # delete the old /etc/<component>/conf directory and link to the backup
            Directory(conf_dir, action="delete")
            Link(conf_dir, to=conf_backup_dir)
          else:
            # missing entirely
            # /etc/<component>/conf -> <stack-root>/current/<component>/conf
            Link(conf_dir, to=current_dir)
  except Exception, exception:
    if ignore_errors is True:
      Logger.warning("Could not select the directory for package {0}. Error: {1}".format(package, str(exception)))
    else:
      raise
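# Hedged usage sketch (the stack, package, and version values here are invented;
# real callers supply them from Ambari's command context):
#   select("HDP", "hadoop", "2.5.0.0-1245", try_create=True, ignore_errors=True)
# This points /etc/hadoop/conf at the versioned config dir via <conf-selector-tool>,
# backing up a plain conf directory to conf.backup the first time it runs.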
def action_create(self):
  if not self.user:
    command = ['useradd', "-m"]
    Logger.info("Adding user %s" % self.resource)
  else:
    command = ['usermod']
    Logger.info("Modifying user %s" % (self.resource.username))

  options = dict(
    comment="-c",
    gid="-g",
    uid="-u",
    shell="-s",
    password="******",
    home="-d",
  )

  if self.resource.system and not self.user:
    command.append("--system")

  if self.resource.groups:
    command += ["-G", ",".join(self.resource.groups)]

  for option_name, option_flag in options.items():
    option_value = getattr(self.resource, option_name)
    if option_flag and option_value:
      command += [option_flag, str(option_value)]

  command.append(self.resource.username)

  shell.checked_call(command)
def chown_recursive(path, owner, group, follow_links=False):
  owner = owner.pw_name if owner else ""
  group = group.gr_name if group else ""

  if owner or group:
    flags = ["-R"]
    if follow_links:
      flags.append("-L")
    shell.checked_call(["chown"] + flags + [owner + ":" + group, path], sudo=True)
def action_install(self):
  package_name = self.resource.package_name
  location = self.resource.location

  if not self._check_existence(package_name, location):
    cmd = TAR_CMD % (package_name, location)
    if package_name.lower().endswith("zip"):
      cmd = ZIP_CMD % (package_name, location)
    Logger.info("Installing tarball %s at %s (%s)" % (package_name, location, cmd))
    shell.checked_call(cmd)
def read_file(filename, encoding=None):
  tmpf = tempfile.NamedTemporaryFile()
  shell.checked_call(["cp", "-f", filename, tmpf.name], sudo=True)

  with tmpf:
    with open(tmpf.name, "rb") as fp:
      content = fp.read()

  content = content.decode(encoding) if encoding else content
  return content
def install_package(self, name, use_repos=[]):
  if not self._check_existence(name) or use_repos:
    cmd = INSTALL_CMD[self.get_logoutput()]
    if use_repos:
      enable_repo_option = "--enablerepo=" + ",".join(use_repos)
      cmd = cmd + ["--disablerepo=*", enable_repo_option]
    cmd = cmd + [name]
    Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
    shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def create_file(filename, content, encoding=None):
  """
  if content is None, create empty file
  """
  content = content if content else ""
  content = content.encode(encoding) if encoding else content

  tmpf_name = tempfile.gettempdir() + os.sep + tempfile.template + str(time.time()) + "_" + str(random.randint(0, 1000))
  try:
    with open(tmpf_name, "wb") as fp:
      fp.write(content)
    shell.checked_call(["cp", "-f", tmpf_name, filename], sudo=True)
  finally:
    os.unlink(tmpf_name)
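# Usage sketch (path and payload are hypothetical): stage unicode content as UTF-8
# in a temp file created under the process umask, then sudo-copy it into a
# root-owned location.
#   create_file("/etc/example/banner.txt", u"managed by ambari\n", encoding="utf-8")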
def create_file(filename, content):
  """
  if content is None, create empty file
  """
  tmpf = tempfile.NamedTemporaryFile()

  if content:
    with open(tmpf.name, "wb") as fp:
      fp.write(content)

  with tmpf:
    shell.checked_call(["cp", "-f", tmpf.name, filename], sudo=True)

  # set default files mode
  chmod(filename, 0644)
def action_run(self):
  if self.resource.creates:
    if sudo.path_exists(self.resource.creates):
      Logger.info("Skipping %s due to creates" % self.resource)
      return

  shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                     cwd=self.resource.cwd, env=self.resource.environment,
                     preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                     wait_for_finish=self.resource.wait_for_finish,
                     timeout=self.resource.timeout, on_timeout=self.resource.on_timeout,
                     path=self.resource.path, sudo=self.resource.sudo,
                     on_new_line=self.resource.on_new_line,
                     stdout=self.resource.stdout, stderr=self.resource.stderr,
                     tries=self.resource.tries, try_sleep=self.resource.try_sleep)
def listdir(path):
  if not path_isdir(path):
    raise Fail("{0} is not a directory, cannot list its files.".format(path))

  code, out, err = shell.checked_call(["ls", path], sudo=True, stderr=subprocess.PIPE)
  files = out.splitlines()
  return files
def action_create(self):
  with Environment.get_instance_copy() as env:
    with tempfile.NamedTemporaryFile() as tmpf:
      repo_file_name = format("{repo_file_name}.list", repo_file_name=self.resource.repo_file_name)
      repo_file_path = format("{repo_dir}/{repo_file_name}", repo_dir=self.repo_dir)

      new_content = Template(self.resource.repo_template, package_type=self.package_type,
                             base_url=self.resource.base_url,
                             components=" ".join(self.resource.components)).get_content()
      old_content = ""
      if self.resource.append_to_file and os.path.isfile(repo_file_path):
        with open(repo_file_path) as repo_file:
          old_content = repo_file.read() + "\n"

      File(tmpf.name, content=old_content + new_content)

      if not os.path.isfile(repo_file_path) or not filecmp.cmp(tmpf.name, repo_file_path):
        File(repo_file_path, content=StaticFile(tmpf.name))

        update_cmd_formatted = [format(x) for x in self.update_cmd]
        # this is time-expensive
        retcode, out = checked_call(update_cmd_formatted, sudo=True)

        # add public keys for new repos
        missing_pkeys = set(re.findall(self.missing_pkey_regex, out))
        for pkey in missing_pkeys:
          Execute(format(self.add_pkey_cmd),
                  timeout=15,  # in case we are on a host w/o internet (using localrepo), we should ignore hanging
                  ignore_failures=True)
def __init__(self, path):
  cmd = ["stat", "-c", "%u %g %a", path]
  code, out, err = shell.checked_call(cmd, sudo=True, stderr=subprocess.PIPE)

  values = out.split(' ')
  if len(values) != 3:
    raise Fail("Execution of '{0}' returned unexpected output. Code: {1}\nErr: {2}\nOut: {3}".format(cmd, code, err, out))

  uid_str, gid_str, mode_str = values
  self.st_uid, self.st_gid, self.st_mode = int(uid_str), int(gid_str), int(mode_str, 8)
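# For context: `stat -c "%u %g %a" <path>` prints the numeric uid, gid, and octal
# permission bits separated by spaces (e.g. "0 0 644"), which is why the mode
# string above is parsed with int(mode_str, 8). Assuming this __init__ belongs to
# a Stat-like wrapper class, a hypothetical use:
#   st = Stat("/etc/passwd")   # st.st_uid == 0, st.st_gid == 0, oct(st.st_mode) == '0644'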
def get_unique_id_and_date():
  out = shell.checked_call("hostid")[1]
  id = out.strip()

  now = datetime.datetime.now()
  date = now.strftime("%M%d%y")

  return "id{id}_date{date}".format(id=id, date=date)
def get_unique_id_and_date():
  out = shell.checked_call("hostid")[1].split('\n')[-1]  # bugfix: take the last line ("stdin is not a tty" part cut)
  id = out.strip()

  now = datetime.datetime.now()
  date = now.strftime("%M%d%y")

  return "id{id}_date{date}".format(id=id, date=date)
def install_package(self, name, use_repos=[]):
  if not self._check_existence(name) or use_repos:
    cmd = INSTALL_CMD[self.get_logoutput()]
    if use_repos:
      active_base_repos = get_active_base_repos()
      if 'base' in use_repos:
        use_repos = filter(lambda x: x != 'base', use_repos)
        use_repos.extend(active_base_repos)
      use_repos_options = []
      for repo in use_repos:
        use_repos_options = use_repos_options + ['--repo', repo]
      cmd = cmd + use_repos_options
    cmd = cmd + [name]
    Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
    shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def install_package(self, name):
  if not self._check_existence(name):
    cmd = INSTALL_CMD % (name)
    Logger.info("Installing package %s ('%s')" % (name, cmd))

    code, out = shell.call(cmd)

    # perhaps apt-get update hasn't been run for too long
    if code:
      Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
      Logger.info("Failed to install package %s. Executing `%s`" % (name, REPO_UPDATE_CMD))
      code, out = shell.call(REPO_UPDATE_CMD)

      if code:
        Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))

      Logger.info("Retrying to install package %s" % (name))
      shell.checked_call(cmd)
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def install_package(self, name, use_repos=[], skip_repos=[]):
  if use_repos or not self._check_existence(name):
    cmd = INSTALL_CMD[self.get_logoutput()]
    copied_sources_files = []
    is_tmp_dir_created = False
    if use_repos:
      is_tmp_dir_created = True
      apt_sources_list_tmp_dir = tempfile.mkdtemp(suffix="-ambari-apt-sources-d")
      Logger.info("Temporary sources directory was created: %s" % apt_sources_list_tmp_dir)
      if 'base' not in use_repos:
        cmd = cmd + ['-o', 'Dir::Etc::SourceList=%s' % EMPTY_FILE]
      for repo in use_repos:
        if repo != 'base':
          new_sources_file = os.path.join(apt_sources_list_tmp_dir, repo + '.list')
          Logger.info("Temporary sources file will be copied: %s" % new_sources_file)
          shutil.copy(os.path.join(APT_SOURCES_LIST_DIR, repo + '.list'), new_sources_file)
          copied_sources_files.append(new_sources_file)
      cmd = cmd + ['-o', 'Dir::Etc::SourceParts=%s' % apt_sources_list_tmp_dir]

    cmd = cmd + [name]
    Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))

    code, out = shell.call(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())

    # perhaps apt-get update hasn't been run for too long
    if code:
      Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
      Logger.info("Failed to install package %s. Executing `%s`" % (name, string_cmd_from_args_list(REPO_UPDATE_CMD)))
      code, out = shell.call(REPO_UPDATE_CMD, sudo=True, logoutput=self.get_logoutput())

      if code:
        Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))

      Logger.info("Retrying to install package %s" % (name))
      shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())

    if is_tmp_dir_created:
      for temporary_sources_file in copied_sources_files:
        Logger.info("Removing temporary sources file: %s" % temporary_sources_file)
        os.remove(temporary_sources_file)
      Logger.info("Removing temporary sources directory: %s" % apt_sources_list_tmp_dir)
      os.rmdir(apt_sources_list_tmp_dir)
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
def action_create(self):
  group = self.group
  if not group:
    command = ["groupadd"]
    Logger.info("Adding group %s" % self.resource)
  else:
    command = ["groupmod"]
    Logger.info("Modifying group %s" % (self.resource.group_name))

  options = dict(gid="-g", password="******")

  for option_name, option_flag in options.items():
    option_value = getattr(self.resource, option_name)
    if option_flag and option_value:
      command += [option_flag, str(option_value)]

  command.append(self.resource.group_name)

  shell.checked_call(command)

  group = self.group
def action_create(self):
  if not self.user:
    command = ['useradd', "-m"]
    Logger.info("Adding user %s" % self.resource)
  else:
    command = ['usermod']
    Logger.info("Modifying user %s" % (self.resource.username))

  options = dict(
    comment="-c",
    gid="-g",
    uid="-u",
    shell="-s",
    password="******",
    home="-d",
  )

  if self.resource.system and not self.user:
    command.append("--system")

  if self.resource.groups:
    groups = self.resource.groups
    if self.user and self.user_groups:
      groups += self.user_groups
    command += ["-G", ",".join(groups)]

  for option_name, option_flag in options.items():
    option_value = getattr(self.resource, option_name)
    if option_flag and option_value:
      command += [option_flag, str(option_value)]

  # if trying to modify existing user, but no values to modify are provided
  if self.user and len(command) == 1:
    return

  command.append(self.resource.username)

  shell.checked_call(command, sudo=True)
def select(stack_name, package, version, try_create=True):
  """
  Selects a config version for the specified package.

  :param stack_name: the name of the stack
  :param package: the name of the package, as-used by conf-select
  :param version: the version number to create
  :param try_create: optional argument to attempt to create the directory before setting it
  """
  if not _valid(stack_name, package, version):
    return

  if try_create:
    create(stack_name, package, version)

  shell.checked_call(get_cmd("set-conf-dir", package, version), logoutput=False, quiet=False, sudo=True)

  # for consistency sake, we must ensure that the /etc/<component>/conf symlink exists and
  # points to /usr/hdp/current/<component>/conf - this is because some people still prefer to
  # use /etc/<component>/conf even though /usr/hdp is the "future"
  if package in PACKAGE_DIRS:
    Logger.info("Ensuring that {0} has the correct symlink structure".format(package))

    directory_list = PACKAGE_DIRS[package]
    for directory_structure in directory_list:
      conf_dir = directory_structure["conf_dir"]
      current_dir = directory_structure["current_dir"]

      # if /etc/<component>/conf is not a symlink, we need to change it
      if not os.path.islink(conf_dir):
        # if it exists, try to back it up
        if os.path.exists(conf_dir):
          parent_directory = os.path.dirname(conf_dir)
          conf_install_dir = os.path.join(parent_directory, "conf.backup")

          Execute(("cp", "-R", "-p", conf_dir, conf_install_dir),
                  not_if=format("test -e {conf_install_dir}"), sudo=True)

          Directory(conf_dir, action="delete")

        Link(conf_dir, to=current_dir)
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return (UKNOWN_STATUS_CODE, ['There were no configurations supplied to the script.'])

  result_code = None

  try:
    use_external_hbase = False
    if USE_EXTERNAL_HBASE_KEY in configurations:
      use_external_hbase = str(configurations[USE_EXTERNAL_HBASE_KEY]).upper() == 'TRUE'

    if use_external_hbase:
      return (OK_RESULT_CODE, ['use_external_hbase set to true.'])

    is_hbase_system_service_launch = False
    if ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY in configurations:
      is_hbase_system_service_launch = str(configurations[ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY]).upper() == 'TRUE'

    yarn_hbase_user = "******"
    if ATS_HBASE_USER_KEY in configurations:
      yarn_hbase_user = configurations[ATS_HBASE_USER_KEY]

    if not is_hbase_system_service_launch:
      yarn_hbase_pid_dir_prefix = ""
      if ATS_HBASE_PID_DIR_PREFIX in configurations:
        yarn_hbase_pid_dir_prefix = configurations[ATS_HBASE_PID_DIR_PREFIX]
      else:
        return (UKNOWN_STATUS_CODE, ['The yarn_hbase_pid_dir_prefix is a required parameter.'])

      yarn_hbase_pid_dir = format("{yarn_hbase_pid_dir_prefix}/{yarn_hbase_user}")
      master_pid_file = format("{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-master.pid")
      rs_pid_file = format("{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-regionserver.pid")

      if host_name is None:
        host_name = socket.getfqdn()

      master_process_running = is_monitor_process_live(master_pid_file)
      rs_process_running = is_monitor_process_live(rs_pid_file)

      alert_state = OK_RESULT_CODE if master_process_running and rs_process_running else CRITICAL_RESULT_CODE

      alert_label = 'ATS embedded HBase is running on {0}' if master_process_running and rs_process_running else 'ATS embedded HBase is NOT running on {0}'
      alert_label = alert_label.format(host_name)

      return (alert_state, [alert_label])
    else:
      security_enabled = False
      if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

      check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
      if CHECK_COMMAND_TIMEOUT_KEY in configurations:
        check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

      if security_enabled:
        if ATS_HBASE_PRINCIPAL_KEY in configurations:
          ats_hbase_app_principal = configurations[ATS_HBASE_PRINCIPAL_KEY]
          ats_hbase_app_principal = ats_hbase_app_principal.replace('_HOST', host_name.lower())

        if ATS_HBASE_PRINCIPAL_KEYTAB_KEY in configurations:
          ats_hbase_app_keytab = configurations[ATS_HBASE_PRINCIPAL_KEYTAB_KEY]

        # Get the configured Kerberos executable search paths, if any
        if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
          kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
        else:
          kerberos_executable_search_paths = None

        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
        kinitcmd = format("{kinit_path_local} -kt {ats_hbase_app_keytab} {ats_hbase_app_principal}; ")

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
          Execute(kinitcmd, user=yarn_hbase_user,
                  path=["/bin/", "/usr/bin/", "/usr/sbin/"],
                  timeout=10)
        finally:
          kinit_lock.release()

      start_time = time.time()
      ats_hbase_status_cmd = STACK_ROOT_DEFAULT + format("/current/hadoop-yarn-client/bin/yarn app -status ats-hbase")

      code, output, error = shell.checked_call(ats_hbase_status_cmd, user=yarn_hbase_user,
                                               stderr=subprocess.PIPE,
                                               timeout=check_command_timeout,
                                               logoutput=False)
      if code != 0:
        alert_label = traceback.format_exc()
        result_code = UKNOWN_STATUS_CODE
        return (result_code, [alert_label])

      # Call for getting JSON
      ats_hbase_app_info = make_valid_json(output)

      if ats_hbase_app_info is None:
        alert_label = CRITICAL_MESSAGE
        result_code = CRITICAL_RESULT_CODE
        return (result_code, [alert_label])

      if 'state' not in ats_hbase_app_info:
        alert_label = traceback.format_exc()
        result_code = UKNOWN_STATUS_CODE
        return (result_code, [alert_label])

      retrieved_ats_hbase_app_state = ats_hbase_app_info['state'].upper()

      if retrieved_ats_hbase_app_state in ['STABLE']:
        result_code = OK_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = OK_MESSAGE.format(retrieved_ats_hbase_app_state, total_time)
      else:
        result_code = CRITICAL_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = CRITICAL_MESSAGE_WITH_STATE.format(retrieved_ats_hbase_app_state, total_time)
  except:
    alert_label = traceback.format_exc()
    result_code = CRITICAL_RESULT_CODE

  return (result_code, [alert_label])
def copy(src, dst):
  # sudo=True already escalates the command; the extra "sudo" argv entry was redundant
  shell.checked_call(["cp", "-r", src, dst], sudo=True)
def rmtree(path):
  shell.checked_call(["rm", "-rf", path], sudo=True)

def link(source, link_name):
  shell.checked_call(["ln", "-f", source, link_name], sudo=True)

def chmod_extended(path, mode):
  shell.checked_call(["chmod", mode, path], sudo=True)

def chown(path, owner, group):
  owner = owner.pw_name if owner else ""
  group = group.gr_name if group else ""
  if owner or group:
    shell.checked_call(["chown", owner + ":" + group, path], sudo=True)
def service_check(self, env):
  import params
  env.set_params(params)

  params.HdfsResource(format("/user/{smokeuser}"),
                      type="directory",
                      action="create_on_execute",
                      owner=params.smokeuser,
                      mode=params.smoke_hdfs_user_mode,
                      )

  if params.stack_version_formatted_major and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.stack_version_formatted_major):
    path_to_distributed_shell_jar = format("{stack_root}/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar")
  else:
    path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

  yarn_distributed_shell_check_params = ["yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
                                         "-shell_command", "ls", "-num_containers", "{number_of_nm}",
                                         "-jar", "{path_to_distributed_shell_jar}",
                                         "-timeout", "300000", "--queue", "{service_check_queue_name}"]

  yarn_distributed_shell_check_cmd = format(" ".join(yarn_distributed_shell_check_params))

  if params.security_enabled:
    kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
    smoke_cmd = format("{kinit_cmd} {yarn_distributed_shell_check_cmd}")
  else:
    smoke_cmd = yarn_distributed_shell_check_cmd

  return_code, out = shell.checked_call(smoke_cmd,
                                        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                        user=params.smokeuser,
                                        )

  m = re.search("appTrackingUrl=(.*),\s", out)
  app_url = m.group(1)

  splitted_app_url = str(app_url).split('/')

  for item in splitted_app_url:
    if "application" in item:
      application_name = item

  # Find out the active RM from RM list
  # Raise an exception if the active rm cannot be determined
  active_rm_webapp_address = self.get_active_rm_webapp_address()
  Logger.info("Active Resource Manager web app address is : " + active_rm_webapp_address)

  # Verify job state from active resource manager via rest api
  info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
  get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

  return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
                                                user=params.smokeuser,
                                                path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                                )

  try:
    json_response = json.loads(stdout)
  except Exception as e:
    raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))

  if json_response is None or 'app' not in json_response or \
          'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
    raise Fail("Application " + app_url + " returns invalid data.")

  if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
    raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
def checkAndStopRegistyDNS():
  import params
  import status_params

  componentName = 'registrydns'
  action = 'stop'
  daemon = format("{yarn_bin}/yarn")
  hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir}

  # When registry dns is switched from non-privileged to privileged mode or the other way,
  # then the previous instance of registry dns has a different pid/user.
  # Checking if either of the processes are running and shutting them down if they are.

  # privileged mode
  dns_pid_file = status_params.yarn_registry_dns_priv_pid_file
  dns_user = status_params.root_user
  Logger.info("checking any existing dns pid file = '" + dns_pid_file + "' dns user '" + dns_user + "'")
  try:
    # these are needed for unknown reasons
    env_exports = {
      'HADOOP_PID_DIR': params.yarn_pid_dir,
      'HADOOP_SECURE_PID_DIR': params.yarn_pid_dir,
      'HADOOP_LOG_DIR': params.yarn_log_dir,
      'HADOOP_SECURE_LOG_DIR': params.yarn_log_dir
    }
    env_exports.update(hadoop_env_exports)
    cmd = [daemon, "--config", params.hadoop_conf_dir, "--daemon", action, componentName]
    daemon_cmd = as_sudo(cmd)
    process_id_exists_command = as_sudo(["test", "-f", dns_pid_file]) + " && " + as_sudo(["pgrep", "-F", dns_pid_file])
    Execute(daemon_cmd, only_if=process_id_exists_command, environment=env_exports)
  except:
    # When the registry dns port is modified but registry dns is not started
    # immediately, then the configs in yarn-env.sh & yarn-site.xml related
    # to registry dns may have already changed. This introduces a discrepancy
    # between the actual process that is running and the configs.
    # For example, when port is changed from 5300 to 53,
    # then dns port = 53 in yarn-site and YARN_REGISTRYDNS_SECURE_* envs in yarn-env.sh
    # are saved. So, while trying to shutdown the stray non-privileged registry dns process
    # after sometime, yarn daemon from the configs thinks that it needs privileged
    # access and throws an exception. In such cases, we try to kill the stray process.
    pass

  process_id_does_not_exist_command = format("! ( {process_id_exists_command} )")
  code, out = shell.call(process_id_does_not_exist_command, env=env_exports, tries=5, try_sleep=5)
  if code != 0:
    code, out, err = shell.checked_call(("pgrep", "-f", dns_pid_file), sudo=True, env=env_exports,
                                        stderr=subprocess32.PIPE)
    Logger.info("PID to kill was retrieved: '" + out + "'.")
    for pid in out.splitlines():
      try:
        Execute(("kill", "-9", pid), sudo=True)
      except:
        # ignoring failures
        Logger.warning("failed to kill pid '" + pid + "'.")
        pass
    File(dns_pid_file, action="delete")

  # non-privileged mode
  dns_pid_file = status_params.yarn_registry_dns_pid_file
  dns_user = params.yarn_user
  Logger.info("checking any existing dns pid file = '" + dns_pid_file + "' dns user '" + dns_user + "'")
  try:
    cmd = format("{daemon} --config {hadoop_conf_dir} --daemon {action} {componentName}")
    daemon_cmd = as_user(cmd, dns_user)
    Execute(daemon_cmd, environment=hadoop_env_exports)
  except:
    pass
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  LLAP_APP_STATUS_CMD_TIMEOUT = 0

  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  result_code = None

  try:
    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
      security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
    if CHECK_COMMAND_TIMEOUT_KEY in configurations:
      check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

    hive_user = HIVE_USER_DEFAULT
    if HIVE_USER_KEY in configurations:
      hive_user = configurations[HIVE_USER_KEY]

    llap_app_name = LLAP_APP_NAME_DEFAULT
    if LLAP_APP_NAME_KEY in configurations:
      llap_app_name = configurations[LLAP_APP_NAME_KEY]

    if security_enabled:
      if HIVE_PRINCIPAL_KEY in configurations:
        llap_principal = configurations[HIVE_PRINCIPAL_KEY]
      else:
        llap_principal = HIVE_PRINCIPAL_DEFAULT
      llap_principal = llap_principal.replace('_HOST', host_name.lower())

      llap_keytab = HIVE_PRINCIPAL_KEYTAB_DEFAULT
      if HIVE_PRINCIPAL_KEYTAB_KEY in configurations:
        llap_keytab = configurations[HIVE_PRINCIPAL_KEYTAB_KEY]

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinitcmd = format("{kinit_path_local} -kt {llap_keytab} {llap_principal}; ")

      # prevent concurrent kinit
      kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
      kinit_lock.acquire()
      try:
        Execute(kinitcmd, user=hive_user,
                path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
                timeout=10)
      finally:
        kinit_lock.release()

    start_time = time.time()
    if STACK_NAME in configurations and STACK_ROOT in configurations:
      stack_root = stack_tools.get_stack_root(configurations[STACK_NAME], configurations[STACK_ROOT])
      llap_status_cmd = stack_root + format("/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}")
    else:
      llap_status_cmd = STACK_ROOT_DEFAULT + format("/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}")

    code, output, error = shell.checked_call(llap_status_cmd, user=hive_user,
                                             stderr=subprocess.PIPE,
                                             timeout=check_command_timeout,
                                             logoutput=False)
    # Call for getting JSON
    llap_app_info = make_valid_json(output)

    if llap_app_info is None or 'state' not in llap_app_info:
      alert_label = traceback.format_exc()
      result_code = UKNOWN_STATUS_CODE
      return (result_code, [alert_label])

    retrieved_llap_app_state = llap_app_info['state'].upper()

    if retrieved_llap_app_state in ['RUNNING_ALL']:
      result_code = OK_RESULT_CODE
      total_time = time.time() - start_time
      alert_label = OK_MESSAGE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time)
    elif retrieved_llap_app_state in ['RUNNING_PARTIAL']:
      live_instances = 0
      desired_instances = 0
      percentInstancesUp = 0
      percent_desired_instances_to_be_up = 80

      # Get 'live' and 'desired' instances
      if 'liveInstances' not in llap_app_info or 'desiredInstances' not in llap_app_info:
        result_code = CRITICAL_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time)
        return (result_code, [alert_label])

      live_instances = llap_app_info['liveInstances']
      desired_instances = llap_app_info['desiredInstances']
      if live_instances < 0 or desired_instances <= 0:
        result_code = CRITICAL_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time)
        return (result_code, [alert_label])

      percentInstancesUp = float(live_instances) / desired_instances * 100
      if percentInstancesUp >= percent_desired_instances_to_be_up:
        result_code = OK_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state),
                                                              total_time,
                                                              llap_app_info['liveInstances'],
                                                              llap_app_info['desiredInstances'])
      else:
        result_code = CRITICAL_RESULT_CODE
        total_time = time.time() - start_time
        alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state),
                                                              total_time,
                                                              llap_app_info['liveInstances'],
                                                              llap_app_info['desiredInstances'])
    else:
      result_code = CRITICAL_RESULT_CODE
      total_time = time.time() - start_time
      alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time)
  except:
    alert_label = traceback.format_exc()
    result_code = UKNOWN_STATUS_CODE

  return (result_code, [alert_label])
def _llap_start(self, env, cleanup=False):
  import params
  env.set_params(params)
  Logger.info("Starting LLAP")

  # TODO, start only if not already running.
  # TODO : Currently hardcoded the params. Need to read the suggested values from hive2/hive-site.xml.
  # TODO, ensure that script works as hive from cmd when not cd'ed in /home/hive
  # Needs permission to write to hive home dir.

  cmd = ''
  if params.security_enabled:
    cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances 1 -slider-am-container-mb "
                 "{slider_am_container_mb} --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                 "{hive_llap_keytab_file} --slider-principal {hive_headless_keytab} --loglevel INFO")
  else:
    cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances 1 -slider-am-container-mb {slider_am_container_mb} --loglevel INFO")

  run_file_path = None
  try:
    Logger.info(format("Command: {cmd}"))
    cmd = cmd.split()
    code, output, error = shell.checked_call(cmd, user=params.hive_user, stderr=subprocess.PIPE, logoutput=True)

    if code != 0 or output is None:
      raise Fail("Command failed with either non-zero return code or no output.")

    # E.g., output:
    # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
    exp = r"Prepared (.*?run.sh) for running LLAP"
    m = re.match(exp, output, re.I)
    if m and len(m.groups()) == 1:
      run_file_name = m.group(1)
      run_file_path = os.path.join(params.hive_user_home_dir, run_file_name)
    else:
      raise Fail("Did not find run.sh file in output: " + str(output))

    Logger.info(format("Run file path: {run_file_path}"))
    if os.path.isfile(run_file_path):
      Execute(run_file_path, user=params.hive_user)

      # TODO : Sleep below is not a good idea. We need to check the status of LLAP app to figure out it got
      # launched properly and is in running state. Then go ahead with Hive Interactive Server start.
      Logger.info("Sleeping for 30 secs")
      time.sleep(30)
      Logger.info("LLAP app deployed successfully.")
      return True
    else:
      raise Fail(format("Did not find run file {run_file_path}"))
  except:
    # Attempt to clean up the packaged application, or potentially rename it with a .bak
    if run_file_path is not None and cleanup:
      try:
        parent_dir = os.path.dirname(run_file_path)
        if os.path.isdir(parent_dir):
          shutil.rmtree(parent_dir)
      except Exception, e:
        Logger.error("Could not cleanup LLAP app package. Error: " + str(e))

    # throw the original exception
    raise
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

# MUST be run on ambari-server host
import json
import time

from resource_management.core.shell import checked_call, call

# Change this to hostname of your ambari-server
HOSTNAME = checked_call("hostname -f")[1].strip()

############# Configurations (feel free to change) #############

SERVICE_NAME = "STORM"

COMPONENTS = ["NIMBUS", "SUPERVISOR"]

COMPONENTS_TO_HOSTS = [
  {"NIMBUS": HOSTNAME},
  {"SUPERVISOR": HOSTNAME},
  #{"SUPERVISOR": "c6402.ambari.apache.org"},
def main():
  # add service
  checked_call('curl -H \'X-Requested-By:anything\' -i -X POST -d \'[{{"ServiceInfo":{{"service_name":"{service_name}"}}}}]\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services'
               .format(service_name=SERVICE_NAME, server_url=SERVER_URL, cluster_name=CLUSTER_NAME))

  # add components
  for component in COMPONENTS:
    checked_call('curl -H \'X-Requested-By:anything\' -i -X POST -d \'{{"components":[{{"ServiceComponentInfo":{{"component_name":"{component}"}}}}]}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services?ServiceInfo/service_name={service_name}'
                 .format(service_name=SERVICE_NAME, component=component, server_url=SERVER_URL, cluster_name=CLUSTER_NAME))

  # assign components to hosts
  for x in COMPONENTS_TO_HOSTS:
    for component, host in x.iteritems():
      checked_call('curl -H \'X-Requested-By:anything\' -i -X POST -d \'{{"host_components":[{{"HostRoles":{{"component_name":"{component}"}}}}]}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/hosts?Hosts/host_name={host}'
                   .format(host=host, component=component, server_url=SERVER_URL, cluster_name=CLUSTER_NAME))

  # update and create all the service-specific configurations
  checked_call('curl -H \'X-Requested-By:anything\' -X GET -u admin:admin {server_url}/api/v1/stacks2/HDP/versions/{stack_version}/stackServices/{service_name}/configurations?fields=* > /tmp/config.json'
               .format(server_url=SERVER_URL, stack_version=STACK_VERSION, service_name=SERVICE_NAME))

  with open('/tmp/config.json', "r") as f:
    d = json.load(f)

  configs = {}
  for x in d['items']:
    site_name = x['StackConfigurations']['type'][:-4]
    if not site_name in configs:
      configs[site_name] = {}
    config = configs[site_name]
    config[x['StackConfigurations']['property_name']] = x['StackConfigurations']['property_value']

  for site_name, site_content in configs.iteritems():
    code = call('/var/lib/ambari-server/resources/scripts/configs.sh get {hostname} {cluster_name} {site_name}'
                .format(hostname=HOSTNAME, cluster_name=CLUSTER_NAME, site_name=site_name))[0]

    if code:
      print "Adding new site: " + site_name
      checked_call('curl -i -H \'X-Requested-By:anything\' -X PUT -d \'{{"Clusters":{{"desired_configs":{{"type":"{site_name}","tag":"version1","properties":{site_content}}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}'
                   .format(site_name=site_name, site_content=json.dumps(site_content), server_url=SERVER_URL, cluster_name=CLUSTER_NAME))
    else:
      timestamp = int(time.time())
      print "Modifying site: " + site_name + " version" + str(timestamp)
      checked_call('/var/lib/ambari-server/resources/scripts/configs.sh get {hostname} {cluster_name} {site_name} /tmp/current_site.json'
                   .format(hostname=HOSTNAME, cluster_name=CLUSTER_NAME, site_name=site_name))

      with open('/tmp/current_site.json', "r") as f:
        fcontent = f.read()

      d = json.loads("{" + fcontent + "}")

      for k, v in site_content.iteritems():
        d['properties'][k] = v

      checked_call('curl -i -H \'X-Requested-By:anything\' -X PUT -d \'{{"Clusters":{{"desired_configs":{{"type":"{site_name}","tag":"version{timestamp}","properties":{site_content}}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}'
                   .format(site_name=site_name, timestamp=timestamp, site_content=json.dumps(d['properties']), server_url=SERVER_URL, cluster_name=CLUSTER_NAME))

  for site_name, site_configs in CONFIGS_TO_CHANGE.iteritems():
    for config_name, config_value in site_configs.iteritems():
      print "Adding config " + config_name + "=" + config_value + " to " + site_name
      checked_call('/var/lib/ambari-server/resources/scripts/configs.sh set {hostname} {cluster_name} {site_name} {config_name} {config_value}'
                   .format(config_name=config_name, config_value=config_value, hostname=HOSTNAME, cluster_name=CLUSTER_NAME, site_name=site_name))

  # install all new components
  checked_call('curl -H \'X-Requested-By:anything\' -i -X PUT -d \'{{"RequestInfo": {{"context" :"Installing Services"}}, "Body": {{"ServiceInfo": {{"state": "INSTALLED"}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services?ServiceInfo/state=INIT'
               .format(server_url=SERVER_URL, cluster_name=CLUSTER_NAME))
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, caller_label, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS,
                     method='', body='', header=''):
  """
  Makes a curl request using the kerberos credentials stored in a calculated cache file.
  The cache file is created by combining the supplied principal, keytab, user, and request
  name into a unique hash.

  This function will use the klist command to determine if the cache is expired and will
  perform a kinit if necessary. Additionally, it has an internal timer to force a kinit
  after a configurable amount of time. This is to prevent boundary issues where requests
  hit the edge of a ticket's lifetime.

  :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
  :param keytab: the location of the keytab to use when performing a kinit
  :param principal: the principal to use when performing a kinit
  :param url: the URL to request
  :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                            This ensures that multiple requests can use the same cache.
  :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
  :param return_only_http_code: True to return only the HTTP code, False to return GET content
  :param caller_label: an identifier to give context into the caller of this module (used for logging)
  :param user: the user to invoke the curl command as
  :param connection_timeout: if specified, a connection timeout for curl (default 10 seconds)
  :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                         klist cache is still valid.
  :return:
  """
  import uuid

  # start off false
  is_kinit_required = False

  # Create the kerberos credentials cache (ccache) file and set it in the environment to use
  # when executing curl. Use the md5 hash of the combination of the principal and keytab file
  # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
  # this file by user in order to prevent sharing of cache files by multiple users.
  ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

  curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
  if not os.path.exists(curl_krb_cache_path):
    os.makedirs(curl_krb_cache_path)
    os.chmod(curl_krb_cache_path, 0777)

  ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix, user, ccache_file_name)
  kerberos_env = {'KRB5CCNAME': ccache_file_path}

  # concurrent kinit's can cause the following error:
  # Internal credentials cache error while storing credentials while getting initial credentials
  kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
  kinit_lock.acquire()
  try:
    # If there are no tickets in the cache or they are expired, perform a kinit, else use what
    # is in the cache
    if krb_exec_search_paths:
      klist_path_local = get_klist_path(krb_exec_search_paths)
    else:
      klist_path_local = get_klist_path()

    # take a look at the last time kinit was run for the specified cache and force a new
    # kinit if it's time; this helps to avoid problems approaching ticket boundary when
    # executing a klist and then a curl
    last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
    current_time = long(time.time())
    if current_time - kinit_timer_ms > last_kinit_time:
      is_kinit_required = True

    # if the time has not expired, double-check that the cache still has a valid ticket
    if not is_kinit_required:
      klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
      is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

    # if kinit is required, then perform the kinit
    if is_kinit_required:
      if krb_exec_search_paths:
        kinit_path_local = get_kinit_path(krb_exec_search_paths)
      else:
        kinit_path_local = get_kinit_path()

      logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                   caller_label, ccache_file_path)

      # kinit; there's no need to set a ticket timeout as this will use the default invalidation
      # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
      # from working correctly
      shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local, ccache_file_path, keytab, principal),
                         user=user)

      # record kinit time
      _KINIT_CACHE_TIMES[ccache_file_name] = current_time
    else:
      # no kinit needed, use the cache
      logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                   caller_label, ccache_file_path)
  finally:
    kinit_lock.release()

  # check if cookies dir exists, if not then create it
  cookies_dir = os.path.join(tmp_dir, "cookies")
  if not os.path.exists(cookies_dir):
    os.makedirs(cookies_dir)

  cookie_file_name = str(uuid.uuid4())
  cookie_file = os.path.join(cookies_dir, cookie_file_name)

  start_time = time.time()
  error_msg = None

  # setup timeouts for the request; ensure we use integers since that is what curl needs
  connection_timeout = int(connection_timeout)
  maximum_timeout = connection_timeout + 2

  try:
    if return_only_http_code:
      _, curl_stdout, curl_stderr = get_user_call_output(['curl', '--location-trusted', '-k', '--negotiate', '-u', ':',
                                                          '-b', cookie_file, '-c', cookie_file, '-w', '%{http_code}',
                                                          url, '--connect-timeout', str(connection_timeout),
                                                          '--max-time', str(maximum_timeout), '-o', '/dev/null'],
                                                         user=user, env=kerberos_env)
    else:
      curl_command = ['curl', '--location-trusted', '-k', '--negotiate', '-u', ':',
                      '-b', cookie_file, '-c', cookie_file,
                      url, '--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout)]
      # returns response body
      if len(method) > 0 and len(body) == 0 and len(header) == 0:
        curl_command.extend(['-X', method])
      elif len(method) > 0 and len(body) == 0 and len(header) > 0:
        curl_command.extend(['-H', header, '-X', method])
      elif len(method) > 0 and len(body) > 0 and len(header) == 0:
        curl_command.extend(['-X', method, '-d', body])
      elif len(method) > 0 and len(body) > 0 and len(header) > 0:
        curl_command.extend(['-H', header, '-X', method, '-d', body])

      _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)
  except Fail:
    if logger.isEnabledFor(logging.DEBUG):
      logger.exception("Unable to make a curl request for {0}.".format(caller_label))
    raise
  finally:
    if os.path.isfile(cookie_file):
      os.remove(cookie_file)

  # empty quotes evaluates to false
  if curl_stderr:
    error_msg = curl_stderr

  time_millis = time.time() - start_time

  # empty quotes evaluates to false
  if curl_stdout:
    if return_only_http_code:
      return (int(curl_stdout), error_msg, time_millis)
    else:
      return (curl_stdout, error_msg, time_millis)

  logger.debug("The curl response for %s is empty; standard error = %s", caller_label, str(error_msg))
  return ("", error_msg, time_millis)
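# Hedged usage sketch for curl_krb_request (every path, principal, and URL below is
# a placeholder, not taken from the function above): fetch only the HTTP status code
# of a SPNEGO-protected endpoint as a given user, reusing the ccache keyed by
# principal+keytab and forcing a fresh kinit only when the timer or klist demand it.
#   http_code, error_msg, time_millis = curl_krb_request(
#     "/var/lib/ambari-agent/tmp",                        # tmp_dir for ccache and cookies
#     "/etc/security/keytabs/smokeuser.headless.keytab",  # keytab (placeholder)
#     "ambari-qa@EXAMPLE.COM",                            # principal (placeholder)
#     "http://namenode.example.com:50070/jmx",            # url (placeholder)
#     "nn_jmx",                                           # cache_file_prefix
#     None,                                               # krb_exec_search_paths: use defaults
#     True,                                               # return_only_http_code
#     "alert_namenode_health",                            # caller_label for logging
#     "ambari-qa")                                        # user to run curl as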
def _llap_start(self, env, cleanup=False):
  import params
  env.set_params(params)

  if params.hive_server_interactive_ha:
    # HSI HA is enabled; check whether LLAP is already running.
    Logger.info("HSI HA is enabled. Checking if LLAP is already running ...")
    status = self.check_llap_app_status(params.llap_app_name, 2, params.hive_server_interactive_ha)
    if status:
      Logger.info("LLAP app '{0}' is already running.".format(params.llap_app_name))
      return True
    else:
      Logger.info("LLAP app '{0}' is not running. LLAP will be started.".format(params.llap_app_name))

  # Clean up the LLAP package folders left over from earlier run(s).
  self._cleanup_past_llap_package_dirs()

  Logger.info("Starting LLAP")
  LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()

  unique_name = "llap-yarn-service_%s" % datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')

  cmd = format("{stack_root}/current/hive-server2/bin/hive --service llap --size {params.llap_daemon_container_size}m "
               "--startImmediately --name {params.llap_app_name} --cache {params.hive_llap_io_mem_size}m "
               "--xmx {params.llap_heap_size}m --loglevel {params.llap_log_level} "
               "--output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}")

  # Append params that are supported from the Hive LLAP GA version.
  # TODO: All the code related to Slider Anti-affinity will be removed and replaced with YARN rich
  # placement once YARN-6599 (umbrella YARN-6592) is committed.

  # Figure out the Slider Anti-affinity (AA) placement to be used.
  # YARN does not support anti-affinity, so Slider implements AA by means of exclusion lists: it starts
  # containers one by one and excludes the nodes it gets (adding a delay of ~2 sec/machine). When the
  # configured LLAP container memory size is more than half of the YARN NodeManager memory, AA is
  # implicit and should be avoided.
  slider_placement = 4
  if long(params.llap_daemon_container_size) > (0.5 * long(params.yarn_nm_mem)):
    slider_placement = 0
    Logger.info("Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * "
                "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem))
  else:
    Logger.info("Setting slider_placement : 4, as llap_daemon_container_size : {0} <= 0.5 * "
                "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem))

  cmd += format(" --service-placement {slider_placement} --skiphadoopversion --auxhbase=false --skiphbasecp "
                "--instances {params.num_llap_daemon_running_nodes}")

  # Set up the logger (GA version only).
  cmd += format(" --logger {params.llap_logger}")

  if params.security_enabled:
    llap_keytab_splits = params.hive_llap_keytab_file.split("/")
    Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
    cmd += format(" --service-keytab-dir .yarn/keytabs/{params.hive_user}/ --service-keytab "
                  "{llap_keytab_splits[4]} --service-principal {params.hive_llap_principal}")

  # Add the aux jars if they are specified; if empty, the param is not needed.
  if params.hive_aux_jars:
    cmd += format(" --auxjars {params.hive_aux_jars}")

  # Append args.
  llap_java_args = InlineTemplate(params.llap_app_java_opts).get_content()
  cmd += format(" --args \" {llap_java_args}\"")

  # Append metaspace size to args.
  if params.java_version > 7 and params.llap_daemon_container_size > 4096:
    if params.llap_daemon_container_size <= 32768:
      metaspaceSize = "256m"
    else:
      metaspaceSize = "1024m"
    # The command currently ends with the closing quote of --args; strip it, append the flag, re-close the quote.
    cmd = cmd[:-1] + " -XX:MetaspaceSize=" + metaspaceSize + "\""

  try:
    Logger.info(format("LLAP start command: {cmd}"))
    code, output, error = shell.checked_call(cmd, user=params.hive_user, quiet=True, stderr=subprocess.PIPE,
                                             logoutput=True,
                                             env={'HIVE_CONF_DIR': params.hive_server_interactive_conf_dir})

    if code != 0 or output is None:
      raise Fail("Command failed with either non-zero return code or no output.")

    # Check the status of the LLAP app to verify that it launched properly and is in the RUNNING state
    # before going ahead with the Hive Interactive Server start.
    status = self.check_llap_app_status(params.llap_app_name, params.num_retries_for_checking_llap_status)
    if status:
      Logger.info("LLAP app '{0}' deployed successfully.".format(params.llap_app_name))
      return True
    else:
      Logger.error("LLAP app '{0}' deployment unsuccessful.".format(params.llap_app_name))
      return False
  except:
    if params.hive_server_interactive_ha:
      Logger.error("Exception occurred. Checking if LLAP was started by another HSI instance ...")
      status = self.check_llap_app_status(params.llap_app_name, 2, params.hive_server_interactive_ha)
      if status:
        Logger.info("LLAP app '{0}' is running.".format(params.llap_app_name))
        return True
      else:
        Logger.info("LLAP app '{0}' is not running.".format(params.llap_app_name))
    raise  # re-raise the original exception
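# Illustrative sketch (an assumption, not part of the original source): the MetaspaceSize append
# above relies on `cmd` ending with the closing quote of the --args value, so the last character
# is stripped, the JVM flag is appended, and the quote is restored. A minimal standalone
# demonstration of that string surgery:
def append_to_quoted_args(cmd, extra_flag):
  # cmd is expected to end with: --args " ... "
  assert cmd.endswith('"'), "command must end with the closing --args quote"
  return cmd[:-1] + " " + extra_flag + '"'

# append_to_quoted_args('hive --service llap --args " -Xss512k"', '-XX:MetaspaceSize=256m')
# -> 'hive --service llap --args " -Xss512k -XX:MetaspaceSize=256m"'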
def action_remove(self):
  if self.user:
    command = ['userdel', self.resource.username]
    shell.checked_call(command)
    Logger.info("Removed user %s" % self.resource)
def chmod(path, mode):
  shell.checked_call(["chmod", oct(mode), path], sudo=True)
def flume(action=None):
  import params

  if action == 'config':
    # Remove previously defined meta files.
    for n in find_expected_agent_names(params.flume_conf_dir):
      File(os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
           action="delete")

    Directory(params.flume_run_dir,
              group=params.user_group,
              owner=params.flume_user)

    Directory(params.flume_conf_dir,
              create_parents=True,
              owner=params.flume_user)

    Directory(params.flume_log_dir,
              group=params.user_group,
              owner=params.flume_user,
              create_parents=True,
              cd_access="a",
              mode=0755)

    flume_agents = {}
    if params.flume_conf_content is not None:
      flume_agents = build_flume_topology(params.flume_conf_content)

    for agent in flume_agents.keys():
      flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
      flume_agent_conf_file = os.path.join(flume_agent_conf_dir, 'flume.conf')
      flume_agent_meta_file = os.path.join(flume_agent_conf_dir, 'ambari-meta.json')
      flume_agent_log4j_file = os.path.join(flume_agent_conf_dir, 'log4j.properties')
      flume_agent_env_file = os.path.join(flume_agent_conf_dir, 'flume-env.sh')

      Directory(flume_agent_conf_dir,
                owner=params.flume_user)

      PropertiesFile(flume_agent_conf_file,
                     properties=flume_agents[agent],
                     owner=params.flume_user,
                     mode=0644)

      File(flume_agent_log4j_file,
           content=InlineTemplate(params.flume_log4j_content, agent_name=agent),
           owner=params.flume_user,
           mode=0644)

      File(flume_agent_meta_file,
           content=json.dumps(ambari_meta(agent, flume_agents[agent])),
           owner=params.flume_user,
           mode=0644)

      File(flume_agent_env_file,
           owner=params.flume_user,
           content=InlineTemplate(params.flume_env_sh_template))

      if params.has_metric_collector:
        File(os.path.join(flume_agent_conf_dir, "flume-metrics2.properties"),
             owner=params.flume_user,
             content=Template("flume-metrics2.properties.j2"))

  elif action == 'start':
    # The desired state for the service should be STARTED.
    if len(params.flume_command_targets) == 0:
      _set_desired_state('STARTED')

    # It is important to run this command as a background process.
    flume_base = as_user(format("{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} "
                                "> {flume_log_dir}/{{4}}.out 2>&1"),
                         params.flume_user,
                         env={'JAVA_HOME': params.java_home}) + " &"

    for agent in cmd_target_names():
      flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
      flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
      flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

      if not os.path.isfile(flume_agent_conf_file):
        continue

      if not is_flume_process_live(flume_agent_pid_file):
        # TODO someday make the ganglia ports configurable
        extra_args = ''
        if params.ganglia_server_host is not None:
          extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
          extra_args = extra_args.format(params.ganglia_server_host, '8655')
        if params.has_metric_collector:
          # TODO check if this is used.
          extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                       '-Dflume.monitoring.node={0}:{1}'
          extra_args = extra_args.format(params.metric_collector_host, params.metric_collector_port)

        flume_cmd = flume_base.format(agent, flume_agent_conf_dir, flume_agent_conf_file, extra_args, agent)

        Execute(flume_cmd,
                wait_for_finish=False,
                environment={'JAVA_HOME': params.java_home})

        # Startup sometimes spawns a couple of threads, so only the first (oldest) pgrep line may count.
        pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
                  " | " + as_sudo(('tee', flume_agent_pid_file)) + " && test ${PIPESTATUS[0]} -eq 0"

        try:
          Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)
        except:
          show_logs(params.flume_log_dir, params.flume_user)
          raise

  elif action == 'stop':
    # The desired state for the service should be INSTALLED.
    if len(params.flume_command_targets) == 0:
      _set_desired_state('INSTALLED')

    pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")
    if 0 == len(pid_files):
      return

    agent_names = cmd_target_names()

    for agent in agent_names:
      pid_file = format("{flume_run_dir}/{agent}.pid")

      if is_flume_process_live(pid_file):
        pid = shell.checked_call(("cat", pid_file), sudo=True)[1].strip()
        Execute(("kill", "-15", pid), sudo=True)  # the kill command has to be a tuple
        if not await_flume_process_termination(pid_file, try_count=30):
          Execute(("kill", "-9", pid), sudo=True)
          if not await_flume_process_termination(pid_file, try_count=10):
            show_logs(params.flume_log_dir, params.flume_user)
            raise Fail("Can't stop flume agent: {0}".format(agent))

      File(pid_file, action='delete')
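# Hedged sketch (an assumption): await_flume_process_termination is called above but not shown in
# this excerpt. A minimal polling implementation consistent with its call sites (pid_file plus a
# try_count) could look like this, reusing is_flume_process_live with a fixed sleep between checks.
import time

def await_flume_process_termination(pid_file, try_count=30, sleep_seconds=1):
  # Poll up to try_count times, returning True as soon as the process is gone.
  for _ in range(try_count):
    if not is_flume_process_live(pid_file):
      return True
    time.sleep(sleep_seconds)
  # One final check after the last sleep.
  return not is_flume_process_live(pid_file)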
def makedir(path, mode):
  shell.checked_call(["mkdir", path], sudo=True)
  chmod(path, mode)
def _llap_start(self, env, cleanup=False):
  import params
  env.set_params(params)
  Logger.info("Starting LLAP")
  LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()
  LLAP_APP_NAME = 'llap0'

  unique_name = "llap-slider%s" % datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')

  cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances {params.num_llap_nodes}"
               " --slider-am-container-mb {params.slider_am_container_mb} --size {params.llap_daemon_container_size}m"
               " --cache {params.hive_llap_io_mem_size}m --xmx {params.llap_heap_size}m --loglevel {params.llap_log_level}"
               " --output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}")

  if params.security_enabled:
    llap_keytab_splits = params.hive_llap_keytab_file.split("/")
    Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
    cmd += format(" --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                  "{llap_keytab_splits[4]} --slider-principal {params.hive_llap_principal}")

  # Add the aux jars if they are specified; if empty, the param is not needed.
  if params.hive_aux_jars:
    cmd += format(" --auxjars {params.hive_aux_jars}")

  # Append args.
  llap_java_args = InlineTemplate(params.llap_app_java_opts).get_content()
  cmd += format(" --args \" {llap_java_args}\"")

  run_file_path = None
  try:
    Logger.info(format("Command: {cmd}"))
    code, output, error = shell.checked_call(cmd, user=params.hive_user, stderr=subprocess.PIPE, logoutput=True)

    if code != 0 or output is None:
      raise Fail("Command failed with either non-zero return code or no output.")

    # E.g., output:
    # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
    exp = r"Prepared (.*?run.sh) for running LLAP"
    for line in output.split("\n"):
      line = line.strip()
      m = re.match(exp, line, re.I)
      if m and len(m.groups()) == 1:
        run_file_name = m.group(1)
        run_file_path = os.path.join(params.hive_user_home_dir, run_file_name)
        break
    if not run_file_path:
      raise Fail("Did not find run.sh file in output: " + str(output))

    Logger.info(format("Run file path: {run_file_path}"))
    Execute(run_file_path, user=params.hive_user, logoutput=True)
    Logger.info("Submitted LLAP app name : {0}".format(LLAP_APP_NAME))

    # Check the status of the LLAP app to verify that it launched properly and is in the RUNNING state
    # before going ahead with the Hive Interactive Server start.
    status = self.check_llap_app_status(LLAP_APP_NAME, params.num_retries_for_checking_llap_status)
    if status:
      Logger.info("LLAP app '{0}' deployed successfully.".format(LLAP_APP_NAME))
      return True
    else:
      Logger.error("LLAP app '{0}' deployment unsuccessful.".format(LLAP_APP_NAME))
      return False
  except:
    # Attempt to clean up the packaged application directory.
    if run_file_path is not None and cleanup:
      try:
        parent_dir = os.path.dirname(run_file_path)
        if os.path.isdir(parent_dir):
          shutil.rmtree(parent_dir)
      except Exception as e:
        Logger.error("Could not clean up LLAP app package. Error: " + str(e))
    # Re-raise the original exception.
    raise
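# Illustrative sketch (an assumption, not part of the original source): the run.sh extraction above
# can be exercised in isolation. This standalone helper applies the same regex to sample output.
import re

def parse_run_file_name(output):
  exp = r"Prepared (.*?run.sh) for running LLAP"
  for line in output.split("\n"):
    m = re.match(exp, line.strip(), re.I)
    if m:
      return m.group(1)
  return None

# parse_run_file_name("Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider")
# -> 'llap-slider-05Apr2016/run.sh'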
def unlink(path):
  shell.checked_call(["rm", "-f", path], sudo=True)
def _llap_start(self, env, cleanup=False):
  import params
  env.set_params(params)

  if params.hive_server_interactive_ha:
    # HSI HA is enabled; check whether LLAP is already running.
    Logger.info("HSI HA is enabled. Checking if LLAP is already running ...")
    if params.stack_supports_hive_interactive_ga:
      status = self.check_llap_app_status_in_llap_ga(params.llap_app_name, 2, params.hive_server_interactive_ha)
    else:
      status = self.check_llap_app_status_in_llap_tp(params.llap_app_name, 2, params.hive_server_interactive_ha)
    if status:
      Logger.info("LLAP app '{0}' is already running.".format(params.llap_app_name))
      return True
    else:
      Logger.info("LLAP app '{0}' is not running. LLAP will be started.".format(params.llap_app_name))

  # Clean up the LLAP package folders left over from earlier run(s).
  self._cleanup_past_llap_package_dirs()

  Logger.info("Starting LLAP")
  LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()

  unique_name = "llap-slider%s" % datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')

  cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap "
               "--slider-am-container-mb {params.slider_am_container_mb} "
               "--size {params.llap_daemon_container_size}m --cache {params.hive_llap_io_mem_size}m "
               "--xmx {params.llap_heap_size}m --loglevel {params.llap_log_level} "
               "{params.llap_extra_slider_opts} --output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}")

  # Append params that are supported from the Hive LLAP GA version.
  if params.stack_supports_hive_interactive_ga:
    # Figure out the Slider Anti-affinity (AA) placement to be used.
    # YARN does not support anti-affinity, so Slider implements AA by means of exclusion lists: it starts
    # containers one by one and excludes the nodes it gets (adding a delay of ~2 sec/machine). When the
    # configured LLAP container memory size is more than half of the YARN NodeManager memory, AA is
    # implicit and should be avoided.
    slider_placement = 4
    if long(params.llap_daemon_container_size) > (0.5 * long(params.yarn_nm_mem)):
      slider_placement = 0
      Logger.info("Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * "
                  "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem))
    else:
      Logger.info("Setting slider_placement : 4, as llap_daemon_container_size : {0} <= 0.5 * "
                  "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem))

    cmd += format(" --slider-placement {slider_placement} --skiphadoopversion --skiphbasecp "
                  "--instances {params.num_llap_daemon_running_nodes}")

    # Set up the logger (GA version only).
    cmd += format(" --logger {params.llap_logger}")
  else:
    cmd += format(" --instances {params.num_llap_nodes}")

  if params.security_enabled:
    llap_keytab_splits = params.hive_llap_keytab_file.split("/")
    Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
    cmd += format(" --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                  "{llap_keytab_splits[4]} --slider-principal {params.hive_llap_principal}")

  # Add the aux jars if they are specified; if empty, the param is not needed.
  if params.hive_aux_jars:
    cmd += format(" --auxjars {params.hive_aux_jars}")

  # Append args.
  llap_java_args = InlineTemplate(params.llap_app_java_opts).get_content()
  cmd += format(" --args \" {llap_java_args}\"")

  # Append metaspace size to args.
  if params.java_version > 7 and params.llap_daemon_container_size > 4096:
    if params.llap_daemon_container_size <= 32768:
      metaspaceSize = "256m"
    else:
      metaspaceSize = "1024m"
    # The command currently ends with the closing quote of --args; strip it, append the flag, re-close the quote.
    cmd = cmd[:-1] + " -XX:MetaspaceSize=" + metaspaceSize + "\""

  run_file_path = None
  try:
    Logger.info(format("LLAP start command: {cmd}"))
    code, output, error = shell.checked_call(cmd, user=params.hive_user, quiet=True, stderr=subprocess.PIPE,
                                             logoutput=True)

    if code != 0 or output is None:
      raise Fail("Command failed with either non-zero return code or no output.")

    # E.g., output:
    # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
    exp = r"Prepared (.*?run.sh) for running LLAP"
    for line in output.split("\n"):
      line = line.strip()
      m = re.match(exp, line, re.I)
      if m and len(m.groups()) == 1:
        run_file_name = m.group(1)
        run_file_path = os.path.join(params.hive_user_home_dir, run_file_name)
        break
    if not run_file_path:
      raise Fail("Did not find run.sh file in output: " + str(output))

    Logger.info(format("Run file path: {run_file_path}"))
    Execute(run_file_path, user=params.hive_user, logoutput=True)
    Logger.info("Submitted LLAP app name : {0}".format(params.llap_app_name))

    # Check the status of the LLAP app to verify that it launched properly and is in the RUNNING state
    # before going ahead with the Hive Interactive Server start.
    if params.stack_supports_hive_interactive_ga:
      status = self.check_llap_app_status_in_llap_ga(params.llap_app_name,
                                                     params.num_retries_for_checking_llap_status)
    else:
      status = self.check_llap_app_status_in_llap_tp(params.llap_app_name,
                                                     params.num_retries_for_checking_llap_status)
    if status:
      Logger.info("LLAP app '{0}' deployed successfully.".format(params.llap_app_name))
      return True
    else:
      Logger.error("LLAP app '{0}' deployment unsuccessful.".format(params.llap_app_name))
      return False
  except:
    # Attempt to clean up the packaged application directory.
    if run_file_path is not None and cleanup:
      parent_dir = os.path.dirname(run_file_path)
      Directory(parent_dir,
                action="delete",
                ignore_failures=True)
    # Re-raise the original exception.
    raise
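# Hedged sketch (an assumption, not part of the original source): the anti-affinity decision used
# in both _llap_start variants above can be factored into a pure helper, which makes the threshold
# rule (container size greater than half of NodeManager memory -> placement 0) easy to test.
def compute_slider_placement(llap_daemon_container_size, yarn_nm_mem):
  if long(llap_daemon_container_size) > (0.5 * long(yarn_nm_mem)):
    return 0  # AA is implicit at this size; avoid Slider's exclusion-list anti-affinity
  return 4    # use Slider anti-affinity placement

# compute_slider_placement(4096, 16384) -> 4
# compute_slider_placement(12288, 16384) -> 0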
def action_remove(self):
  if self.group:
    command = ['groupdel', self.resource.group_name]
    shell.checked_call(command)
    Logger.info("Removed group %s" % self.resource)
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return

  # It is important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since in the middle of a Rolling/Express "
                 "upgrade and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in the cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir)
                   if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s"
              % (num_files, atlas_hive_hook_impl_dir))

  # The sharelib listing can contain over 100 files, so take only the first 5 lines after "Available ShareLib".
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL.
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')
  code, out = checked_call(command, user=params.oozie_user, tries=10, try_sleep=5, logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} "
                     "files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True)

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True)
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes.
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL.
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True)
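# Hedged sketch (an assumption): __parse_sharelib_from_output is called above but not shown in this
# excerpt. Given the grep above captures "[Available ShareLib]" plus the next 5 lines, a plausible
# implementation scans the captured output for the first hdfs:// path containing "/hive".
def __parse_sharelib_from_output(out):
  if out is not None:
    for line in out.splitlines():
      line = line.strip()
      # Sharelib entries are printed as fully qualified DFS paths.
      if line.startswith("hdfs://") and "/hive" in line:
        return line
  return None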
def remove_package(self, name):
  shell.checked_call(REMOVE_CMD % (name))
def install_package(self, name):
  shell.checked_call(INSTALL_CMD % (name))
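# Hedged note (an assumption): REMOVE_CMD and INSTALL_CMD are not defined in this excerpt. They are
# presumably shell format strings with a single %s placeholder for the package name, e.g. for a
# yum-based package manager something like:
#
#   INSTALL_CMD = "/usr/bin/yum -d 0 -e 0 -y install %s"
#   REMOVE_CMD = "/usr/bin/yum -d 0 -e 0 -y erase %s"
#
# so checked_call(INSTALL_CMD % name) runs the fully substituted command string.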
def checked_call(self, cmd, **kwargs):
  return shell.checked_call(cmd, **kwargs)
def install_package(self, name, use_repos=[], skip_repos=[]):
  if use_repos or not self._check_existence(name):
    cmd = INSTALL_CMD[self.get_logoutput()]
    copied_sources_files = []
    is_tmp_dir_created = False
    if use_repos:
      is_tmp_dir_created = True
      apt_sources_list_tmp_dir = tempfile.mkdtemp(suffix="-ambari-apt-sources-d")
      Logger.info("Temporary sources directory was created: %s" % apt_sources_list_tmp_dir)
      if 'base' not in use_repos:
        cmd = cmd + ['-o', 'Dir::Etc::SourceList=%s' % EMPTY_FILE]
      for repo in use_repos:
        if repo != 'base':
          new_sources_file = os.path.join(apt_sources_list_tmp_dir, repo + '.list')
          Logger.info("Temporary sources file will be copied: %s" % new_sources_file)
          sudo.copy(os.path.join(APT_SOURCES_LIST_DIR, repo + '.list'), new_sources_file)
          copied_sources_files.append(new_sources_file)
      cmd = cmd + ['-o', 'Dir::Etc::SourceParts=%s' % apt_sources_list_tmp_dir]

    cmd = cmd + [name]
    Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
    code, out = shell.call(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())

    # If the install failed, the apt cache may be stale (apt-get update may not have been run
    # recently); refresh the repo metadata and retry once.
    if code:
      Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
      Logger.info("Failed to install package %s. Executing `%s`" % (name, string_cmd_from_args_list(REPO_UPDATE_CMD)))
      code, out = shell.call(REPO_UPDATE_CMD, sudo=True, logoutput=self.get_logoutput())
      if code:
        Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))
      Logger.info("Retrying to install package %s" % (name))
      shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())

    if is_tmp_dir_created:
      for temporary_sources_file in copied_sources_files:
        Logger.info("Removing temporary sources file: %s" % temporary_sources_file)
        os.remove(temporary_sources_file)
      Logger.info("Removing temporary sources directory: %s" % apt_sources_list_tmp_dir)
      os.rmdir(apt_sources_list_tmp_dir)
  else:
    Logger.info("Skipping installation of existing package %s" % (name))
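# Illustrative sketch (an assumption, not part of the original source): the repo pinning above works
# by pointing apt at an empty sources.list and a temporary sources.d directory containing only the
# selected .list files. A minimal standalone helper that assembles the same option list:
def build_pinned_apt_cmd(base_cmd, name, tmp_sources_dir, empty_file="/dev/null"):
  cmd = list(base_cmd)
  # Replace the default sources.list with an empty file so only pinned repos are visible.
  cmd += ['-o', 'Dir::Etc::SourceList=%s' % empty_file]
  # Point sources.list.d at the temporary directory holding the copied repo files.
  cmd += ['-o', 'Dir::Etc::SourceParts=%s' % tmp_sources_dir]
  return cmd + [name]

# build_pinned_apt_cmd(['/usr/bin/apt-get', '-q', '-y', 'install'], 'foo', '/tmp/x-ambari-apt-sources-d')
# -> ['/usr/bin/apt-get', '-q', '-y', 'install', '-o', 'Dir::Etc::SourceList=/dev/null',
#     '-o', 'Dir::Etc::SourceParts=/tmp/x-ambari-apt-sources-d', 'foo']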
def update(self, repo_file_path):
  Logger.info("Flushing package manager cache since repo file content is about to change")
  checked_call(self.update_cmd, sudo=True)
def chown(path, owner, group):
  if owner:
    shell.checked_call(["chown", owner, path], sudo=True)
  if group:
    shell.checked_call(["chgrp", group, path], sudo=True)
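# Hedged usage sketch (not from the original source): taken together, the sudo-wrapping helpers
# above (chmod, makedir, unlink, chown) compose into ordinary file-provisioning sequences, e.g.:
def provision_run_dir(path, owner, group, mode=0755):
  makedir(path, mode)        # mkdir via sudo, then chmod to the requested mode
  chown(path, owner, group)  # chown/chgrp via sudo; either argument may be falsy to skip it

# provision_run_dir("/var/run/example", "example_user", "hadoop")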