def update_hive_types(self):
  try:
    updated_hive_typedefs_json = Template("typedef_update.json").get_content()
    atlas_rest_endpoint = self.atlas_rest_address + "/api/atlas/v2/types/typedefs"

    Logger.info("Attempting to update existing Hive types with profile attributes on Atlas server at {0}".format(atlas_rest_endpoint))
    Logger.debug("Payload for the Hive model update request => {0}".format(updated_hive_typedefs_json))

    username_password = '{0}:{1}'.format(self.username, self.password)
    base_64_string = base64.encodestring(username_password).replace('\n', '')

    request = urllib2.Request(atlas_rest_endpoint, updated_hive_typedefs_json)
    request.add_header("Content-Type", "application/json")
    request.add_header("Accept", "application/json")
    request.add_header("Authorization", "Basic {0}".format(base_64_string))
    request.get_method = lambda: 'PUT'

    result = urllib2.urlopen(request, timeout=20)
    response_code = result.getcode()
    if response_code == 200:
      response_content = result.read()
      Logger.info("Hive types updated with profile attributes successfully")
      Logger.debug("Received content from Atlas server {0}".format(response_content))
  except urllib2.HTTPError, e:
    Logger.error("Error during Hive type updation. Http status code - {0}. {1}".format(e.code, e.read()))
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}

    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
def add_hive_profile_types(self):
  try:
    hive_profile_types_json = Template("hive_profiler_model.json").get_content()
    atlas_rest_endpoint = self.atlas_rest_address + "/api/atlas/v2/types/typedefs"

    Logger.info("Attempting to register Hive profile types at Atlas server at {0}".format(atlas_rest_endpoint))
    Logger.debug("Payload for the profiler model registration request => {0}".format(hive_profile_types_json))

    username_password = '{0}:{1}'.format(self.username, self.password)
    base_64_string = base64.encodestring(username_password).replace('\n', '')

    request = urllib2.Request(atlas_rest_endpoint, hive_profile_types_json)
    request.add_header("Content-Type", "application/json")
    request.add_header("Accept", "application/json")
    request.add_header("Authorization", "Basic {0}".format(base_64_string))

    result = urllib2.urlopen(request, timeout=20)
    response_code = result.getcode()
    if response_code == 200:
      response_content = result.read()
      Logger.info("Profile types registered successfully")
      Logger.debug("Received content from Atlas server {0}".format(response_content))
  except urllib2.HTTPError, e:
    Logger.error("Error during profile type registration. Http status code - {0}. {1}".format(e.code, e.read()))
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout, path=self.resource.path)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due timeout after %d seconds") % (self.resource.command, self.resource.timeout)

      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def check_stack_feature(stack_feature, stack_version):
  """
  Given a stack_feature and a specific stack_version, it validates that the feature is supported by the stack_version.
  IMPORTANT, notice that the mapping of feature to version comes from cluster-env if it exists there.
  :param stack_feature: Feature name to check if it is supported by the stack. For example: "rolling_upgrade"
  :param stack_version: Version of the stack
  :return: Will return True if successful, otherwise, False.
  """

  from resource_management.libraries.functions.default import default
  from resource_management.libraries.functions.version import compare_versions

  stack_features_config = default("/configurations/cluster-env/stack_features", None)

  if not stack_version:
    Logger.debug("Cannot determine if feature %s is supported since did not provide a stack version." % stack_feature)
    return False

  if stack_features_config:
    data = json.loads(stack_features_config)
    for feature in data["stack_features"]:
      if feature["name"] == stack_feature:
        if "min_version" in feature:
          min_version = feature["min_version"]
          if compare_versions(stack_version, min_version, format = True) < 0:
            return False
        if "max_version" in feature:
          max_version = feature["max_version"]
          if compare_versions(stack_version, max_version, format = True) >= 0:
            return False
        return True
  else:
    raise Fail("Stack features not defined by stack")

  return False
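# Illustrative sketch of the cluster-env "stack_features" payload that check_stack_feature
# parses; the feature names and version bounds below are assumptions, not a real stack definition.
_example_stack_features = json.dumps({
  "stack_features": [
    {"name": "rolling_upgrade", "min_version": "2.2.0.0"},
    {"name": "config_versioning", "min_version": "2.3.0.0", "max_version": "2.4.0.0"}
  ]
})
# With this mapping, check_stack_feature("rolling_upgrade", "2.3.0.0-1234") would return True,
# while check_stack_feature("config_versioning", "2.4.0.0-1") would return False, because the
# max_version bound is treated as exclusive (compare_versions(...) >= 0 rejects it).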
def status(self, env):
  cmd = 'ps -ef | grep proc_rangerkms | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    Logger.debug('KMS process not running')
    raise ComponentIsNotRunning()
  pass
def remove_atlas_hook_if_exists(merged_hive_interactive_site):
  if 'hive.exec.post.hooks' in merged_hive_interactive_site.keys():
    existing_hive_exec_post_hooks = merged_hive_interactive_site.get('hive.exec.post.hooks')
    if existing_hive_exec_post_hooks:
      hook_splits = existing_hive_exec_post_hooks.split(",")
      updated_hook_splits = [hook for hook in hook_splits if not hook.strip() == 'org.apache.atlas.hive.hook.HiveHook']
      updated_hooks_str = ",".join((str(hook)).strip() for hook in updated_hook_splits)
      if updated_hooks_str != existing_hive_exec_post_hooks:
        merged_hive_interactive_site['hive.exec.post.hooks'] = updated_hooks_str
        Logger.info("Updated Hive2/hive-site.xml 'hive.exec.post.hooks' value from : '{0}' to : '{1}'"
                    .format(existing_hive_exec_post_hooks, updated_hooks_str))
      else:
        Logger.info("No change done to Hive2/hive-site.xml 'hive.exec.post.hooks' value.")
  else:
    Logger.debug("'hive.exec.post.hooks' doesn't exist in Hive2/hive-site.xml")
def kill_zkfc(zkfc_user):
  """
  There are two potential methods for failing over the namenode, especially during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
  Option 2. Silent failover (not supported as of HDP 2.2.0.0)
  :param zkfc_user: User that started the ZKFC process.
  :return: Return True if ZKFC was killed, otherwise, false.
  """
  import params
  if params.dfs_ha_enabled:
    zkfc_pid_file = get_service_pid_file("zkfc", zkfc_user)
    if zkfc_pid_file:
      check_process = as_user(format("ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"), user=zkfc_user)
      code, out = shell.call(check_process)
      if code == 0:
        Logger.debug("ZKFC is running and will be killed.")
        kill_command = format("kill -15 `cat {zkfc_pid_file}`")
        Execute(kill_command,
                user=zkfc_user
        )
        File(zkfc_pid_file,
             action = "delete",
        )
        return True
  return False
def check_process_status(pid_file):
  """
  Function checks whether process is running.
  Process is considered running, if pid file exists, and process with
  a pid, mentioned in pid file is running
  If process is not running, will throw ComponentIsNotRunning exception

  @param pid_file: path to service pid file
  """
  if not pid_file or not os.path.isfile(pid_file):
    raise ComponentIsNotRunning()

  try:
    pid = int(sudo.read_file(pid_file))
  except:
    Logger.debug("Pid file {0} does not exist".format(pid_file))
    raise ComponentIsNotRunning()

  code, out = shell.call(["ps", "-p", str(pid)])
  if code:
    Logger.debug("Process with pid {0} is not running. Stale pid file"
                 " at {1}".format(pid, pid_file))
    raise ComponentIsNotRunning()
  pass
def check_process_status(pid_file):
  """
  Function checks whether process is running.
  Process is considered running, if pid file exists, and process with
  a pid, mentioned in pid file is running
  If process is not running, will throw ComponentIsNotRunning exception

  @param pid_file: path to service pid file
  """
  if not pid_file or not os.path.isfile(pid_file):
    raise ComponentIsNotRunning()

  with open(pid_file, "r") as f:
    try:
      pid = int(f.read())
    except:
      Logger.debug("Pid file {0} does not exist".format(pid_file))
      raise ComponentIsNotRunning()

    try:
      # Kill will not actually kill the process
      # From the doc:
      # If sig is 0, then no signal is sent, but error checking is still
      # performed; this can be used to check for the existence of a
      # process ID or process group ID.
      os.kill(pid, 0)
    except OSError:
      Logger.debug("Process with pid {0} is not running. Stale pid file"
                   " at {1}".format(pid, pid_file))
      raise ComponentIsNotRunning()
  pass
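# A minimal sketch of how a service's status() command would typically use
# check_process_status; the pid file path below is a hypothetical example.
def _example_status(env):
  example_pid_file = "/var/run/example-service/example.pid"  # illustrative path
  # Raises ComponentIsNotRunning if the pid file is missing, unreadable, or points
  # at a dead process, which the agent interprets as the component being down.
  check_process_status(example_pid_file)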
def link_config(old_conf, link_conf):
  """
  Creates a config link following:
  1. Checks if the old_conf location exists
  2. If it does, check if it's a link already
  3. Make a copy to /etc/[component]/conf.backup
  4. Remove the old directory and create a symlink to link_conf

  :old_conf: the old config directory, ie /etc/[component]/conf
  :link_conf: the new target for the config directory, ie /usr/hdp/current/[component-dir]/conf
  """
  if not os.path.exists(old_conf):
    Logger.debug("Skipping {0}; it does not exist".format(old_conf))
    return

  if os.path.islink(old_conf):
    Logger.debug("Skipping {0}; it is already a link".format(old_conf))
    return

  old_parent = os.path.abspath(os.path.join(old_conf, os.pardir))

  Logger.info("Linking {0} to {1}".format(old_conf, link_conf))

  old_conf_copy = os.path.join(old_parent, "conf.backup")
  if not os.path.exists(old_conf_copy):
    Execute(("cp", "-R", "-p", old_conf, old_conf_copy), sudo=True, logoutput=True)

  shutil.rmtree(old_conf, ignore_errors=True)

  # link /etc/[component]/conf -> /usr/hdp/current/[component]-client/conf
  Link(old_conf, to = link_conf)
def _may_manage_folder(dir_, last_mount_point_for_dir, is_non_root_dir, dirs_unmounted, error_messages, manage_dirs_on_root, curr_mount_point):
  may_manage_this_dir = True
  if last_mount_point_for_dir is None:
    if is_non_root_dir:
      may_manage_this_dir = True
    else:
      # root mount
      if manage_dirs_on_root:
        may_manage_this_dir = True
      else:
        Logger.warning("Will not manage the directory {0} since it's on root mount and cluster-env/manage_dirs_on_root == {1}".format(dir_, str(manage_dirs_on_root)))
        may_manage_this_dir = False

        # Do not add to the history file:
        dirs_unmounted.add(dir_)
  else:
    Logger.debug("Last mount for {0} in the history file is {1}".format(dir_, str(last_mount_point_for_dir)))

    if last_mount_point_for_dir == curr_mount_point:
      if is_non_root_dir or manage_dirs_on_root:
        Logger.debug("Will manage {0} since it's on the same mount point: {1}".format(dir_, str(last_mount_point_for_dir)))
        may_manage_this_dir = True
      else:
        Logger.warning("Will not manage {0} since it's on the root mount point and cluster-env/manage_dirs_on_root == {1}".format(dir_, str(manage_dirs_on_root)))
        may_manage_this_dir = False
    else:
      may_manage_this_dir = False
      dirs_unmounted.add(dir_)

      msg = "Directory {0} became unmounted from {1} . Current mount point: {2} .".format(dir_, last_mount_point_for_dir, curr_mount_point)
      error_messages.append(msg)
      Logger.warning(msg)
  return may_manage_this_dir
def hook(self, env):
  import params

  self.run_custom_hook('before-ANY')
  env.set_params(params)

  install_mysql_connector_java()
  install_repos()
  install_packages()
  ldap_client_conf()
  try:
    Execute('service nslcd start')
  except Exception as e:
    print 'nslcd restart error'
  link_libjvm()
  install_jce_policy()
  # hadoop rack awareness
  try:
    modify_rack_awareness()
  except Exception as e:
    Logger.debug(str(e))
  try:
    install_gpu_stack()
  except Exception as e:
    Logger.debug(str(e))
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}

    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd, env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish,
                         timeout=self.resource.timeout, pid_file=self.resource.pid_file,
                         poll_after=self.resource.poll_after)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due timeout after %d seconds") % (self.resource.command, self.resource.timeout)

      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        shell.checked_call(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def create_config_links(stack_id, stack_version):
  """
  Creates config links
  stack_id: stack id, ie HDP-2.3
  stack_version: version to set, ie 2.3.0.0-1234
  """
  if stack_id is None:
    Logger.info("Cannot create config links when stack_id is not defined")
    return

  args = stack_id.upper().split('-')
  if len(args) != 2:
    Logger.info("Unrecognized stack id {0}".format(stack_id))
    return

  if args[0] != "HDP":
    Logger.info("Unrecognized stack name {0}".format(args[0]))

  if version.compare_versions(version.format_hdp_stack_version(args[1]), "2.3.0.0") < 0:
    Logger.info("Cannot link configs unless HDP-2.3 or higher")
    return

  for k, v in PACKAGE_DIRS.iteritems():
    dirs = create(args[0], k, stack_version, dry_run = True)
    if 0 == len(dirs):
      Logger.debug("Package {0} is not installed".format(k))
    else:
      need = False
      for new_conf_dir in dirs:
        if not os.path.exists(new_conf_dir):
          need = True

      if need:
        Logger.info("Creating conf dirs {0} for {1}".format(",".join(dirs), k))
        try:
          select(args[0], k, stack_version)
        except Exception, err:
          # don't ruin someone's day
          Logger.logger.exception("'conf-select set' failed to link '{0}'. Error: {1}".format(k, str(err)))
def kill_zkfc(zkfc_user):
  """
  There are two potential methods for failing over the namenode, especially during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
  Option 2. Silent failover
  :param zkfc_user: User that started the ZKFC process.
  :return: Return True if ZKFC was killed, otherwise, false.
  """
  import params
  if params.dfs_ha_enabled:
    if params.zkfc_pid_file:
      check_process = as_user(format("ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"), user=zkfc_user)
      code, out = shell.call(check_process)
      if code == 0:
        Logger.debug("ZKFC is running and will be killed.")
        kill_command = format("kill -15 `cat {zkfc_pid_file}`")
        Execute(kill_command, user=zkfc_user)
        File(params.zkfc_pid_file,
             action="delete",
        )
        return True
  return False
def action_umount(self):
  if self.is_mounted():
    check_call(["umount", self.resource.mount_point])

    Logger.info("%s unmounted" % self)
  else:
    Logger.debug("%s is not mounted" % self)
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}

    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      shell.checked_call(self.resource.command,
                         logoutput=self.resource.logoutput,
                         cwd=self.resource.cwd,
                         env=self.resource.environment,
                         preexec_fn=_preexec_fn(self.resource),
                         user=self.resource.user,
                         wait_for_finish=self.resource.wait_for_finish)
      break
    except Fail as ex:
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
def check_nifi_process_status(self, pid_file):
  """
  Function checks whether process is running.
  Process is considered running, if pid file exists, and process with
  a pid, mentioned in pid file is running
  If process is not running, will throw ComponentIsNotRunning exception

  @param pid_file: path to service pid file
  """
  if not pid_file or not os.path.isfile(pid_file):
    raise ComponentIsNotRunning()

  try:
    lines = [line.rstrip('\n') for line in open(pid_file)]
    pid = int(lines[2].split('=')[1])
  except:
    Logger.warn("Pid file {0} does not exist".format(pid_file))
    raise ComponentIsNotRunning()

  code, out = shell.call(["ps", "-p", str(pid)])
  if code:
    Logger.debug("Process with pid {0} is not running. Stale pid file"
                 " at {1}".format(pid, pid_file))
    raise ComponentIsNotRunning()
  pass
def link_config(old_conf, link_conf):
  """
  Creates a config link following:
  1. Checks if the old_conf location exists
  2. If it does, check if it's a link already
  3. Make a copy to /etc/[component]/conf.backup
  4. Remove the old directory and create a symlink to link_conf

  :old_conf: the old config directory, ie /etc/[component]/conf
  :link_conf: the new target for the config directory, ie <stack-root>/current/[component-dir]/conf
  """
  if os.path.islink(old_conf):
    # if the link exists but is wrong, then change it
    if os.path.realpath(old_conf) != link_conf:
      Link(old_conf, to = link_conf)
    else:
      Logger.debug("Skipping {0}; it is already a link".format(old_conf))
    return

  if not os.path.exists(old_conf):
    Logger.debug("Skipping {0}; it does not exist".format(old_conf))
    return

  old_parent = os.path.abspath(os.path.join(old_conf, os.pardir))

  Logger.info("Linking {0} to {1}".format(old_conf, link_conf))

  old_conf_copy = os.path.join(old_parent, "conf.backup")
  if not os.path.exists(old_conf_copy):
    Execute(("cp", "-R", "-p", old_conf, old_conf_copy), sudo=True, logoutput=True)

  shutil.rmtree(old_conf, ignore_errors=True)

  # link /etc/[component]/conf -> <stack-root>/current/[component]-client/conf
  Link(old_conf, to = link_conf)
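# Usage sketch for link_config, with illustrative paths: back up /etc/hadoop/conf to
# /etc/hadoop/conf.backup and replace /etc/hadoop/conf with a symlink to the stack-managed
# directory. The exact component paths are assumptions for the example.
def _example_link_hadoop_conf():
  link_config("/etc/hadoop/conf", "/usr/hdp/current/hadoop-client/conf")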
def status(self, env):
  cmd = "ps -ef | grep proc_rangeradmin | grep -v grep"
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    Logger.debug("Ranger admin process not running")
    raise ComponentIsNotRunning()
  pass
def perform_grafana_post_call(url, payload, server):
  import params

  response = None
  data = None
  userAndPass = b64encode('{0}:{1}'.format(server.user, server.password))
  Logger.debug('POST payload: %s' % payload)
  headers = {"Content-Type": "application/json",
             "Content-Length": len(payload),
             'Authorization': 'Basic %s' % userAndPass}

  grafana_https_enabled = server.protocol.lower() == 'https'
  ca_certs = None
  if grafana_https_enabled:
    ca_certs = params.ams_grafana_ca_cert

  for i in xrange(0, params.grafana_connect_attempts):
    try:
      Logger.info("Connecting (POST) to %s:%s%s" % (server.host, server.port, url))

      conn = network.get_http_connection(
        server.host,
        int(server.port),
        grafana_https_enabled, ca_certs,
        ssl_version=Script.get_force_https_protocol_value()
      )
      conn.request("POST", url, payload, headers)

      response = conn.getresponse()
      Logger.info("Http response: %s %s" % (response.status, response.reason))
      if response.status == 401:  # Intermittent error thrown from Grafana
        if i < params.grafana_connect_attempts - 1:
          Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                      % (params.grafana_connect_retry_delay))
          time.sleep(params.grafana_connect_retry_delay)
          continue
      data = response.read()
      Logger.info("Http data: %s" % data)
      conn.close()
      break
    except (httplib.HTTPException, socket.error) as ex:
      if i < params.grafana_connect_attempts - 1:
        Logger.info("Connection to Grafana failed. Next retry in %s seconds."
                    % (params.grafana_connect_retry_delay))
        time.sleep(params.grafana_connect_retry_delay)
        continue
      else:
        raise Fail("Ambari Metrics Grafana update failed due to: %s" % str(ex))
      pass

  return (response, data)
def install(self, env):
  Logger.info('Installing R Server Client...')
  tmp_dir = Script.tmp_dir
  Logger.debug('Using temp dir: {0}'.format(tmp_dir))

  for rpm in rpms:
    Logger.info('Installing {0}'.format(rpm))
    Package(rpm)

  Logger.info('Installed R Server')
def QueryPrivilegeState(hToken, priv):
  # Get the ID for the privilege.
  privId = LookupPrivilegeValue(None, priv)
  privList = GetTokenInformation(hToken, TokenPrivileges)
  privState = 0
  for (id, attr) in privList:
    if id == privId:
      privState = attr
  Logger.debug('Privilege state: {0}={1} ({2}) Enabled={3}'.format(privId, priv, LookupPrivilegeDisplayName(None, priv), privState))
  return privState
def get_component_version(stack_name, component_name):
  """
  For any stack name, returns the version currently installed for a given component.
  Because each stack name may have different logic, the input is a generic dictionary.
  :param stack_name: one of HDP, HDPWIN, BIGTOP, PHD, etc. usually retrieved from
         the command-#.json file's ["hostLevelParams"]["stack_name"]
  :param component_name: Component name as a string necessary to get the version
  :return: Returns a string if found, e.g., 2.2.1.0-2175, otherwise, returns None
  """
  version = None
  if stack_name is None or component_name is None:
    Logger.error("Could not determine component version because of the parameters is empty. " \
                 "stack_name: %s, component_name: %s" % (str(stack_name), str(component_name)))
    return version

  out = None
  code = -1
  if not stack_name:
    Logger.error("Stack name not provided")
  elif not component_name:
    Logger.error("Component name not provided")
  else:
    (stack_selector_name, stack_selector_path, stack_selector_package) = stack_tools.get_stack_tool(stack_tools.STACK_SELECTOR_NAME)
    if stack_selector_name and stack_selector_path and os.path.exists(stack_selector_path):
      tmpfile = tempfile.NamedTemporaryFile()

      get_stack_comp_version_cmd = ""
      try:
        # This is necessary because Ubuntu returns "stdin: is not a tty", see AMBARI-8088
        with open(tmpfile.name, 'r') as file:
          get_stack_comp_version_cmd = '%s status %s > %s' % (stack_selector_path, component_name, tmpfile.name)
          code, stdoutdata = shell.call(get_stack_comp_version_cmd, quiet=True)
          out = file.read()

        if code != 0 or out is None:
          raise Exception("Code is nonzero or output is empty")

        Logger.debug("Command: %s\nOutput: %s" % (get_stack_comp_version_cmd, str(out)))
        matches = re.findall(r"( [\d\.]+(\-\d+)?)", out)
        version = matches[0][0].strip() if matches and len(matches) > 0 and len(matches[0]) > 0 else None
        Logger.debug("Version for component %s: %s" % (component_name, str(version)))
      except Exception, e:
        Logger.error("Could not determine stack version for component %s by calling '%s'. Return Code: %s, Output: %s." %
                     (component_name, get_stack_comp_version_cmd, str(code), str(out)))
    else:
def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True,
                  timeout=None, user=None, pid_file_name=None, poll_after=None):
  # TODO implement user
  Logger.info("Executing %s" % (command))

  #adding redirecting stdout stderr to file
  outfilename = APPLICATION_STD_OUTPUT_LOG_FILE_PREFIX + APPLICATION_STD_OUTPUT_LOG_FILE_FILE_TYPE
  errfilename = APPLICATION_STD_ERROR_LOG_FILE_PREFIX + APPLICATION_STD_ERROR_LOG_FILE_FILE_TYPE

  stdoutFile = open(outfilename, 'w+')
  stderrFile = open(errfilename, 'w+')
  proc = subprocess.Popen(command, stdout=stdoutFile, stderr=stderrFile,
                          universal_newlines=True, cwd=cwd, env=env, shell=False)
  code = None
  logAnyway = False
  if not wait_for_finish:
    Logger.debug("No need to wait for the process to exit. Will leave the process running ...")
    code = 0
    logAnyway = False

    if pid_file_name:
      Logger.debug("Writing the process id %s to file %s" % (str(proc.pid), pid_file_name))
      pidfile = open(pid_file_name, 'w')
      pidfile.write(str(proc.pid))
      pidfile.close()
      Logger.info("Wrote the process id to file %s" % pid_file_name)

    ## wait poll_after seconds and poll
    if poll_after:
      time.sleep(poll_after)
      if proc.poll() is None:
        return code, None, None  # if still running then return
      else:
        logAnyway = True  # assume failure and log
        Logger.warning("Process is not up after the polling interval " + str(poll_after) + " seconds.")
    else:
      return code, None, None

  if timeout:
    q = Queue()
    t = threading.Timer(timeout, on_timeout, [proc, q])
    t.start()

  out, err = proc.communicate()
  code = proc.returncode

  if logoutput or logAnyway:
    if out:
      Logger.info("Out: " + str(out))
    if err:
      Logger.info("Err: " + str(err))
    if code:
      Logger.info("Ret Code: " + str(code))

  return code, out, err
def service_check(self, env):
  import params
  env.set_params(params)

  try:
    Execute(params.smoke_logsearch_cmd, user=params.logsearch_user, timeout=10)
    Logger.info('Log Search Server up and running')
  except:
    Logger.debug('Log Search Server not running')
def service_check(self, env):
  import params
  env.set_params(params)

  cmd = 'ps -ef | grep proc_rangerkms | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code == 0:
    Logger.info('KMS process up and running')
  else:
    Logger.debug('KMS process not running')
    raise ComponentIsNotRunning()
def QueryPrivilegeState(hToken, priv):
  # Get the ID for the privilege.
  privId = LookupPrivilegeValue(None, priv)
  privList = GetTokenInformation(hToken, TokenPrivileges)
  privState = 0
  for (id, attr) in privList:
    if id == privId:
      privState = attr
  Logger.debug('Privilege state: {0}={1} ({2}) Enabled={3}'.format(
    privId, priv, LookupPrivilegeDisplayName(None, priv), privState))
  return privState
def run_action(self, resource, action):
  Logger.debug("Performing action %s on %s" % (action, resource))

  provider_class = find_provider(self, resource.__class__.__name__, resource.provider)
  provider = provider_class(resource)
  try:
    provider_action = getattr(provider, 'action_%s' % action)
  except AttributeError:
    raise Fail("%r does not implement action %s" % (provider, action))
  provider_action()
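# Sketch of the provider convention run_action relies on: a provider class exposes one
# action_<name> method per supported action, and run_action dispatches to it via getattr.
# The class and action names here are illustrative, not an actual provider.
class _ExampleProvider(object):
  def __init__(self, resource):
    self.resource = resource

  def action_create(self):
    # run_action(resource, "create") would resolve this method as
    # getattr(provider, 'action_create') and then call it.
    pass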
def status(self, env):
  cmd = 'ps -ef | grep proc_rangeradmin | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    if self.is_ru_rangeradmin_in_progress():
      Logger.info('Ranger admin process not running - skipping as rolling upgrade is in progress')
    else:
      Logger.debug('Ranger admin process not running')
      raise ComponentIsNotRunning()
  pass
def status(self, env):
  cmd = 'ps -ef | grep proc_rangeradmin | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    if self.is_ru_rangeradmin_in_progress():
      Logger.info('Ranger admin process not running - skipping as stack upgrade is in progress')
    else:
      Logger.debug('Ranger admin process not running')
      raise ComponentIsNotRunning()
  pass
def is_unchanged_datasource_url(datasource_url):
  import params
  parsed_url = urlparse(datasource_url)
  Logger.debug("parsed url: scheme = %s, host = %s, port = %s" % (
    parsed_url.scheme, parsed_url.hostname, parsed_url.port))
  Logger.debug("collector: scheme = %s, host = %s, port = %s" %
               (params.metric_collector_protocol, params.metric_collector_host,
                params.metric_collector_port))

  return parsed_url.scheme.strip() == params.metric_collector_protocol.strip() and \
         parsed_url.hostname.strip() == params.metric_collector_host.strip() and \
         str(parsed_url.port) == params.metric_collector_port
def check_llap_app_status_in_llap_ga(self, llap_app_name, num_retries, return_immediately_if_stopped=False):
  curr_time = time.time()
  total_timeout = int(num_retries) * 20  # Total wait time while checking the status via llapstatus command
  Logger.debug("Calculated 'total_timeout' : {0} using config 'num_retries_for_checking_llap_status' : {1}".format(total_timeout, num_retries))
  refresh_rate = 2  # Frequency of checking the llapstatus
  percent_desired_instances_to_be_up = 80  # Out of 100.
  llap_app_info = self._get_llap_app_status_info_in_llap_ga(percent_desired_instances_to_be_up / 100.0, total_timeout, refresh_rate)

  try:
    return self._verify_llap_app_status(llap_app_info, llap_app_name, return_immediately_if_stopped, curr_time)
  except Exception as e:
    Logger.info(e.message)
    return False
def java_version():
  return 1, 8
  # NOTE: the hard-coded return above short-circuits the detection logic below,
  # which is therefore unreachable as written.
  import params
  import subprocess
  proc = subprocess.Popen([params.java_home + '/bin/java', '-version'],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  java_version = proc.communicate()[1].split('\n')[0]
  version_number = java_version.split()[-1].strip('"')
  major, minor, _ = version_number.split('.')
  Logger.debug("java version is : {0}".format(minor))
  return int(major), int(minor)
def default(name, default_value):
  subdicts = filter(None, name.split('/'))

  curr_dict = Script.get_config()
  for x in subdicts:
    if x in curr_dict:
      curr_dict = curr_dict[x]
    else:
      if not isinstance(default_value, UnknownConfiguration):
        Logger.debug("Cannot find configuration: '%s'. Using '%s' value as default" % (name, default_value))
      return default_value

  return curr_dict
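# Usage sketch for default(): the name is a '/'-separated path into the command JSON returned
# by Script.get_config(); the key and fallback below are illustrative.
def _example_read_flag():
  # Returns the configured value, or the fallback (True) when any path segment is missing,
  # in which case a debug line is logged.
  return default('/configurations/cluster-env/manage_dirs_on_root', True)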
def remove_solr_kerberos_auth():
  import params

  if not _has_security_json():
    Logger.debug(format("Solr Security Json not found {solr_cloud_zk_directory}{security_json}"))
    return

  Execute(format('{zk_client_prefix} -cmd clear {solr_cloud_zk_directory}{security_json}'),
          environment={'JAVA_HOME': params.java64_home},
          timeout=60,
          ignore_failures=True,
          user=params.solr_config_user
          )
def _clear_package_manager_cache(self):
  package_manager_cmd = ""

  if OSCheck.is_redhat_family():
    package_manager_cmd = ("/usr/bin/yum", "clean", "metadata")

  if OSCheck.is_suse_family():
    package_manager_cmd = ("/usr/bin/zypper", "-q", "-n", "clean")

  if OSCheck.is_ubuntu_family():
    return

  Logger.debug("Clearing repo manager metadata")
  Execute(package_manager_cmd, logoutput=False, sudo=True)
def status(self, env):
  import status_params
  env.set_params(status_params)

  cmd = 'ps -ef | grep proc_rangeradmin | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    if self.is_ru_rangeradmin_in_progress(status_params.upgrade_marker_file):
      Logger.info('Ranger admin process not running - skipping as stack upgrade is in progress')
    else:
      Logger.debug('Ranger admin process not running')
      raise ComponentIsNotRunning()
  pass
def status(self, env):
  import status_params
  env.set_params(status_params)

  if status_params.stack_supports_pid:
    check_process_status(status_params.ranger_kms_pid_file)
    return

  cmd = 'ps -ef | grep proc_rangerkms | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    Logger.debug('KMS process not running')
    raise ComponentIsNotRunning()
  pass
def remove_solr_kerberos_auth():
  import params

  if not _has_security_json():
    Logger.debug(format("Solr Security Json not found {solr_cloud_zk_directory}{security_json}"))
    return

  Execute(format('{zk_client_prefix} -cmd clear {solr_cloud_zk_directory}{security_json}'),
          environment={'JAVA_HOME': params.java64_home},
          timeout=60,
          ignore_failures=True,
          user=params.solr_config_user
          )
def action_mount(self):
  if not os.path.exists(self.resource.mount_point):
    os.makedirs(self.resource.mount_point)

  if self.is_mounted():
    Logger.debug("%s already mounted" % self)
  else:
    args = ["mount"]
    if self.resource.fstype:
      args += ["-t", self.resource.fstype]
    if self.resource.options:
      args += ["-o", ",".join(self.resource.options)]
    if self.resource.device:
      args.append(self.resource.device)
    args.append(self.resource.mount_point)
    check_call(args)

    Logger.info("%s mounted" % self)
def action_enable(self):
  if self.is_enabled():
    Logger.debug("%s already enabled" % self)
  else:
    if not self.resource.device:
      raise Fail("[%s] device not set but required for enable action" % self)
    if not self.resource.fstype:
      raise Fail("[%s] fstype not set but required for enable action" % self)

    with open("/etc/fstab", "a") as fp:
      fp.write("%s %s %s %s %d %d\n" % (
        self.resource.device,
        self.resource.mount_point,
        self.resource.fstype,
        ",".join(self.resource.options or ["defaults"]),
        self.resource.dump,
        self.resource.passno,
      ))

    Logger.info("%s enabled" % self)
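# Example of the /etc/fstab line action_enable appends, for an assumed resource with
# device="/dev/sdb1", mount_point="/hadoop/hdfs/data", fstype="ext4", no explicit options,
# dump=0 and passno=0 (values are illustrative):
#
#   /dev/sdb1 /hadoop/hdfs/data ext4 defaults 0 0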
def _exec_cmd(self, command, expect=None):
  if command != "status":
    Logger.info("%s command '%s'" % (self.resource, command))

  custom_cmd = getattr(self.resource, "%s_command" % command, None)
  if custom_cmd:
    Logger.debug("%s executing '%s'" % (self.resource, custom_cmd))
    if hasattr(custom_cmd, "__call__"):
      if custom_cmd():
        ret = 0
      else:
        ret = 1
    else:
      ret, out = shell.call(custom_cmd)
  else:
    ret, out = self._init_cmd(command)

  if expect is not None and expect != ret:
    raise Fail("%r command %s for service %s failed with return code: %d. %s" % (
      self, command, self.resource.service_name, ret, out))
  return ret
def action_run(self):
  if self.resource.creates:
    if os.path.exists(self.resource.creates):
      return

  Logger.debug("Executing %s" % self.resource)

  if self.resource.path != []:
    if not self.resource.environment:
      self.resource.environment = {}

    self.resource.environment['PATH'] = os.pathsep.join(self.resource.path)

  for i in range(0, self.resource.tries):
    try:
      code, _, _ = _call_command(self.resource.command, logoutput=self.resource.logoutput,
                                 cwd=self.resource.cwd, env=self.resource.environment,
                                 wait_for_finish=self.resource.wait_for_finish,
                                 timeout=self.resource.timeout, user=self.resource.user,
                                 pid_file_name=self.resource.pid_file,
                                 poll_after=self.resource.poll_after)
      if code != 0 and not self.resource.ignore_failures:
        raise Fail("Failed to execute " + self.resource.command)
      break
    except Fail as ex:
      Logger.info("Error raised: %s" % str(ex))
      if i == self.resource.tries - 1:  # last try
        raise ex
      else:
        Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
        time.sleep(self.resource.try_sleep)
    except ExecuteTimeoutException:
      err_msg = ("Execution of '%s' was killed due timeout after %d seconds") % (self.resource.command, self.resource.timeout)

      if self.resource.on_timeout:
        Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
        _call_command(self.resource.on_timeout)
      else:
        raise Fail(err_msg)
def check_process_status(pid_file):
  """
  Function checks whether process is running.
  Process is considered running, if pid file exists, and process with
  a pid, mentioned in pid file is running
  If process is not running, will throw ComponentIsNotRunning exception

  @param pid_file: path to service pid file
  """
  if not pid_file or not os.path.isfile(pid_file):
    if not pid_file:
      Logger.warning("pid_file is not valid")
    else:
      Logger.info("pid file does not exist {0}".format(pid_file))
    raise ComponentIsNotRunning()

  with open(pid_file, "r") as f:
    try:
      pid = int(f.read())
    except:
      Logger.debug("Pid file {0} does not exist".format(pid_file))
      raise ComponentIsNotRunning()

    if IS_WINDOWS:
      not_running = True
      try:
        ps = subprocess.Popen(r'tasklist.exe /NH /FI "PID eq %d"' % (pid), shell=True, stdout=subprocess.PIPE)
        output = ps.stdout.read()
        ps.stdout.close()
        ps.wait()
        not_running = str(pid) not in output
      except OSError, e:
        Logger.debug("Error {0}".format(str(e)))
        Logger.info("Process with pid {0} is not running. Stale pid file"
                    " at {1}".format(pid, pid_file))
      if not_running:
        raise ComponentIsNotRunning()
    else:
      try:
        # As in the POSIX variant above, signal 0 sends no signal but still performs
        # error checking, so it only verifies that a process with this pid exists.
        os.kill(pid, 0)
      except OSError:
        Logger.info("Process with pid {0} is not running. Stale pid file"
                    " at {1}".format(pid, pid_file))
        raise ComponentIsNotRunning()
def get_component_version(stack_name, component_name):
  """
  For any stack name, returns the version currently installed for a given component.
  Because each stack name may have different logic, the input is a generic dictionary.
  :param stack_name: one of HDP, HDPWIN, BIGTOP, PHD, etc. usually retrieved from
         the command-#.json file's ["hostLevelParams"]["stack_name"]
  :param component_name: Component name as a string necessary to get the version
  :return: Returns a string if found, e.g., 2.2.1.0-2175, otherwise, returns None
  """
  version = None
  if stack_name is None or component_name is None:
    Logger.error("Could not determine component version because of the parameters is empty. " \
                 "stack_name: %s, component_name: %s" % (str(stack_name), str(component_name)))
    return version

  out = None
  code = -1
  if stack_name == "HDP":
    tmpfile = tempfile.NamedTemporaryFile()

    get_hdp_comp_version_cmd = ""
    try:
      # This is necessary because Ubuntu returns "stdin: is not a tty", see AMBARI-8088
      with open(tmpfile.name, 'r') as file:
        get_hdp_comp_version_cmd = '/usr/bin/hdp-select status %s > %s' % (component_name, tmpfile.name)
        code, stdoutdata = shell.call(get_hdp_comp_version_cmd)
        out = file.read()

      if code != 0 or out is None:
        raise Exception("Code is nonzero or output is empty")

      Logger.debug("Command: %s\nOutput: %s" % (get_hdp_comp_version_cmd, str(out)))
      matches = re.findall(r"([\d\.]+\-\d+)", out)
      version = matches[0] if matches and len(matches) > 0 else None
    except Exception, e:
      Logger.error("Could not determine HDP version for component %s by calling '%s'. Return Code: %s, Output: %s." %
                   (component_name, get_hdp_comp_version_cmd, str(code), str(out)))
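# Illustrative hdp-select output and how the regex above extracts the version; the component
# name and build number are assumptions for the example:
#
#   $ /usr/bin/hdp-select status hadoop-client
#   hadoop-client - 2.2.1.0-2175
#
# re.findall(r"([\d\.]+\-\d+)", out) would yield ["2.2.1.0-2175"], so version == "2.2.1.0-2175".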
def handle_mounted_dirs(func, dirs_string, history_filename, update_cache=True):
  """
  This function determine which dir paths can be created.
  There are 2 uses cases:
  1. Customers that have many dirs, each one on a separate mount point that corresponds to a different drive.
  2. Developers that are using a sandbox VM and all dirs are mounted on the root.

  The goal is to avoid forcefully creating a dir when a user's drive fails. In this scenario, the
  mount point for a dir changes from something like /hadoop/hdfs/data/data1 to /
  If Ambari forcefully creates the directory when it doesn't exist and drive became unmounted, then Ambari will soon
  fill up the root drive, which is bad. Instead, we should not create the directory and let HDFS handle the failure
  based on its tolerance of missing directories.

  This function relies on the history_file parameter to parse a file that contains
  a mapping from a dir, and its last known mount point.
  After determining which dirs can be created if they don't exist, it recalculates the mount points and
  writes to the file again.
  :param func: Function that will be called if a directory will be created. This function
               will be called as func(dir)
  :param update_cache: Bool indicating whether to update the global cache of mount points
  :return: Returns a history_filename content
  """
  Directory(os.path.dirname(history_filename),
            create_parents = True,
            mode=0755,
  )

  # Get the dirs that Ambari knows about and their last known mount point
  prev_dir_to_mount_point = get_dir_to_mount_from_file(history_filename)

  # Dictionary from dir to the mount point that will be written to the history file.
  # If a dir becomes unmounted, we should still keep its original value.
  # If a dir was previously on / and is now mounted on a drive, we should store that too.
  dir_to_mount_point = prev_dir_to_mount_point.copy()

  # This should typically be True after first DataNode start, but False the first time.
  history_file_exists = True

  if history_filename is None:
    history_file_exists = False
    Logger.warning("History_file.file property is null.")
  else:
    if not os.path.exists(history_filename):
      history_file_exists = False
      Logger.warning("History_file property has file %s and it does not exist." % history_filename)

  valid_dirs = []                # dirs that have been normalized
  error_messages = []            # list of error messages to report at the end
  dirs_unmounted = set()         # set of dirs that have become unmounted
  valid_existing_dirs = []

  dirs_string = ",".join([re.sub(r'^\[.+\]', '', dfs_dir.strip()) for dfs_dir in dirs_string.split(",")])
  for dir in dirs_string.split(","):
    if dir is None or dir.strip() == "":
      continue

    dir = dir.strip()
    valid_dirs.append(dir)

    if os.path.isdir(dir):
      valid_existing_dirs.append(dir)

  used_mounts = set([get_mount_point_for_dir(dir) for dir in valid_existing_dirs])

  ignore_bad_mounts = default('/configurations/cluster-env/ignore_bad_mounts', False)
  manage_dirs_on_root = default('/configurations/cluster-env/manage_dirs_on_root', True)

  for dir_ in valid_dirs:
    last_mount_point_for_dir = prev_dir_to_mount_point.get(dir_, None) if history_file_exists else None
    curr_mount_point = get_mount_point_for_dir(dir_)
    is_non_root_dir = curr_mount_point is not None and curr_mount_point != "/"
    folder_exists = dir_ in valid_existing_dirs

    if not folder_exists and ignore_bad_mounts:
      Logger.debug("The directory {0} doesn't exist.".format(dir_))
      Logger.warning("Not creating {0} as cluster-env/ignore_bad_mounts is enabled.".format(dir_))
      may_manage_this_dir = False
    else:
      may_manage_this_dir = _may_manage_folder(dir_, last_mount_point_for_dir, is_non_root_dir, dirs_unmounted, error_messages, manage_dirs_on_root, curr_mount_point)

      if may_manage_this_dir and dir_ not in valid_existing_dirs and curr_mount_point in used_mounts:
        if default('/configurations/cluster-env/one_dir_per_partition', False):
          may_manage_this_dir = False
          Logger.warning("Skipping creation of another directory on the following mount: " + curr_mount_point +
                         " . Please turn off cluster-env/one_dir_per_partition or handle the situation manually.")
        else:
          Logger.warning("Trying to create another directory on the following mount: " + str(curr_mount_point))

    if may_manage_this_dir:
      Logger.info("Forcefully ensuring existence and permissions of the directory: {0}".format(dir_))

      # Call the function
      func(dir_)

      used_mounts.add(curr_mount_point)
  pass

  # This is set to false during unit tests.
  if update_cache:
    get_and_cache_mount_points(refresh=True)

  # Update all dirs (except the unmounted ones) with their current mount points.
  for dir in valid_dirs:
    # At this point, the directory may or may not exist
    if os.path.isdir(dir) and dir not in dirs_unmounted:
      curr_mount_point = get_mount_point_for_dir(dir)
      dir_to_mount_point[dir] = curr_mount_point

  if error_messages and len(error_messages) > 0:
    header = " WARNING ".join(["*****"] * 6)
    header = "\n" + "\n".join([header, ] * 3) + "\n"
    msg = " ".join(error_messages) + \
          " Please ensure that mounts are healthy. If the mount change was intentional, you can update the contents of {0}.".format(history_filename)
    Logger.error(header + msg + header)

  dir_to_mount = DIR_TO_MOUNT_HEADER
  for kv in dir_to_mount_point.iteritems():
    dir_to_mount += kv[0] + "," + kv[1] + "\n"

  return dir_to_mount
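# Sketch of the history-file content handle_mounted_dirs builds and returns; the directories
# and mount points below are illustrative, and DIR_TO_MOUNT_HEADER is assumed to be the comment
# header defined alongside this function. Each subsequent line is "<dir>,<last known mount>".
#
#   /hadoop/hdfs/data/data1,/grid/0
#   /hadoop/hdfs/data/data2,/grid/1
#   /hadoop/hdfs/data/data3,/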
def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True,
                  timeout=None, user=None):
  # TODO implement timeout, wait_for_finish
  Logger.info("Executing %s" % (command))
  if user:
    domain, username = UserHelper.parse_user_name(user, ".")

    proc_token = OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES)

    old_states = []

    privileges = [
      SE_ASSIGNPRIMARYTOKEN_NAME,
      SE_INCREASE_QUOTA_NAME,
    ]

    for priv in privileges:
      old_states.append(QueryPrivilegeState(proc_token, priv))
      AdjustPrivilege(proc_token, priv)
      QueryPrivilegeState(proc_token, priv)

    user_token = LogonUser(username, domain, Script.get_password(user),
                           win32con.LOGON32_LOGON_SERVICE, win32con.LOGON32_PROVIDER_DEFAULT)
    env_token = DuplicateTokenEx(user_token, SecurityIdentification, TOKEN_QUERY, TokenPrimary)
    # getting updated environment for impersonated user and merge it with custom env
    current_env = CreateEnvironmentBlock(env_token, False)
    current_env = _merge_env(current_env, env)

    si = STARTUPINFO()
    out_handle, err_handle, out_file, err_file = _create_tmp_files(current_env)
    ok, si.hStdInput = _safe_duplicate_handle(GetStdHandle(STD_INPUT_HANDLE))
    if not ok:
      raise Exception("Unable to create StdInput for child process")
    ok, si.hStdOutput = _safe_duplicate_handle(out_handle)
    if not ok:
      raise Exception("Unable to create StdOut for child process")
    ok, si.hStdError = _safe_duplicate_handle(err_handle)
    if not ok:
      raise Exception("Unable to create StdErr for child process")

    Logger.debug("Redirecting stdout to '{0}', stderr to '{1}'".format(out_file.name, err_file.name))

    si.dwFlags = win32con.STARTF_USESTDHANDLES
    si.lpDesktop = ""

    try:
      info = CreateProcessAsUser(user_token, None, command, None, None, 1,
                                 win32con.CREATE_NO_WINDOW, current_env, cwd, si)
      hProcess, hThread, dwProcessId, dwThreadId = info
      hThread.Close()

      try:
        WaitForSingleObject(hProcess, INFINITE)
      except KeyboardInterrupt:
        pass
      out, err = _get_files_output(out_file, err_file)
      code = GetExitCodeProcess(hProcess)
    finally:
      for priv in privileges:
        old_state = old_states.pop(0)
        AdjustPrivilege(proc_token, priv, old_state)
  else:
    # getting updated environment for current process and merge it with custom env
    cur_token = OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY)
    current_env = CreateEnvironmentBlock(cur_token, False)
    current_env = _merge_env(current_env, env)

    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            cwd=cwd, env=current_env, shell=False)
    out, err = proc.communicate()
    code = proc.returncode

  if logoutput and out:
    Logger.info(out)
  if logoutput and err:
    Logger.info(err)
  return code, out, err
def _link_configs(package, version, dirs):
  """
  Link a specific package's configuration directory
  """
  bad_dirs = []
  for dir_def in dirs:
    if not os.path.exists(dir_def['conf_dir']):
      bad_dirs.append(dir_def['conf_dir'])

  if len(bad_dirs) > 0:
    Logger.debug("Skipping {0} as it does not exist.".format(",".join(bad_dirs)))
    return

  bad_dirs = []
  for dir_def in dirs:
    # check if conf is a link already
    old_conf = dir_def['conf_dir']
    if os.path.islink(old_conf):
      Logger.debug("{0} is a link to {1}".format(old_conf, os.path.realpath(old_conf)))
      bad_dirs.append(old_conf)

  if len(bad_dirs) > 0:
    return

  # make backup dir and copy everything in case configure() was called after install()
  for dir_def in dirs:
    old_conf = dir_def['conf_dir']
    old_parent = os.path.abspath(os.path.join(old_conf, os.pardir))
    old_conf_copy = os.path.join(old_parent, "conf.install")
    Execute(("cp", "-R", "-p", old_conf, old_conf_copy),
            not_if = format("test -e {old_conf_copy}"), sudo = True)

  # we're already in the HDP stack
  versioned_confs = conf_select.create("HDP", package, version, dry_run = True)

  Logger.info("New conf directories: {0}".format(", ".join(versioned_confs)))

  need_dirs = []
  for d in versioned_confs:
    if not os.path.exists(d):
      need_dirs.append(d)

  if len(need_dirs) > 0:
    conf_select.create("HDP", package, version)

    # find the matching definition and back it up (not the most efficient way) ONLY if there is more than one directory
    if len(dirs) > 1:
      for need_dir in need_dirs:
        for dir_def in dirs:
          if 'prefix' in dir_def and need_dir.startswith(dir_def['prefix']):
            old_conf = dir_def['conf_dir']
            versioned_conf = need_dir
            Execute(as_sudo(["cp", "-R", "-p", os.path.join(old_conf, "*"), versioned_conf], auto_escape=False),
                    only_if = format("ls {old_conf}/*"))
    elif 1 == len(dirs) and 1 == len(need_dirs):
      old_conf = dirs[0]['conf_dir']
      versioned_conf = need_dirs[0]
      Execute(as_sudo(["cp", "-R", "-p", os.path.join(old_conf, "*"), versioned_conf], auto_escape=False),
              only_if = format("ls {old_conf}/*"))

  # make /usr/hdp/[version]/[component]/conf point to the versioned config.
  # /usr/hdp/current is already set
  try:
    conf_select.select("HDP", package, version)

    # no more references to /etc/[component]/conf
    for dir_def in dirs:
      Directory(dir_def['conf_dir'], action="delete")

      # link /etc/[component]/conf -> /usr/hdp/current/[component]-client/conf
      Link(dir_def['conf_dir'], to = dir_def['current_dir'])
  except Exception, e:
    Logger.warning("Could not select the directory: {0}".format(e.message))