def test_get_stack_feature_version_missing_params(self):
    """
    Tests that simple upgrade information can be extracted from JSON
    :return:
    """
    Script.config = TestUpgradeSummary._get_cluster_simple_upgrade_json()

    summary = upgrade_summary.get_upgrade_summary()

    # top-level attributes of the upgrade summary
    self.assertEqual(False, summary.is_revert)
    self.assertEqual("UPGRADE", summary.direction)
    self.assertEqual("STANDARD", summary.orchestration)
    self.assertEqual("rolling_upgrade", summary.type)

    # per-service source/target versions
    hdfs_service_summary = summary.services["HDFS"]
    self.assertEqual("2.4.0.0-1234", hdfs_service_summary.source_version)
    self.assertEqual("2.5.9.9-9999", hdfs_service_summary.target_version)

    # the module-level convenience accessors mirror the summary contents
    self.assertEqual("2.4.0.0-1234", upgrade_summary.get_source_version("HDFS"))
    self.assertEqual("2.5.9.9-9999", upgrade_summary.get_target_version("HDFS"))
    self.assertTrue(upgrade_summary.get_downgrade_from_version("HDFS") is None)
def set_pre_start(self, env):
    """
    Runs stack-select for the RANGER_ADMIN component so its symlinks point at
    the version being upgraded to before the component starts.
    """
    import params
    env.set_params(params)

    # default to a full (STANDARD) stack-select; an in-flight upgrade may
    # narrow this to a PATCH orchestration
    scope = stack_select.PACKAGE_SCOPE_STANDARD
    summary = upgrade_summary.get_upgrade_summary()
    if summary is not None:
        scope = summary.orchestration
        if scope is None:
            raise Fail("The upgrade summary does not contain an orchestration type")

        # partial (patch/maint) orchestrations select only a subset of packages
        if scope.upper() in stack_select._PARTIAL_ORCHESTRATION_SCOPES:
            scope = stack_select.PACKAGE_SCOPE_PATCH

    packages = stack_select.get_packages(
        scope, service_name="RANGER", component_name="RANGER_ADMIN")
    if packages is None:
        raise Fail("Unable to get packages for stack-select")

    Logger.info(
        "RANGER_ADMIN component will be stack-selected to version {0} using a {1} orchestration"
        .format(params.version, scope.upper()))

    for package in packages:
        stack_select.select(package, params.version)
def select_packages(version):
    """
    Uses the command's service and role to determine the stack-select packages which
    need to be invoked. If in an upgrade, then the upgrade summary's orchestration
    is used to determine which packages to install.
    :param version: the version to select
    :return: None
    """
    package_scope = PACKAGE_SCOPE_STANDARD
    orchestration = package_scope
    summary = upgrade_summary.get_upgrade_summary()

    if summary is not None:
        orchestration = summary.orchestration
        if orchestration is None:
            # fixed garbled message ("summary for does not") and made it
            # consistent with the identical check in the other components
            raise Fail("The upgrade summary does not contain an orchestration type")

        # if the orchestration is patch or maint, use the "patch" key from the package JSON
        if orchestration.upper() in _PARTIAL_ORCHESTRATION_SCOPES:
            package_scope = PACKAGE_SCOPE_PATCH

    stack_select_packages = get_packages(package_scope)
    if stack_select_packages is None:
        # nothing mapped for this service/role - nothing to select
        return

    Logger.info("The following packages will be stack-selected to version {0} using a {1} orchestration and {2} scope: {3}".format(
        version, orchestration.upper(), package_scope, ", ".join(stack_select_packages)))

    for stack_select_package_name in stack_select_packages:
        select(stack_select_package_name, version)
def finalize_upgrade(upgrade_type, hdfs_binary):
    """
    Finalize the Namenode upgrade, at which point it cannot be downgraded.
    :param upgrade_type rolling or nonrolling
    :param hdfs_binary: name/path of the HDFS binary to use
    """
    Logger.info("Executing Rolling Upgrade finalize")
    import params

    # on secure clusters, authenticate as the HDFS user before running dfsadmin
    if params.security_enabled:
        Execute(
            format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}"),
            user=params.hdfs_user,
            logoutput=True)

    base_cmd = get_dfsadmin_base_command(hdfs_binary)
    summary = upgrade_summary.get_upgrade_summary()

    # an upgrade that cannot be downgraded uses the legacy -finalizeUpgrade /
    # -upgrade query switches instead of the rollingUpgrade variants
    no_downgrade = summary is not None and not summary.is_downgrade_allowed
    finalize_cmd = base_cmd + (" -finalizeUpgrade" if no_downgrade else " -rollingUpgrade finalize")
    query_cmd = base_cmd + (" -upgrade query" if no_downgrade else " -rollingUpgrade query")

    if summary is not None and summary.is_switch_bits:
        # binary-only switch - HDFS was never placed into an upgrade state
        Logger.info("The {0} switches the binaries only. No need to call finalization.".format(summary.direction))
    else:
        # query before and after finalizing so the state transition is logged
        for command in (query_cmd, finalize_cmd, query_cmd):
            Execute(command, user=params.hdfs_user, logoutput=True)

    # upgrade is finalized; remove the upgrade marker
    delete_upgrade_marker()
def configure_atlas_user_for_tagsync(self, env):
    """
    During an upgrade, stack-selects the RANGER_TAGSYNC component and, if the
    stack supports it, creates the Atlas user keystore for Tagsync.
    """
    Logger.info("Configuring Atlas user for Tagsync service.")
    import params
    env.set_params(params)

    # default to a full (STANDARD) stack-select; an in-flight upgrade may
    # narrow this to a PATCH orchestration
    scope = stack_select.PACKAGE_SCOPE_STANDARD
    summary = upgrade_summary.get_upgrade_summary()
    if summary is not None:
        scope = summary.orchestration
        if scope is None:
            raise Fail("The upgrade summary does not contain an orchestration type")
        if scope.upper() in stack_select._PARTIAL_ORCHESTRATION_SCOPES:
            scope = stack_select.PACKAGE_SCOPE_PATCH

    packages = stack_select.get_packages(
        scope, service_name="RANGER", component_name="RANGER_TAGSYNC")
    if packages is None:
        raise Fail("Unable to get packages for stack-select")

    Logger.info("RANGER_TAGSYNC component will be stack-selected to version {0} using a {1} orchestration".format(params.version, scope.upper()))

    for package in packages:
        stack_select.select(package, params.version)

    if params.stack_supports_ranger_tagsync_ssl_xml_support:
        Logger.info("Upgrading Tagsync, stack support Atlas user for Tagsync, creating keystore for same.")
        self.create_atlas_user_keystore(env)
    else:
        Logger.info("Upgrading Tagsync, stack does not support Atlas user for Tagsync, skipping keystore creation for same.")

    Logger.info("Configuring Atlas user for Tagsync service done.")
def update_atlas_simple_authz(self, env):
    """
    During an UPGRADE, stack-selects the ATLAS_SERVER component to the target
    version and migrates the Atlas simple-authz policy JSON via the stack's
    atlas_update_simple_auth_json.py script.
    """
    import params
    env.set_params(params)
    if params.upgrade_direction == Direction.UPGRADE:
        # default to a full (STANDARD) stack-select; an in-flight upgrade may
        # narrow this to a PATCH orchestration
        orchestration = stack_select.PACKAGE_SCOPE_STANDARD
        summary = upgrade_summary.get_upgrade_summary()
        if summary is not None:
            orchestration = summary.orchestration
            if orchestration is None:
                raise Fail(
                    "The upgrade summary does not contain an orchestration type"
                )
            # partial (patch/maint) orchestrations select only a subset of packages
            if orchestration.upper(
            ) in stack_select._PARTIAL_ORCHESTRATION_SCOPES:
                orchestration = stack_select.PACKAGE_SCOPE_PATCH

        stack_select_packages = stack_select.get_packages(
            orchestration, service_name="ATLAS", component_name="ATLAS_SERVER")
        if stack_select_packages is None:
            raise Fail("Unable to get packages for stack-select")

        Logger.info(
            "ATLAS_SERVER component will be stack-selected to version {0} using a {1} orchestration"
            .format(params.version, orchestration.upper()))

        for stack_select_package_name in stack_select_packages:
            stack_select.select(stack_select_package_name, params.version)

        # ensure the metadata home tree is owned by the Atlas user before the
        # migration script runs
        Directory(
            format('{metadata_home}/'),
            owner=params.metadata_user,
            group=params.user_group,
            recursive_ownership=True,
        )

        # run the migration script shipped with the TARGET version's binaries
        target_version = upgrade_summary.get_target_version('ATLAS')
        update_atlas_simple_authz_script = os.path.join(
            format('{stack_root}'), target_version, 'atlas', 'bin',
            'atlas_update_simple_auth_json.py')
        update_atlas_simple_authz_command = format(
            'source {params.conf_dir}/atlas-env.sh ; {update_atlas_simple_authz_script} {conf_dir}'
        )
        # only_if guards against stacks that do not ship the migration script
        Execute(
            update_atlas_simple_authz_command,
            only_if=format("test -e {update_atlas_simple_authz_script}"),
            user=params.metadata_user)

        # fix ownership on the regenerated policy file, if it exists
        atlas_simple_auth_policy_file = os.path.join(
            format('{conf_dir}'), 'atlas-simple-authz-policy.json')
        File(atlas_simple_auth_policy_file,
             group=params.user_group,
             owner=params.metadata_user,
             only_if=format("test -e {atlas_simple_auth_policy_file}"),
             mode=0644)
def pre_upgrade_restart(self, env, upgrade_type=None):
    """Runs stack-select for this component prior to an upgrade restart."""
    Logger.info("Executing Stack Upgrade pre-restart")
    import params
    env.set_params(params)

    summary = upgrade_summary.get_upgrade_summary()
    switching_bits = summary is not None and summary.is_switch_bits is True

    # When downgrading an Express Upgrade, the first thing we do is to revert the symlinks.
    # Therefore, we cannot call this code in that scenario.
    downgrading_express_upgrade = (
        upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING
        and params.upgrade_direction == Direction.DOWNGRADE)

    if switching_bits or not downgrading_express_upgrade:
        stack_select.select_packages(params.version)
def prepare_rolling_upgrade(hdfs_binary):
    """
    This can be called during either Rolling Upgrade or Express Upgrade (aka nonrolling)
    Rolling Upgrade for HDFS Namenode requires the following.
    0. Namenode must be up
    1. If HA: leave safemode if the safemode status is not OFF
    2. Execute a rolling upgrade "prepare"
    3. Execute a rolling upgrade "query"
    :param hdfs_binary: name/path of the HDFS binary to use
    """
    import params

    # the direction must be explicitly known; refuse to guess
    if not params.upgrade_direction or params.upgrade_direction not in [Direction.UPGRADE, Direction.DOWNGRADE]:
        raise Fail("Could not retrieve upgrade direction: %s" % str(params.upgrade_direction))

    Logger.info(format("Performing a(n) {params.upgrade_direction} of HDFS"))

    # on secure clusters, authenticate as the HDFS user first
    if params.security_enabled:
        Execute(
            format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}"),
            user=params.hdfs_user,
            logoutput=True)

    if params.upgrade_direction == Direction.UPGRADE:
        if params.dfs_ha_enabled:
            Logger.info('High Availability is enabled, must leave safemode before calling "-rollingUpgrade prepare"')
            desired_state = SafeMode.OFF
            transitioned, original_state = reach_safemode_state(
                params.hdfs_user, desired_state, True, hdfs_binary)
            if not transitioned:
                raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))

        summary = upgrade_summary.get_upgrade_summary()
        if summary is not None and summary.is_switch_bits:
            # binary-only switch - no rolling upgrade state needs preparing
            Logger.info("The {0} switches the binaries only. No need to call prepare.".format(summary.direction))
        else:
            base_cmd = get_dfsadmin_base_command(hdfs_binary)
            Execute(base_cmd + " -rollingUpgrade prepare", user=params.hdfs_user, logoutput=True)
            Execute(base_cmd + " -rollingUpgrade query", user=params.hdfs_user, logoutput=True)
def prepare_express_upgrade(self, env):
    """
    During an Express Upgrade.
    If in HA, on the Active NameNode only, examine the directory dfs.namenode.name.dir and
    make sure that there is no "/previous" directory.

    Create a list of all the DataNodes in the cluster.
    hdfs dfsadmin -report > dfs-old-report-1.log

    hdfs dfsadmin -safemode enter
    hdfs dfsadmin -saveNamespace

    Copy the checkpoint files located in ${dfs.namenode.name.dir}/current into a backup directory.

    Finalize any prior HDFS upgrade,
    hdfs dfsadmin -finalizeUpgrade

    Prepare for a NameNode rolling upgrade in order to not lose any data.
    hdfs dfsadmin -rollingUpgrade prepare
    """
    import params
    Logger.info("Preparing the NameNodes for a NonRolling (aka Express) Upgrade.")

    # on secure clusters, authenticate as the HDFS user first
    if params.security_enabled:
        Execute(
            format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}"),
            user=params.hdfs_user,
            logoutput=True)

    hdfs_binary = self.get_hdfs_binary()
    namenode_upgrade.prepare_upgrade_check_for_previous_dir()
    namenode_upgrade.prepare_upgrade_enter_safe_mode(hdfs_binary)

    # both the namespace save and the namedir backup can be skipped via config
    if not params.skip_namenode_save_namespace_express:
        namenode_upgrade.prepare_upgrade_save_namespace(hdfs_binary)
    if not params.skip_namenode_namedir_backup_express:
        namenode_upgrade.prepare_upgrade_backup_namenode_dir()

    namenode_upgrade.prepare_upgrade_finalize_previous_upgrades(hdfs_binary)

    summary = upgrade_summary.get_upgrade_summary()
    downgrade_possible = summary is not None and summary.is_downgrade_allowed
    if not downgrade_possible:
        Logger.info("Downgrade will not be possible. Skipping '-rollingUpgrade prepare'")
    else:
        # Call -rollingUpgrade prepare
        namenode_upgrade.prepare_rolling_upgrade(hdfs_binary)
def actionexecute(self, env):
    """
    Invokes 'stack-select set all' for the upgrade's associated version, but
    only during a STANDARD UPGRADE that cannot be downgraded or reverted.
    """
    summary = upgrade_summary.get_upgrade_summary()

    # guard clauses: bail out unless this is an applicable upgrade
    if summary is None:
        Logger.warning("There is no upgrade in progress")
        return

    if summary.associated_version is None:
        Logger.warning("There is no version associated with the upgrade in progress")
        return

    if summary.orchestration != "STANDARD":
        Logger.warning("The 'stack-select set all' command can only be invoked during STANDARD upgrades")
        return

    not_an_upgrade = summary.direction.lower() != Direction.UPGRADE
    if not_an_upgrade or summary.is_downgrade_allowed or summary.is_revert:
        Logger.warning("The 'stack-select set all' command can only be invoked during an UPGRADE which cannot be downgraded")
        return

    # other os?
    if OSCheck.is_redhat_family():
        code, out = shell.call(('/usr/bin/yum', 'clean', 'all'), sudo=True)

    stack_selector_path = stack_tools.get_stack_tool_path(stack_tools.STACK_SELECTOR_NAME)

    # this script runs on all hosts; if this host doesn't have stack components,
    # then don't invoke the stack tool
    # (no need to log that it's skipped - the function will do that)
    if is_host_skippable(stack_selector_path, summary.associated_version):
        return

    # invoke "set all"
    cmd = ('ambari-python-wrap', stack_selector_path, 'set', 'all', summary.associated_version)
    code, out = shell.call(cmd, sudo=True)

    if code != 0:
        raise Exception("Command '{0}' exit code is nonzero".format(cmd))
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None,
             upgrade_suspended=False, env=None):
    """
    Dispatches NameNode lifecycle actions: configure, start, stop, status,
    decommission and refresh_nodes. The "start" branch also computes the
    correct startup options for in-flight upgrades/downgrades and waits for
    Safemode to exit where appropriate.
    :param action: one of "configure", "start", "stop", "status",
                   "decommission", "refresh_nodes" (required)
    :param hdfs_binary: name/path of the HDFS binary (required for start/stop)
    :param do_format: whether a fresh NameNode may be formatted on start
    :param upgrade_type: the in-flight upgrade type, or None
    :param upgrade_suspended: True when an upgrade is currently suspended
    :param env: the environment (passed through to ZKFC handling)
    :raises Fail: on missing parameters or failed standby bootstrap
    """
    if action is None:
        raise Fail('"action" parameter is required for function namenode().')

    if action in ["start", "stop"] and hdfs_binary is None:
        raise Fail('"hdfs_binary" parameter is required for function namenode().')

    if action == "configure":
        import params
        #we need this directory to be present before any action(HA manual steps for
        #additional namenode)
        create_name_dirs(params.dfs_name_dir)

        generate_logfeeder_input_config(
            'hdfs',
            Template("input.config-hdfs.json.j2", extra_imports=[default]))

        # set up failover / secure zookeper ACLs, this feature is supported from HDP 2.6 ownwards
        set_up_zkfc_security(params)
    elif action == "start":
        Logger.info("Called service {0} with upgrade_type: {1}".format(
            action, str(upgrade_type)))
        setup_ranger_hdfs(upgrade_type=upgrade_type)

        import params

        # keep the exclude (decommission) hosts file current before startup
        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        # format only a brand-new NameNode, and only when not disabled by config
        if do_format and not params.hdfs_namenode_format_disabled:
            format_namenode()
            pass

        if params.dfs_ha_enabled and \
          len(params.dfs_ha_namenode_active) > 0 and \
          params.hostname not in params.dfs_ha_namenode_active:
            # if the current host is the standby NameNode in an HA deployment
            # run the bootstrap command, to start the NameNode in standby mode
            # this requires that the active NameNode is already up and running,
            # so this execute should be re-tried upon failure, up to a timeout
            success = bootstrap_standby_namenode(params)
            if not success:
                raise Fail("Could not bootstrap standby namenode")

        if upgrade_type == constants.UPGRADE_TYPE_ROLLING and params.dfs_ha_enabled:
            # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
            # to kill ZKFC manually, so we need to start it if not already running.
            safe_zkfc_op(action, env)

        summary = upgrade_summary.get_upgrade_summary()
        is_downgrade_allowed = summary is not None and summary.is_downgrade_allowed
        is_switch_bits = summary is not None and summary.is_switch_bits

        # compute the NameNode startup options based on the upgrade state
        options = ""
        if is_switch_bits:
            Logger.info(
                "The {0} switches the binaries only. No options are used to restart NameNode."
                .format(summary.direction))
        else:
            if upgrade_type == constants.UPGRADE_TYPE_ROLLING:
                if params.upgrade_direction == Direction.UPGRADE:
                    options = "-rollingUpgrade started"
                elif params.upgrade_direction == Direction.DOWNGRADE:
                    options = ""
            elif upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
                is_previous_image_dir = is_previous_fs_image()
                Logger.info("Previous file system image dir present is {0}".format(
                    str(is_previous_image_dir)))

                if params.upgrade_direction == Direction.UPGRADE:
                    if is_downgrade_allowed:
                        options = "-rollingUpgrade started"
                    else:
                        # if we are HA, then -upgrade needs to be called for the active NN,
                        # then -bootstrapStandby on the other, followed by normal daemon
                        # if we are NOT HA, then -upgrade needs to be called on the lone NN
                        if params.dfs_ha_enabled:
                            name_service = get_name_service_by_hostname(
                                params.hdfs_site, params.hostname)
                            any_active = is_there_any_active_nn(name_service)
                            if any_active:
                                if not bootstrap_standby_namenode(
                                        params, use_path=True, run_if_present=True):
                                    raise Fail(
                                        "Could not bootstrap this namenode of an Express upgrade"
                                    )
                                options = ""  # we're bootstrapped, no other work needs to happen for the daemon
                            else:
                                options = "-upgrade"  # no other are active, so this host's NN is the first
                        else:
                            options = "-upgrade"  # non-HA

                        # a suspended upgrade whose marker file exists means the NN
                        # already entered the upgrade; do not pass -upgrade again
                        marker = os.path.exists(
                            namenode_upgrade.get_upgrade_in_progress_marker())
                        if options == "-upgrade" and upgrade_suspended is True and marker is True:
                            Logger.info(
                                "The NameNode is currently upgrading. No options will be passed to startup"
                            )
                            options = ""
                elif params.upgrade_direction == Direction.DOWNGRADE:
                    options = ""
            elif upgrade_type == constants.UPGRADE_TYPE_HOST_ORDERED:
                # nothing special to do for HOU - should be very close to a normal restart
                pass
            elif upgrade_type is None and upgrade_suspended is True:
                # the rollingUpgrade flag must be passed in during a suspended upgrade when starting NN
                if os.path.exists(
                        namenode_upgrade.get_upgrade_in_progress_marker()):
                    if is_downgrade_allowed:
                        options = "-rollingUpgrade started"
                    else:
                        options = "-upgrade"
                else:
                    Logger.info(
                        "The NameNode upgrade marker file {0} does not exist, yet an upgrade is currently suspended. "
                        "Assuming that the upgrade of NameNode has not occurred yet.".format(
                            namenode_upgrade.get_upgrade_in_progress_marker()))

        Logger.info("Options for start command are: {0}".format(options))

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            Execute(format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            ),
                    user=params.hdfs_user)

        name_service = get_name_service_by_hostname(params.hdfs_site,
                                                    params.hostname)

        # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
        # no-HA                 | ON -> OFF                | Yes                      |
        # HA and active         | ON -> OFF                | Yes                      |
        # HA and standby        | no change                | No                       |
        # RU with HA on active  | ON -> OFF                | Yes                      |
        # RU with HA on standby | ON -> OFF                | Yes                      |
        # EU with HA on active  | ON -> OFF                | No                       |
        # EU with HA on standby | ON -> OFF                | No                       |
        # EU non-HA             | ON -> OFF                | No                       |

        # because we do things like create directories after starting NN,
        # the vast majority of the time this should be True - it should only
        # be False if this is HA and we are the Standby NN
        ensure_safemode_off = True

        # True if this is the only NameNode (non-HA) or if its the Active one in HA
        is_active_namenode = True

        if params.dfs_ha_enabled:
            Logger.info(
                "Waiting for the NameNode to broadcast whether it is Active or Standby..."
            )

            if is_this_namenode_active(name_service) is False:
                # we are the STANDBY NN
                is_active_namenode = False

                # we are the STANDBY NN and this restart is not part of an upgrade
                if upgrade_type is None:
                    ensure_safemode_off = False

        # During an Express Upgrade, NameNode will not leave SafeMode until the DataNodes are started,
        # so always disable the Safemode check
        if upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
            ensure_safemode_off = False

        # some informative logging separate from the above logic to keep things a little cleaner
        if ensure_safemode_off:
            Logger.info(
                "Waiting for this NameNode to leave Safemode due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}"
                .format(params.dfs_ha_enabled, is_active_namenode, upgrade_type))
        else:
            Logger.info(
                "Skipping Safemode check due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}"
                .format(params.dfs_ha_enabled, is_active_namenode, upgrade_type))

        # wait for Safemode to end
        if ensure_safemode_off:
            if params.rolling_restart and params.rolling_restart_safemode_exit_timeout:
                # NOTE(review): '/' here is integer division on Python 2 but float
                # division on Python 3 - confirm wait_for_safemode_off tolerates a
                # non-integer retries value
                calculated_retries = int(
                    params.rolling_restart_safemode_exit_timeout) / 30
                wait_for_safemode_off(hdfs_binary,
                                      afterwait_sleep=30,
                                      retries=calculated_retries,
                                      sleep_seconds=30)
            else:
                wait_for_safemode_off(hdfs_binary)

        # Always run this on the "Active" NN unless Safemode has been ignored
        # in the case where safemode was ignored (like during an express upgrade), then
        # NN will be in SafeMode and cannot have directories created
        if is_active_namenode and ensure_safemode_off:
            create_hdfs_directories(name_service)
            create_ranger_audit_hdfs_directories()
        else:
            Logger.info(
                "Skipping creation of HDFS directories since this is either not the Active NameNode or we did not wait for Safemode to finish."
            )
    elif action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
    elif action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
    elif action == "decommission":
        decommission()
    elif action == "refresh_nodes":
        refresh_nodes()