def _create_config_links_if_necessary(self, stack_id, stack_version):
  """
  Sets up the required structure for /etc/<component>/conf symlinks and /usr/hdp/current
  configuration symlinks IFF the current stack is < HDP 2.3 and the new stack is >= HDP 2.3

  stack_id: stack id, ie HDP-2.3
  stack_version: version to set, ie 2.3.0.0-1234
  """
  if stack_id is None:
    Logger.info("Cannot create config links when stack_id is not defined")
    return

  args = stack_id.upper().split('-')
  if len(args) != 2:
    Logger.info("Unrecognized stack id {0}, cannot create config links".format(stack_id))
    return

  if args[0] != "HDP":
    Logger.info("Unrecognized stack name {0}, cannot create config links".format(args[0]))
    return

  if compare_versions(format_hdp_stack_version(args[1]), "2.3.0.0") < 0:
    Logger.info("Configuration symlinks are not needed for {0}, only HDP-2.3+".format(stack_version))
    return

  for package_name, directories in conf_select.PACKAGE_DIRS.iteritems():
    # if already on HDP 2.3, then we should skip making conf.backup folders
    if self.current_hdp_stack_version and compare_versions(self.current_hdp_stack_version, '2.3') >= 0:
      Logger.info("The current cluster stack of {0} does not require backing up configurations; "
                  "only conf-select versioned config directories will be created.".format(stack_version))
      # only link configs for all known packages
      conf_select.link_component_conf_to_versioned_config(package_name, stack_version)
    else:
      # link configs and create conf.backup folders for all known packages
      conf_select.convert_conf_directories_to_symlinks(package_name, stack_version, directories,
                                                       skip_existing_links = False, link_to = "backup")
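# Illustration only: a minimal standalone sketch of the HDP-2.3 gate used above (plain Python,
# no Ambari imports). _cmp is a simplified stand-in for format_hdp_stack_version plus
# compare_versions, which normalize short versions such as "2.3" to "2.3.0.0" before comparing.
def _cmp(v1, v2):
  a = [int(x) for x in v1.split('.')]
  b = [int(x) for x in v2.split('.')]
  n = max(len(a), len(b))
  a += [0] * (n - len(a))
  b += [0] * (n - len(b))
  return (a > b) - (a < b)

def needs_config_links(stack_id):
  if stack_id is None:
    return False
  args = stack_id.upper().split('-')
  if len(args) != 2 or args[0] != "HDP":
    return False
  return _cmp(args[1], "2.3.0.0") >= 0

assert needs_config_links("HDP-2.3") is True
assert needs_config_links("HDP-2.2") is False
assert needs_config_links("PHD-3.0") is False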
def pre_upgrade_restart(self, env, upgrade_type=None):
  import params
  env.set_params(params)

  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    hdp_select.select("kafka-broker", params.version)

  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.3.0.0') >= 0:
    conf_select.select(params.stack_name, "kafka", params.version)

  # This is extremely important since it should only be called if crossing the HDP 2.3.4.0 boundary.
  if params.current_version and params.version and params.upgrade_direction:
    src_version = dst_version = None
    if params.upgrade_direction == Direction.UPGRADE:
      src_version = format_hdp_stack_version(params.current_version)
      dst_version = format_hdp_stack_version(params.version)
    else:
      # These represent the original values during the UPGRADE direction
      src_version = format_hdp_stack_version(params.version)
      dst_version = format_hdp_stack_version(params.downgrade_from_version)

    if compare_versions(src_version, '2.3.4.0') < 0 and compare_versions(dst_version, '2.3.4.0') >= 0:
      # Calling the acl migration script requires the configs to be present.
      self.configure(env, upgrade_type=upgrade_type)
      upgrade.run_migration(env, upgrade_type)
def actionexecute(self, env):
  config = Script.get_config()

  version = default('/commandParams/version', None)
  stack_name = default('/hostLevelParams/stack_name', "")

  if not version:
    raise Fail("Value is required for '/commandParams/version'")

  # other os?
  if OSCheck.is_redhat_family():
    cmd = ('/usr/bin/yum', 'clean', 'all')
    code, out = shell.call(cmd, sudo=True)

  min_ver = format_hdp_stack_version("2.2")
  real_ver = format_hdp_stack_version(version)
  if stack_name == "HDP":
    if compare_versions(real_ver, min_ver) >= 0:
      cmd = ('hdp-select', 'set', 'all', version)
      code, out = shell.call(cmd, sudo=True)

    if compare_versions(real_ver, format_hdp_stack_version("2.3")) >= 0:
      # backup the old and symlink /etc/[component]/conf to /usr/hdp/current/[component]
      for k, v in conf_select.PACKAGE_DIRS.iteritems():
        for dir_def in v:
          link_config(dir_def['conf_dir'], dir_def['current_dir'])
def check_stack_feature(stack_feature, stack_version):
  """
  Given a stack_feature and a specific stack_version, validates that the feature is supported by that stack_version.
  IMPORTANT: the mapping of feature to version comes from cluster-env if it exists there.
  :param stack_feature: Feature name to check if it is supported by the stack. For example: "rolling_upgrade"
  :param stack_version: Version of the stack
  :return: True if the feature is supported, otherwise False.
  """
  from resource_management.libraries.functions.default import default
  from resource_management.libraries.functions.version import compare_versions

  stack_features_config = default("/configurations/cluster-env/stack_features", None)

  if not stack_version:
    Logger.debug("Cannot determine if feature %s is supported because no stack version was provided." % stack_feature)
    return False

  if stack_features_config:
    data = json.loads(stack_features_config)
    for feature in data["stack_features"]:
      if feature["name"] == stack_feature:
        if "min_version" in feature:
          min_version = feature["min_version"]
          if compare_versions(stack_version, min_version, format = True) < 0:
            return False
        if "max_version" in feature:
          max_version = feature["max_version"]
          if compare_versions(stack_version, max_version, format = True) >= 0:
            return False
        return True
  else:
    raise Fail("Stack features not defined by stack")

  return False
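# Illustration only: a self-contained sketch of the min_version/max_version gate that
# check_stack_feature applies. The feature entries below are hypothetical sample data, and
# _cmp is a simplified stand-in for compare_versions(..., format=True).
import json

SAMPLE_STACK_FEATURES = json.dumps({
  "stack_features": [
    {"name": "rolling_upgrade", "min_version": "2.2.0.0"},
    {"name": "config_versioning", "min_version": "2.3.0.0", "max_version": "3.0.0.0"}
  ]
})

def _cmp(v1, v2):
  a = [int(x) for x in v1.split('.')]
  b = [int(x) for x in v2.split('.')]
  n = max(len(a), len(b))
  a, b = a + [0] * (n - len(a)), b + [0] * (n - len(b))
  return (a > b) - (a < b)

def feature_supported(feature_name, stack_version, features_json=SAMPLE_STACK_FEATURES):
  for feature in json.loads(features_json)["stack_features"]:
    if feature["name"] != feature_name:
      continue
    if "min_version" in feature and _cmp(stack_version, feature["min_version"]) < 0:
      return False
    if "max_version" in feature and _cmp(stack_version, feature["max_version"]) >= 0:
      return False
    return True
  return False

assert feature_supported("rolling_upgrade", "2.3.0.0")
assert not feature_supported("config_versioning", "2.2.0.0")  # below min_version
assert not feature_supported("config_versioning", "3.0.0.0")  # at/above max_version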
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Execute(format("hdp-select set spark-historyserver {version}"))
    copy_tarballs_to_hdfs('tez', 'spark-historyserver', params.spark_user, params.hdfs_user, params.user_group)
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.3.2.0') >= 0:
    conf_select.select(params.stack_name, "spark", params.version)
    hdp_select.select("spark-thriftserver", params.version)
def pre_rolling_restart(self, env): Logger.info("Executing Rolling Upgrade post-restart") import params env.set_params(params) if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0: Execute(format("hdp-select set hadoop-yarn-resourcemanager {version}"))
def pre_upgrade_restart(self, env, upgrade_type=None):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "storm", params.version)
    hdp_select.select("storm-client", params.version)
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "sqoop", params.version)
    hdp_select.select("sqoop-client", params.version)
def _get_current_hiveserver_version():
  """
  Runs "hive --version" and parses the result in order to obtain the current version of hive.

  :return: the hiveserver2 version, returned by "hive --version"
  """
  import params

  try:
    # When downgrading, the source version is the version we are downgrading from
    if "downgrade" == params.upgrade_direction:
      if not params.downgrade_from_version:
        raise Fail('The version we are downgrading from should be provided in \'downgrade_from_version\'')
      source_version = params.downgrade_from_version
    else:
      source_version = params.current_version
    hive_execute_path = _get_hive_execute_path(source_version)
    version_hive_bin = params.hive_bin
    formatted_source_version = format_hdp_stack_version(source_version)
    if formatted_source_version and compare_versions(formatted_source_version, "2.2") >= 0:
      version_hive_bin = format('/usr/hdp/{source_version}/hive/bin')
    command = format('{version_hive_bin}/hive --version')
    return_code, hdp_output = shell.call(command, user=params.hive_user, path=hive_execute_path)
  except Exception, e:
    Logger.error(str(e))
    raise Fail('Unable to execute hive --version command to retrieve the hiveserver2 version.')
def _get_directory_mappings_during_upgrade():
  """
  Gets a dictionary of directory to archive name that represents the
  directories that need to be backed up and their output tarball archive targets
  :return: the dictionary of directory to tarball mappings
  """
  import params

  # Must be performing an Upgrade
  if params.upgrade_direction is None or params.upgrade_direction != Direction.UPGRADE or \
      params.upgrade_from_version is None or params.upgrade_from_version == "":
    Logger.error("Function _get_directory_mappings_during_upgrade() can only be called during a Stack Upgrade in direction UPGRADE.")
    return {}

  # By default, use this for all stacks.
  knox_data_dir = '/var/lib/knox/data'

  if params.stack_name and params.stack_name.upper() == "HDP" and \
      compare_versions(format_hdp_stack_version(params.upgrade_from_version), "2.3.0.0") > 0:
    # Use the version that is being upgraded from.
    knox_data_dir = format('/usr/hdp/{upgrade_from_version}/knox/data')

  # the trailing "/" is important here so as to not include the "conf" folder itself
  directories = {knox_data_dir: BACKUP_DATA_ARCHIVE, params.knox_conf_dir + "/": BACKUP_CONF_ARCHIVE}

  Logger.info(format("Knox directories to backup:\n{directories}"))
  return directories
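# Illustration only: the mapping returned above pairs each source directory with a tarball
# name. The archive constants and paths below are hypothetical placeholders (the conf archive
# name echoes the "knox-conf-backup.tar" mentioned in the Knox pre-restart snippet later on).
EXAMPLE_BACKUP_DATA_ARCHIVE = "knox-data-backup.tar"
EXAMPLE_BACKUP_CONF_ARCHIVE = "knox-conf-backup.tar"

example_mappings = {
  "/usr/hdp/2.3.2.0-2950/knox/data": EXAMPLE_BACKUP_DATA_ARCHIVE,
  # trailing "/" so only the *contents* of the conf dir are archived, not the dir itself
  "/usr/hdp/current/knox-server/conf/": EXAMPLE_BACKUP_CONF_ARCHIVE,
}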
def spark_service(action):
  import params

  if action == 'start':
    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.hdp_stack_version and compare_versions(params.hdp_stack_version, '2.3.0.0') < 0:
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user)
      if resource_created:
        params.HdfsResource(None, action="execute")

    no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
    Execute(format('{spark_history_server_start}'),
            user=params.spark_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=no_op_test
    )
  elif action == 'stop':
    Execute(format('{spark_history_server_stop}'),
            user=params.spark_user,
            environment={'JAVA_HOME': params.java_home}
    )
    File(params.spark_history_server_pid_file,
         action="delete"
    )
def pre_rolling_restart(self, env):
  Logger.info("Executing Rolling Upgrade pre-restart")
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Execute(format("hdp-select set zookeeper-server {version}"))
def pre_upgrade_restart(self, env, upgrade_type=None): Logger.info("Executing DataNode Stack Upgrade pre-restart") import params env.set_params(params) if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0: conf_select.select(params.stack_name, "hadoop", params.version) hdp_select.select("hadoop-hdfs-datanode", params.version)
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)

  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    absolute_backup_dir = None
    if params.upgrade_direction and params.upgrade_direction == Direction.UPGRADE:
      Logger.info("Backing up directories. Initial conf folder: %s" % os.path.realpath(params.knox_conf_dir))

      # This will backup the contents of the conf directory into /tmp/knox-upgrade-backup/knox-conf-backup.tar
      absolute_backup_dir = upgrade.backup_data()

    # conf-select will change the symlink to the conf folder.
    conf_select.select(params.stack_name, "knox", params.version)
    hdp_select.select("knox-server", params.version)

    # Extract the tar of the old conf folder into the new conf directory
    if absolute_backup_dir is not None and params.upgrade_direction and params.upgrade_direction == Direction.UPGRADE:
      conf_tar_source_path = os.path.join(absolute_backup_dir, upgrade.BACKUP_CONF_ARCHIVE)
      if os.path.exists(conf_tar_source_path):
        extract_dir = os.path.realpath(params.knox_conf_dir)
        conf_tar_dest_path = os.path.join(extract_dir, upgrade.BACKUP_CONF_ARCHIVE)
        Logger.info("Copying %s into %s file." % (upgrade.BACKUP_CONF_ARCHIVE, conf_tar_dest_path))
        Execute(('cp', conf_tar_source_path, conf_tar_dest_path),
                sudo = True,
        )

        tar_archive.untar_archive(conf_tar_source_path, extract_dir)

        File(conf_tar_dest_path,
             action = "delete",
        )
def zookeeper_service(action='start', upgrade_type=None):
  import params

  # This path may be missing after Ambari upgrade. We need to create it. Note that this runs
  # outside of a stack upgrade, so the current (installed) version is used consistently here.
  if upgrade_type is None and not os.path.exists("/usr/hdp/current/zookeeper-server") and params.current_version \
      and compare_versions(format_hdp_stack_version(params.current_version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "zookeeper", params.current_version)
    hdp_select.select("zookeeper-server", params.current_version)

  cmd = format("env ZOOCFGDIR={config_dir} ZOOCFG=zoo.cfg {zk_bin}/zkServer.sh")

  if action == 'start':
    daemon_cmd = format("source {config_dir}/zookeeper-env.sh ; {cmd} start")
    no_op_test = format("ls {zk_pid_file} >/dev/null 2>&1 && ps -p `cat {zk_pid_file}` >/dev/null 2>&1")
    Execute(daemon_cmd,
            not_if=no_op_test,
            user=params.zk_user
    )

    if params.security_enabled:
      kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
      Execute(kinit_cmd,
              user=params.smokeuser
      )
  elif action == 'stop':
    daemon_cmd = format("source {config_dir}/zookeeper-env.sh ; {cmd} stop")
    rm_pid = format("rm -f {zk_pid_file}")
    Execute(daemon_cmd,
            user=params.zk_user
    )
    Execute(rm_pid)
def pre_rolling_restart(self, env): Logger.info("Executing DataNode Rolling Upgrade pre-restart") import params env.set_params(params) if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0: Execute(format("hdp-select set hadoop-hdfs-datanode {version}"))
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)

  if params.version and compare_versions(format_hdp_stack_version(params.version), "2.2.0.0") >= 0:
    conf_select.select(params.stack_name, "spark", params.version)
    hdp_select.select("spark-historyserver", params.version)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.version and compare_versions(format_hdp_stack_version(params.version), "2.3.0.0") < 0:
      resource_created = copy_to_hdfs(
        "tez",
        params.user_group,
        params.hdfs_user,
        host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")
def pre_upgrade_restart(self, env, upgrade_type=None):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Logger.info("Executing Spark Client Stack Upgrade pre-restart")
    conf_select.select(params.stack_name, "spark", params.version)
    hdp_select.select("spark-client", params.version)
def pre_upgrade_restart(self, env, upgrade_type=None): Logger.info("Executing Stack Upgrade pre-restart") import params env.set_params(params) if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0: conf_select.select(params.stack_name, "zookeeper", params.version) hdp_select.select("zookeeper-server", params.version)
def unlink_all_configs(self, env):
  """
  Reverses the work performed in link_config. This should only be used when downgrading from
  HDP 2.3 to 2.2 in order to undo the symlink work required for 2.3.
  """
  stack_name = default('/hostLevelParams/stack_name', "").upper()
  downgrade_to_version = default('/commandParams/version', None)
  downgrade_from_version = default('/commandParams/downgrade_from_version', None)
  upgrade_direction = default("/commandParams/upgrade_direction", Direction.UPGRADE)

  # downgrade only
  if upgrade_direction != Direction.DOWNGRADE:
    Logger.warning("Unlinking configurations should only be performed on a downgrade.")
    return

  # HDP only
  if stack_name != "HDP":
    Logger.warning("Unlinking configurations should only be performed on the HDP stack.")
    return

  if downgrade_to_version is None or downgrade_from_version is None:
    Logger.warning("Both 'commandParams/version' and 'commandParams/downgrade_from_version' must be specified to unlink configs on downgrade.")
    return

  Logger.info("Unlinking all configs when downgrading from HDP 2.3 to 2.2")

  # normalize the versions
  stack_23 = format_hdp_stack_version("2.3")
  downgrade_to_version = format_hdp_stack_version(downgrade_to_version)
  downgrade_from_version = format_hdp_stack_version(downgrade_from_version)

  # downgrade-to-version must be 2.2 (less than 2.3)
  if compare_versions(downgrade_to_version, stack_23) >= 0:
    Logger.warning("Unlinking configurations should only be performed when downgrading to HDP 2.2")
    return

  # downgrade-from-version must be 2.3+
  if compare_versions(downgrade_from_version, stack_23) < 0:
    Logger.warning("Unlinking configurations should only be performed when downgrading from HDP 2.3 or later")
    return

  # iterate through all directory conf mappings and undo the symlinks
  for key, value in conf_select.PACKAGE_DIRS.iteritems():
    for directory_mapping in value:
      original_config_directory = directory_mapping['conf_dir']
      self._unlink_config(original_config_directory)
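# Illustration only: the gate above requires a DOWNGRADE that crosses the 2.3 boundary
# downward (from >= 2.3 to < 2.3). A self-contained sketch of that decision, where _norm is a
# simplified stand-in for format_hdp_stack_version/compare_versions:
def _norm(v):
  parts = [int(x) for x in v.split('-')[0].split('.')]
  return tuple(parts + [0] * (4 - len(parts)))

def should_unlink_configs(direction, downgrade_to, downgrade_from):
  if direction != "downgrade" or not downgrade_to or not downgrade_from:
    return False
  stack_23 = _norm("2.3")
  return _norm(downgrade_to) < stack_23 <= _norm(downgrade_from)

assert should_unlink_configs("downgrade", "2.2.4.2", "2.3.0.0")
assert not should_unlink_configs("downgrade", "2.3.0.0", "2.3.2.0")  # still on 2.3
assert not should_unlink_configs("upgrade", "2.2.4.2", "2.3.0.0")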
def pre_rolling_restart(self, env):
  Logger.info("Executing DataNode Rolling Upgrade pre-restart")
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), "2.2.0.0") >= 0:
    conf_select.select(params.stack_name, "hadoop", params.version)
    hdp_select.select("hadoop-hdfs-datanode", params.version)
def pre_rolling_restart(self, env):
  Logger.info("Executing Rolling Upgrade pre-restart")
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "zookeeper", params.version)
    hdp_select.select("zookeeper-client", params.version)
def pre_rolling_restart(self, env):
  Logger.info("Executing Rolling Upgrade pre-restart")
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Execute(format("hdp-select set hadoop-mapreduce-historyserver {version}"))
    copy_tarballs_to_hdfs('mapreduce', 'hadoop-mapreduce-historyserver', params.mapred_user, params.hdfs_user, params.user_group)
def pre_rolling_restart(self, env):
  Logger.info("Executing Rolling Upgrade pre-restart")
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "hadoop", params.version)
    hdp_select.select("hadoop-yarn-resourcemanager", params.version)
def upgrade_schema(self, env):
  """
  Executes the schema upgrade binary. This is its own function because it could
  be called as a standalone task from the upgrade pack, but is safe to run
  for each metastore instance.

  The metastore schema upgrade requires a database driver library for most
  databases. During an upgrade, it's possible that the library is not present,
  so this will also attempt to copy/download the appropriate driver.
  """
  Logger.info("Upgrading Hive Metastore")
  import params
  env.set_params(params)

  if params.security_enabled:
    kinit_command = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command, user=params.smokeuser)

  # ensure that the JDBC driver is present for the schema tool; if it's not
  # present, then download it first
  if params.hive_jdbc_driver in params.hive_jdbc_drivers_list and params.hive_use_existing_db:
    target_directory = format("/usr/hdp/{version}/hive/lib")
    if not os.path.exists(params.target):
      # download it
      jdbc_connector()

    if params.sqla_db_used:
      target_native_libs_directory = format("{target_directory}/native/lib64")

      Execute(format("yes | {sudo} cp {jars_in_hive_lib} {target_directory}"))

      Directory(target_native_libs_directory, recursive=True)

      Execute(format("yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"))

      Execute(format("{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"))
    else:
      Execute(("cp", params.target, target_directory),
              path=["/bin", "/usr/bin/"], sudo=True)

      File(os.path.join(target_directory, os.path.basename(params.target)), mode=0644)

  # build the schema tool command
  binary = format("/usr/hdp/{version}/hive/bin/schematool")

  # the conf.server directory changed locations between HDP 2.2 and 2.3
  # since the configurations have not been written out yet during an upgrade
  # we need to choose the original legacy location
  schematool_hive_server_conf_dir = params.hive_server_conf_dir
  if params.current_version is not None:
    current_version = format_hdp_stack_version(params.current_version)
    if compare_versions(current_version, "2.3") < 0:
      schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

  env_dict = {"HIVE_CONF_DIR": schematool_hive_server_conf_dir}

  command = format("{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
  Execute(command, user=params.hive_user, tries=1, environment=env_dict, logoutput=True)
def install_windows_msi(url_base, save_dir, save_files, hadoop_user, hadoop_password, stack_version):
  global _working_dir
  _working_dir = save_dir
  save_dir = os.path.abspath(save_dir)
  msi_save_dir = save_dir
  # system wide lock to prevent simultaneous installations (when the first task failed on timeout)
  install_lock = SystemWideLock("Global\\hdp_msi_lock")
  try:
    # try to acquire lock
    if not install_lock.lock():
      Logger.info("Some other task is currently installing hdp.msi, waiting up to 10 min for it to finish")
      if not install_lock.lock(600000):
        raise Fail("Timeout on acquiring lock")
    if _validate_msi_install():
      Logger.info("hdp.msi is already installed")
      return

    stack_version_formatted = format_stack_version(stack_version)
    hdp_22_specific_props = ''
    if stack_version_formatted != "" and compare_versions(stack_version_formatted, '2.2') >= 0:
      hdp_22_specific_props = hdp_22.format(data_dir=data_dir)

    # MSIs cannot be larger than 2GB. HDPWIN 2.3 needed to be split in order to accommodate this limitation
    msi_file = ''
    for save_file in save_files:
      if save_file.lower().endswith(".msi"):
        msi_file = save_file
      file_url = urlparse.urljoin(url_base, save_file)
      try:
        download_file(file_url, os.path.join(msi_save_dir, save_file))
      except:
        raise Fail("Failed to download {url}".format(url=file_url))

    File(os.path.join(msi_save_dir, "properties.txt"),
         content=cluster_properties.format(log_dir=log_dir,
                                           data_dir=data_dir,
                                           local_host=local_host,
                                           db_flavor=db_flavor,
                                           hdp_22_specific_props=hdp_22_specific_props))

    # install msi
    msi_path = os_utils.quote_path(os.path.join(save_dir, msi_file))
    log_path = os_utils.quote_path(os.path.join(save_dir, msi_file[:-3] + "log"))
    layout_path = os_utils.quote_path(os.path.join(save_dir, "properties.txt"))
    hadoop_password_arg = os_utils.quote_path(hadoop_password)

    Execute(
      INSTALL_MSI_CMD.format(msi_path=msi_path, log_path=log_path, layout_path=layout_path,
                             hadoop_user=hadoop_user, hadoop_password_arg=hadoop_password_arg))
    reload_windows_env()
    # create additional services manually due to hdp.msi limitations
    _ensure_services_created(hadoop_user, hadoop_password)
    _create_symlinks(stack_version)
    # finalizing install
    _write_marker()
    _validate_msi_install()
  finally:
    install_lock.unlock()
def service_check(self, env):
  import params
  env.set_params(params)

  path_to_tez_jar = format(params.tez_examples_jar)
  wordcount_command = format("jar {path_to_tez_jar} orderedwordcount /tmp/tezsmokeinput/sample-tez-test /tmp/tezsmokeoutput/")
  test_command = format("fs -test -e /tmp/tezsmokeoutput/_SUCCESS")

  File(format("{tmp_dir}/sample-tez-test"),
       content = "foo\nbar\nfoo\nbar\nfoo",
       mode = 0755
  )

  params.HdfsResource("/tmp/tezsmokeoutput",
                      action = "delete_on_execute",
                      type = "directory"
  )

  params.HdfsResource("/tmp/tezsmokeinput",
                      action = "create_on_execute",
                      type = "directory",
                      owner = params.smokeuser,
  )
  params.HdfsResource("/tmp/tezsmokeinput/sample-tez-test",
                      action = "create_on_execute",
                      type = "file",
                      owner = params.smokeuser,
                      source = format("{tmp_dir}/sample-tez-test"),
  )

  if params.hdp_stack_version and compare_versions(params.hdp_stack_version, '2.2.0.0') >= 0:
    copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)

  params.HdfsResource(None, action = "execute")

  if params.security_enabled:
    kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
    Execute(kinit_cmd,
            user=params.smokeuser
    )

  ExecuteHadoop(wordcount_command,
                tries = 3,
                try_sleep = 5,
                user = params.smokeuser,
                conf_dir = params.hadoop_conf_dir,
                bin_dir = params.hadoop_bin_dir
  )

  ExecuteHadoop(test_command,
                tries = 10,
                try_sleep = 6,
                user = params.smokeuser,
                conf_dir = params.hadoop_conf_dir,
                bin_dir = params.hadoop_bin_dir
  )
def setup_hdp_install_directory():
  # This is the name of the marker file.
  SELECT_ALL_PERFORMED_MARKER = "/var/lib/ambari-agent/data/hdp-select-set-all.performed"

  import params
  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0:
    Execute(as_sudo(['touch', SELECT_ALL_PERFORMED_MARKER]) + ' ; ' +
            format('{sudo} /usr/bin/hdp-select set all `ambari-python-wrap /usr/bin/hdp-select versions | grep ^{stack_version_unformatted} | tail -1`'),
            only_if=format('ls -d /usr/hdp/{stack_version_unformatted}*'),  # If any HDP version is installed
            not_if=format("test -f {SELECT_ALL_PERFORMED_MARKER}")          # Do that only once (otherwise we break rolling upgrade logic)
    )
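# Illustration only: the Execute above is a "run once" guard. It touches a marker file and
# refuses to run again (not_if), and only runs at all when some /usr/hdp/<version> directory
# exists (only_if). A self-contained sketch of the same idempotency pattern, with a
# hypothetical marker path:
import glob
import os

EXAMPLE_MARKER = "/tmp/example-select-all.performed"

def select_all_once(stack_version_unformatted):
  if os.path.isfile(EXAMPLE_MARKER):                                    # not_if
    return "skipped: already performed"
  if not glob.glob("/usr/hdp/%s*" % stack_version_unformatted):         # only_if
    return "skipped: no matching HDP version installed"
  open(EXAMPLE_MARKER, "w").close()                                     # touch the marker first
  return "would run: hdp-select set all <latest matching version>"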
def spark_service(name, action): import params if action == "start": if params.security_enabled: spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ") Execute(spark_kinit_cmd, user=params.spark_user) # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not # need to copy the tarball, otherwise, copy it. if params.hdp_stack_version and compare_versions(params.hdp_stack_version, "2.3.0.0") < 0: resource_created = copy_to_hdfs( "tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped ) if resource_created: params.HdfsResource(None, action="execute") if name == "jobhistoryserver": historyserver_no_op_test = format( "ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1" ) Execute( format("{spark_history_server_start}"), user=params.spark_user, environment={"JAVA_HOME": params.java_home}, not_if=historyserver_no_op_test, ) elif name == "sparkthriftserver": thriftserver_no_op_test = format( "ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1" ) Execute( format("{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file}"), user=params.spark_user, environment={"JAVA_HOME": params.java_home}, not_if=thriftserver_no_op_test, ) elif action == "stop": if name == "jobhistoryserver": Execute( format("{spark_history_server_stop}"), user=params.spark_user, environment={"JAVA_HOME": params.java_home}, ) File(params.spark_history_server_pid_file, action="delete") elif name == "sparkthriftserver": Execute( format("{spark_thrift_server_stop}"), user=params.spark_user, environment={"JAVA_HOME": params.java_home}, ) File(params.spark_thrift_server_pid_file, action="delete")
def pre_upgrade_restart(self, env, upgrade_type=None):
  """
  Execute <stack-selector-tool> before reconfiguring this client to the new HDP version.

  :param env:
  :param upgrade_type:
  :return:
  """
  Logger.info("Executing Hive HCat Client Stack Upgrade pre-restart")

  import params
  env.set_params(params)

  # this function should not execute if the version can't be determined or
  # is not at least HDP 2.2.0.0
  if not params.version or compare_versions(params.version, "2.2", format=True) < 0:
    return

  # HCat client doesn't have a first-class entry in <stack-selector-tool>. Since clients always
  # update after daemons, this ensures that the hcat directories are correct on hosts
  # which do not include the WebHCat daemon
  stack_select.select("hive-webhcat", params.version)
def should_expose_component_version(self, command_name):
  """
  Analyzes config and given command to determine if stack version should be written
  to structured out. Currently only HDP stack versions >= 2.2 are supported.
  :param command_name: command name
  :return: True or False
  """
  from resource_management.libraries.functions.default import default
  stack_version_unformatted = str(default("/hostLevelParams/stack_version", ""))
  hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)
  if hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2') >= 0:
    if command_name.lower() == "status":
      request_version = default("/commandParams/request_version", None)
      if request_version is not None:
        return True
    else:
      # Populate version only on base commands
      return command_name.lower() == "start" or command_name.lower() == "install" or command_name.lower() == "restart"
  return False
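# Illustration only: a condensed decision table for should_expose_component_version on a host
# whose stack is already >= HDP 2.2 (that precondition is assumed here). request_version is
# only consulted for "status" commands.
def expose_version(command_name, request_version=None):
  command_name = command_name.lower()
  if command_name == "status":
    return request_version is not None
  return command_name in ("start", "install", "restart")

assert expose_version("START")
assert not expose_version("status")
assert expose_version("status", request_version="2.3.2.0")
assert not expose_version("configure")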
def pre_rolling_restart(self, env): Logger.info("Executing HiveServer2 Rolling Upgrade pre-restart") import params env.set_params(params) if params.version and compare_versions( format_hdp_stack_version(params.version), '2.2.0.0') >= 0: conf_select.select(params.stack_name, "hive", params.version) hdp_select.select("hive-server2", params.version) # Copy mapreduce.tar.gz and tez.tar.gz to HDFS resource_created = copy_to_hdfs( "mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) resource_created = copy_to_hdfs( "tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) or resource_created if resource_created: params.HdfsResource(None, action="execute")
def pre_upgrade_restart(self, env, upgrade_type=None):
  Logger.info("Executing Stack Upgrade pre-restart")
  import params
  env.set_params(params)

  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    conf_select.select(params.stack_name, "hadoop", params.version)
    hdp_select.select("hadoop-mapreduce-historyserver", params.version)
    # MC Hammer said, "Can't touch this"
    copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
    copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
    copy_to_hdfs("slider", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
    params.HdfsResource(None, action="execute")
def prepare(self, env):
  """
  During the "Upgrade" direction of a Stack Upgrade, it is necessary to ensure that the
  older tez tarball has been copied to HDFS. This is an additional check for added robustness.
  """
  import params
  env.set_params(params)

  Logger.info("Before starting Stack Upgrade, check if tez tarball has been copied to HDFS.")

  if params.stack_version_formatted and compare_versions(params.stack_version_formatted, '2.2.0.0') >= 0:
    Logger.info("Stack version {0} is sufficient to check whether tez.tar.gz needs to be copied to HDFS.".format(params.stack_version_formatted))

    # Force it to copy the current version of the tez tarball, rather than the version the RU will go to.
    resource_created = copy_to_hdfs(
      "tez",
      params.user_group,
      params.hdfs_user,
      use_upgrading_version_during_uprade=False,
      host_sys_prepped=params.host_sys_prepped)
    if resource_created:
      params.HdfsResource(None, action="execute")
  else:
    raise Fail("Could not copy tez tarball to HDFS.")
def upgrade_schema(self, env):
  """
  Executes the schema upgrade binary. This is its own function because it could
  be called as a standalone task from the upgrade pack, but is safe to run
  for each metastore instance.
  """
  Logger.info("Upgrading Hive Metastore")
  import params
  env.set_params(params)

  if params.security_enabled:
    kinit_command = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command, user=params.smokeuser)

  binary = format("/usr/hdp/{version}/hive/bin/schematool")

  # the conf.server directory changed locations between HDP 2.2 and 2.3
  # since the configurations have not been written out yet during an upgrade
  # we need to choose the original legacy location
  schematool_hive_server_conf_dir = params.hive_server_conf_dir
  if params.current_version is not None:
    current_version = format_hdp_stack_version(params.current_version)
    if compare_versions(current_version, "2.3") < 0:
      schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

  env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir}

  command = format("{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
  Execute(command, user=params.hive_user, tries=1, environment=env_dict, logoutput=True)
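# Illustration only: the schematool points HIVE_CONF_DIR at the legacy conf.server location
# when the cluster is coming from a pre-2.3 stack, because the versioned configs have not been
# written out yet at that point of the upgrade. The legacy path below is an assumed value for
# the LEGACY_HIVE_SERVER_CONF constant referenced above, and the helper is a simplified sketch.
EXAMPLE_LEGACY_HIVE_SERVER_CONF = "/etc/hive/conf.server"

def pick_schematool_conf_dir(current_version, versioned_conf_dir):
  if current_version is not None and tuple(int(x) for x in current_version.split('.')[:2]) < (2, 3):
    return EXAMPLE_LEGACY_HIVE_SERVER_CONF
  return versioned_conf_dir

assert pick_schematool_conf_dir("2.2.9.0", "/usr/hdp/current/hive-server2/conf/conf.server") == EXAMPLE_LEGACY_HIVE_SERVER_CONF
assert pick_schematool_conf_dir("2.3.2.0", "/usr/hdp/current/hive-server2/conf/conf.server").startswith("/usr/hdp")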
def recommendZEPPELINConfigurations(self, configurations, clusterData, services, hosts):
  """
  :type configurations dict
  :type clusterData dict
  :type services dict
  :type hosts dict
  """
  super(HDP26StackAdvisor, self).recommendZeppelinConfigurations(configurations, clusterData, services, hosts)

  cluster_env = self.getServicesSiteProperties(services, "cluster-env")
  if cluster_env and "recommendations_full_stack_version" in cluster_env:
    full_stack_version = cluster_env["recommendations_full_stack_version"]
    if full_stack_version and compare_versions(full_stack_version, '2.6.3.0', format=True) >= 0:
      zeppelin_config = self.getServicesSiteProperties(services, "zeppelin-config")
      if zeppelin_config:
        putZeppelinConfigProperty = self.putProperty(configurations, 'zeppelin-config', services)
        if zeppelin_config.get('zeppelin.notebook.storage', None) == 'org.apache.zeppelin.notebook.repo.VFSNotebookRepo':
          putZeppelinConfigProperty('zeppelin.notebook.storage', 'org.apache.zeppelin.notebook.repo.FileSystemNotebookRepo')
        if 'zeppelin.config.fs.dir' not in zeppelin_config:
          putZeppelinConfigProperty('zeppelin.config.fs.dir', 'conf')

  self.__addZeppelinToLivy2SuperUsers(configurations, services)
def upgrade_schema(self, env): """ Executes the schema upgrade binary. This is its own function because it could be called as a standalone task from the upgrade pack, but is safe to run it for each metastore instance. The schema upgrade on an already upgraded metastore is a NOOP. The metastore schema upgrade requires a database driver library for most databases. During an upgrade, it's possible that the library is not present, so this will also attempt to copy/download the appropriate driver. This function will also ensure that configurations are written out to disk before running since the new configs will most likely not yet exist on an upgrade. Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades. """ Logger.info("Upgrading Hive Metastore Schema") import params env.set_params(params) # ensure that configurations are written out before trying to upgrade the schema # since the schematool needs configs and doesn't know how to use the hive conf override self.configure(env) if params.security_enabled: kinit_command = format( "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; " ) Execute(kinit_command, user=params.smokeuser) # ensure that the JDBC drive is present for the schema tool; if it's not # present, then download it first if params.hive_jdbc_driver in params.hive_jdbc_drivers_list: target_directory = format("/usr/hdp/{version}/hive/lib") # download it if it does not exist if not os.path.exists(params.source_jdbc_file): jdbc_connector() target_directory_and_filename = os.path.join( target_directory, os.path.basename(params.source_jdbc_file)) if params.sqla_db_used: target_native_libs_directory = format( "{target_directory}/native/lib64") Execute( format( "yes | {sudo} cp {jars_in_hive_lib} {target_directory}" )) Directory(target_native_libs_directory, create_parents=True) Execute( format( "yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}" )) Execute( format( "{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*" )) else: # copy the JDBC driver from the older metastore location to the new location only # if it does not already exist if not os.path.exists(target_directory_and_filename): Execute(('cp', params.source_jdbc_file, target_directory), path=["/bin", "/usr/bin/"], sudo=True) File(target_directory_and_filename, mode=0644) # build the schema tool command binary = format("/usr/hdp/{version}/hive/bin/schematool") # the conf.server directory changed locations between HDP 2.2 and 2.3 # since the configurations have not been written out yet during an upgrade # we need to choose the original legacy location schematool_hive_server_conf_dir = params.hive_server_conf_dir if params.current_version is not None: current_version = format_hdp_stack_version(params.current_version) if compare_versions(current_version, "2.3") < 0: schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir} command = format( "{binary} -dbType {hive_metastore_db_type} -upgradeSchema") Execute(command, user=params.hive_user, tries=1, environment=env_dict, logoutput=True)
def oozie_server_specific(): import params no_op_test = as_user(format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ), user=params.oozie_user) File(params.pid_file, action="delete", not_if=no_op_test) oozie_server_directories = [ format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir ] Directory( oozie_server_directories, owner=params.oozie_user, group=params.user_group, mode=0755, create_parents=True, cd_access="a", ) Directory( params.oozie_libext_dir, create_parents=True, ) hashcode_file = format("{oozie_home}/.hashcode") skip_recreate_sharelib = format( "test -f {hashcode_file} && test -d {oozie_home}/share") untar_sharelib = ('tar', '-xvf', format('{oozie_home}/oozie-sharelib.tar.gz'), '-C', params.oozie_home) Execute( untar_sharelib, # time-expensive not_if=format("{no_op_test} || {skip_recreate_sharelib}"), sudo=True, ) configure_cmds = [] configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir)) configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}'))) Execute( configure_cmds, not_if=no_op_test, sudo=True, ) Directory( params.oozie_webapps_conf_dir, owner=params.oozie_user, group=params.user_group, recursive_ownership=True, recursion_follow_links=True, ) # download the database JAR download_database_library_if_needed() #falcon el extension if params.has_falcon_host: Execute(format( '{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}' ), not_if=no_op_test) Execute(format( '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar' ), not_if=no_op_test) if params.lzo_enabled and len(params.all_lzo_packages) > 0: Package(params.all_lzo_packages, retry_on_repo_unavailability=params. agent_stack_retry_on_unavailability, retry_count=params.agent_stack_retry_count) Execute( format( '{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'), not_if=no_op_test, ) prepare_war() File( hashcode_file, mode=0644, ) if params.stack_version_formatted != "" and compare_versions( params.stack_version_formatted, '2.2') >= 0: # Create hive-site and tez-site configs for oozie Directory(params.hive_conf_dir, create_parents=True, owner=params.oozie_user, group=params.user_group) if 'hive-site' in params.config['configurations']: XmlConfig( "hive-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['hive-site'], configuration_attributes=params. config['configuration_attributes']['hive-site'], owner=params.oozie_user, group=params.user_group, mode=0644) if 'tez-site' in params.config['configurations']: XmlConfig( "tez-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['tez-site'], configuration_attributes=params. config['configuration_attributes']['tez-site'], owner=params.oozie_user, group=params.user_group, mode=0664) Directory( params.oozie_server_dir, owner=params.oozie_user, group=params.user_group, recursive_ownership=True, )
def webhcat(): import params if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") < 0: params.HdfsDirectory(params.webhcat_apps_dir, action="create_delayed", owner=params.webhcat_user, mode=0755 ) if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir: params.HdfsDirectory(params.hcat_hdfs_user_dir, action="create_delayed", owner=params.hcat_user, mode=params.hcat_hdfs_user_mode ) params.HdfsDirectory(params.webhcat_hdfs_user_dir, action="create_delayed", owner=params.webhcat_user, mode=params.webhcat_hdfs_user_mode ) params.HdfsDirectory(None, action="create") Directory(params.templeton_pid_dir, owner=params.webhcat_user, mode=0755, group=params.user_group, recursive=True) Directory(params.templeton_log_dir, owner=params.webhcat_user, mode=0755, group=params.user_group, recursive=True) Directory(params.config_dir, recursive=True, owner=params.webhcat_user, group=params.user_group) if params.security_enabled: kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};") else: kinit_if_needed = "" if kinit_if_needed: Execute(kinit_if_needed, user=params.webhcat_user, path='/bin' ) # TODO, these checks that are specific to HDP 2.2 and greater should really be in a script specific to that stack. if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") >= 0: copy_tarballs_to_hdfs('hive', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group) copy_tarballs_to_hdfs('pig', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group) copy_tarballs_to_hdfs('hadoop-streaming', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group) copy_tarballs_to_hdfs('sqoop', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group) else: CopyFromLocal(params.hadoop_streeming_jars, owner=params.webhcat_user, mode=0755, dest_dir=params.webhcat_apps_dir, kinnit_if_needed=kinit_if_needed, hdfs_user=params.hdfs_user, hadoop_bin_dir=params.hadoop_bin_dir, hadoop_conf_dir=params.hadoop_conf_dir ) if (os.path.isfile(params.pig_tar_file)): CopyFromLocal(params.pig_tar_file, owner=params.webhcat_user, mode=0755, dest_dir=params.webhcat_apps_dir, kinnit_if_needed=kinit_if_needed, hdfs_user=params.hdfs_user, hadoop_bin_dir=params.hadoop_bin_dir, hadoop_conf_dir=params.hadoop_conf_dir ) CopyFromLocal(params.hive_tar_file, owner=params.webhcat_user, mode=0755, dest_dir=params.webhcat_apps_dir, kinnit_if_needed=kinit_if_needed, hdfs_user=params.hdfs_user, hadoop_bin_dir=params.hadoop_bin_dir, hadoop_conf_dir=params.hadoop_conf_dir ) if (len(glob.glob(params.sqoop_tar_file)) > 0): CopyFromLocal(params.sqoop_tar_file, owner=params.webhcat_user, mode=0755, dest_dir=params.webhcat_apps_dir, kinnit_if_needed=kinit_if_needed, hdfs_user=params.hdfs_user, hadoop_bin_dir=params.hadoop_bin_dir, hadoop_conf_dir=params.hadoop_conf_dir ) # Replace _HOST with hostname in relevant principal-related properties webhcat_site = params.config['configurations']['webhcat-site'].copy() for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']: if prop_name in webhcat_site: webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname) XmlConfig("webhcat-site.xml", conf_dir=params.config_dir, configurations=webhcat_site, configuration_attributes=params.config['configuration_attributes']['webhcat-site'], owner=params.webhcat_user, group=params.user_group, ) File(format("{config_dir}/webhcat-env.sh"), owner=params.webhcat_user, 
group=params.user_group, content=InlineTemplate(params.webhcat_env_sh_template) ) Directory(params.webhcat_conf_dir, cd_access='a', recursive=True ) log4j_webhcat_filename = 'webhcat-log4j.properties' if (params.log4j_webhcat_props != None): File(format("{config_dir}/{log4j_webhcat_filename}"), mode=0644, group=params.user_group, owner=params.webhcat_user, content=params.log4j_webhcat_props ) elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")): File(format("{config_dir}/{log4j_webhcat_filename}"), mode=0644, group=params.user_group, owner=params.webhcat_user, content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template")) )
if metric_collector_web_address.find(':') != -1: metric_collector_port = metric_collector_web_address.split(':')[1] else: metric_collector_port = '6188' pass # Security-related params security_enabled = config['configurations']['cluster-env']['security_enabled'] kafka_kerberos_enabled = ( 'security.inter.broker.protocol' in config['configurations']['kafka-broker'] and config['configurations']['kafka-broker']['security.inter.broker.protocol'] == "SASL_PLAINTEXT") if security_enabled and iop_stack_version != "" and 'kafka_principal_name' in config[ 'configurations']['kafka-env'] and compare_versions( iop_stack_version, '4.1') >= 0: _hostname_lowercase = config['hostname'].lower() _kafka_principal_name = config['configurations']['kafka-env'][ 'kafka_principal_name'] kafka_jaas_principal = _kafka_principal_name.replace( '_HOST', _hostname_lowercase) kafka_keytab_path = config['configurations']['kafka-env']['kafka_keytab'] kafka_bare_jaas_principal = get_bare_principal(_kafka_principal_name) kafka_kerberos_params = "-Djava.security.auth.login.config=" + conf_dir + "/kafka_jaas.conf" else: kafka_kerberos_params = '' namenode_hosts = default("/clusterHostInfo/namenode_host", []) has_namenode = not len(namenode_hosts) == 0 hdfs_user = config['configurations']['hadoop-env'][
def get_hadoop_conf_dir(force_latest_on_upgrade=False): """ Gets the shared hadoop conf directory using: 1. Start with /etc/hadoop/conf 2. When the stack is greater than HDP-2.2, use /usr/hdp/current/hadoop-client/conf 3. Only when doing a RU and HDP-2.3 or higher, use the value as computed by conf-select. This is in the form /usr/hdp/VERSION/hadoop/conf to make sure the configs are written in the correct place. However, if the component itself has not yet been upgraded, it should use the hadoop configs from the prior version. This will perform an hdp-select status to determine which version to use. :param force_latest_on_upgrade: if True, then force the returned path to always be that of the upgrade target version, even if hdp-select has not been called. This is primarily used by hooks like before-ANY to ensure that hadoop environment configurations are written to the correct location since they are written out before the hdp-select/conf-select would have been called. """ hadoop_conf_dir = "/etc/hadoop/conf" stack_name = None version = None allow_setting_conf_select_symlink = False if not Script.in_stack_upgrade(): # During normal operation, the HDP stack must be 2.3 or higher if Script.is_hdp_stack_greater_or_equal("2.2"): hadoop_conf_dir = "/usr/hdp/current/hadoop-client/conf" if Script.is_hdp_stack_greater_or_equal("2.3"): hadoop_conf_dir = "/usr/hdp/current/hadoop-client/conf" stack_name = default("/hostLevelParams/stack_name", None) version = default("/commandParams/version", None) if stack_name and version: version = str(version) allow_setting_conf_select_symlink = True else: # During an upgrade/downgrade, which can be a Rolling or Express Upgrade, need to calculate it based on the version ''' Whenever upgrading to HDP 2.2, or downgrading back to 2.2, need to use /etc/hadoop/conf Whenever upgrading to HDP 2.3, or downgrading back to 2.3, need to use a versioned hadoop conf dir Type__|_Source_|_Target_|_Direction_____________|_Comment_____________________________________________________________ Normal| | 2.2 | | Use /etc/hadoop/conf Normal| | 2.3 | | Use /etc/hadoop/conf, which should be a symlink to /usr/hdp/current/hadoop-client/conf EU | 2.1 | 2.3 | Upgrade | Use versioned /usr/hdp/current/hadoop-client/conf | | | No Downgrade Allowed | Invalid EU/RU | 2.2 | 2.2.* | Any | Use /usr/hdp/current/hadoop-client/conf EU/RU | 2.2 | 2.3 | Upgrade | Use /usr/hdp/$version/hadoop/conf, which should be a symlink destination | | | Downgrade | Use /usr/hdp/current/hadoop-client/conf EU/RU | 2.3 | 2.3.* | Any | Use /usr/hdp/$version/hadoop/conf, which should be a symlink destination ''' # The method "is_hdp_stack_greater_or_equal" uses "stack_version" which is the desired stack, e.g., 2.2 or 2.3 # In an RU, it is always the desired stack, and doesn't change even during the Downgrade! # In an RU Downgrade from HDP 2.3 to 2.2, the first thing we do is # rm /etc/[component]/conf and then mv /etc/[component]/conf.backup /etc/[component]/conf if Script.is_hdp_stack_greater_or_equal("2.2"): hadoop_conf_dir = "/usr/hdp/current/hadoop-client/conf" # This contains the "version", including the build number, that is actually used during a stack upgrade and # is the version upgrading/downgrading to. 
stack_info = hdp_select._get_upgrade_stack() if stack_info is not None: stack_name = stack_info[0] version = stack_info[1] else: raise Fail("Unable to get parameter 'version'") Logger.info( "In the middle of a stack upgrade/downgrade for Stack {0} and destination version {1}, determining which hadoop conf dir to use." .format(stack_name, version)) # This is the version either upgrading or downgrading to. if compare_versions(format_hdp_stack_version(version), "2.3.0.0") >= 0: # Determine if hdp-select has been run and if not, then use the current # hdp version until this component is upgraded. if not force_latest_on_upgrade: current_hdp_version = hdp_select.get_role_component_current_hdp_version( ) if current_hdp_version is not None and version != current_hdp_version: version = current_hdp_version Logger.info( "hdp-select has not yet been called to update the symlink for this component, keep using version {0}" .format(current_hdp_version)) # Only change the hadoop_conf_dir path, don't conf-select this older version hadoop_conf_dir = "/usr/hdp/{0}/hadoop/conf".format(version) Logger.info("Hadoop conf dir: {0}".format(hadoop_conf_dir)) allow_setting_conf_select_symlink = True if allow_setting_conf_select_symlink: # If not in the middle of an upgrade and on HDP 2.3 or higher, or if # upgrading stack to version 2.3.0.0 or higher (which may be upgrade or downgrade), then consider setting the # symlink for /etc/hadoop/conf. # If a host does not have any HDFS or YARN components (e.g., only ZK), then it will not contain /etc/hadoop/conf # Therefore, any calls to conf-select will fail. # For that reason, if the hadoop conf directory exists, then make sure it is set. if os.path.exists(hadoop_conf_dir): Logger.info( "The hadoop conf dir {0} exists, will call conf-select on it for version {1}" .format(hadoop_conf_dir, version)) select(stack_name, "hadoop", version) Logger.info("Using hadoop conf dir: {0}".format(hadoop_conf_dir)) return hadoop_conf_dir
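# Illustration only: a condensed summary of the conf dir resolution described by the comments
# and table in get_hadoop_conf_dir above. The paths are the ones named there; the helper is a
# simplified sketch of the decision, not the Ambari implementation.
def resolve_hadoop_conf_dir(stack_at_least_22, in_stack_upgrade, target_at_least_23, version):
  conf_dir = "/etc/hadoop/conf"
  if stack_at_least_22:
    conf_dir = "/usr/hdp/current/hadoop-client/conf"
  if in_stack_upgrade and stack_at_least_22 and target_at_least_23:
    # versioned dir; "version" may still be the prior version if hdp-select has not run yet
    conf_dir = "/usr/hdp/{0}/hadoop/conf".format(version)
  return conf_dir

assert resolve_hadoop_conf_dir(False, False, False, None) == "/etc/hadoop/conf"
assert resolve_hadoop_conf_dir(True, False, False, None) == "/usr/hdp/current/hadoop-client/conf"
assert resolve_hadoop_conf_dir(True, True, True, "2.3.2.0-2950") == "/usr/hdp/2.3.2.0-2950/hadoop/conf"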
def pre_rolling_restart(self, env):
  import params
  env.set_params(params)
  if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
    Execute(format("hdp-select set storm-client {version}"))
def setup_ranger_hdfs(upgrade_type=None): import params if params.has_ranger_admin: if params.xml_configurations_supported: from resource_management.libraries.functions.setup_ranger_plugin_xml import setup_ranger_plugin else: from resource_management.libraries.functions.setup_ranger_plugin import setup_ranger_plugin hdp_version = None if upgrade_type is not None: hdp_version = params.version if params.retryAble: Logger.info( "HDFS: Setup ranger: command retry enables thus retrying if ranger admin is down !" ) else: Logger.info( "HDFS: Setup ranger: command retry not enabled thus skipping if ranger admin is down !" ) setup_ranger_plugin( 'hadoop-client', 'hdfs', params.downloaded_custom_connector, params.driver_curl_source, params.driver_curl_target, params.java_home, params.repo_name, params.hdfs_ranger_plugin_repo, params.ranger_env, params.ranger_plugin_properties, params.policy_user, params.policymgr_mgr_url, params.enable_ranger_hdfs, conf_dict=params.hadoop_conf_dir, component_user=params.hdfs_user, component_group=params.user_group, cache_service_list=['hdfs'], plugin_audit_properties=params.config['configurations'] ['ranger-hdfs-audit'], plugin_audit_attributes=params.config['configuration_attributes'] ['ranger-hdfs-audit'], plugin_security_properties=params.config['configurations'] ['ranger-hdfs-security'], plugin_security_attributes=params. config['configuration_attributes']['ranger-hdfs-security'], plugin_policymgr_ssl_properties=params.config['configurations'] ['ranger-hdfs-policymgr-ssl'], plugin_policymgr_ssl_attributes=params. config['configuration_attributes']['ranger-hdfs-policymgr-ssl'], component_list=['hadoop-client'], audit_db_is_enabled=params.xa_audit_db_is_enabled, credential_file=params.credential_file, xa_audit_db_password=params.xa_audit_db_password, ssl_truststore_password=params.ssl_truststore_password, ssl_keystore_password=params.ssl_keystore_password, hdp_version_override=hdp_version, skip_if_rangeradmin_down=not params.retryAble) if hdp_version and params.upgrade_direction == Direction.UPGRADE: # when upgrading to 2.3+, this env file must be removed if compare_versions(hdp_version, '2.3', format=True) > 0: source_file = os.path.join(params.hadoop_conf_dir, 'set-hdfs-plugin-env.sh') target_file = source_file + ".bak" Execute(("mv", source_file, target_file), sudo=True, only_if=format("test -f {source_file}")) else: Logger.info('Ranger admin not installed')
def hive(name=None): import params if name == 'hiveserver2': # HDP 2.1.* or lower if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2.0.0") < 0: params.HdfsResource(params.webhcat_apps_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=0755 ) # Create webhcat dirs. if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir: params.HdfsResource(params.hcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hcat_user, mode=params.hcat_hdfs_user_mode ) params.HdfsResource(params.webhcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=params.webhcat_hdfs_user_mode ) # ****** Begin Copy Tarballs ****** # ********************************* # HDP 2.2 or higher, copy mapreduce.tar.gz to HDFS if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0: copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode. # This can use a different source and dest location to account for both HDP 2.1 and 2.2 copy_to_hdfs("pig", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.pig_tar_source, custom_dest_file=params.pig_tar_dest_file, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("hive", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.hive_tar_source, custom_dest_file=params.hive_tar_dest_file, host_sys_prepped=params.host_sys_prepped) wildcard_tarballs = ["sqoop", "hadoop_streaming"] for tarball_name in wildcard_tarballs: source_file_pattern = eval("params." + tarball_name + "_tar_source") dest_dir = eval("params." 
+ tarball_name + "_tar_dest_dir") if source_file_pattern is None or dest_dir is None: continue source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern] for source_file in source_files: src_filename = os.path.basename(source_file) dest_file = os.path.join(dest_dir, src_filename) copy_to_hdfs(tarball_name, params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=source_file, custom_dest_file=dest_file, host_sys_prepped=params.host_sys_prepped) # ******* End Copy Tarballs ******* # ********************************* # if warehouse directory is in DFS if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme: # Create Hive Metastore Warehouse Dir params.HdfsResource(params.hive_apps_whs_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=0777 ) else: Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS.")) # Create Hive User Dir params.HdfsResource(params.hive_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=params.hive_hdfs_user_mode ) if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"): params.HdfsResource(params.hive_exec_scratchdir, type="directory", action="create_on_execute", owner=params.hive_user, group=params.hdfs_user, mode=0777) # Hive expects this dir to be writeable by everyone as it is used as a temp dir params.HdfsResource(None, action="execute") Directory(params.hive_etc_dir_prefix, mode=0755 ) # We should change configurations for client as well as for server. # The reason is that stale-configs are service-level, not component. for conf_dir in params.hive_conf_dirs_list: fill_conf_dir(conf_dir) XmlConfig("hive-site.xml", conf_dir=params.hive_config_dir, configurations=params.hive_site_config, configuration_attributes=params.config['configuration_attributes']['hive-site'], owner=params.hive_user, group=params.user_group, mode=0644) setup_atlas_hive() if params.hive_specific_configs_supported and name == 'hiveserver2': XmlConfig("hiveserver2-site.xml", conf_dir=params.hive_server_conf_dir, configurations=params.config['configurations']['hiveserver2-site'], configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'], owner=params.hive_user, group=params.user_group, mode=0644) File(format("{hive_config_dir}/hive-env.sh"), owner=params.hive_user, group=params.user_group, content=InlineTemplate(params.hive_env_sh_template) ) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, recursive=True, owner='root', group='root' ) File(os.path.join(params.limits_conf_dir, 'hive.conf'), owner='root', group='root', mode=0644, content=Template("hive.conf.j2") ) if (name == 'metastore' or name == 'hiveserver2') and not os.path.exists(params.target): jdbc_connector() File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")), mode = 0644, ) if name == 'metastore': File(params.start_metastore_path, mode=0755, content=StaticFile('startMetastore.sh') ) if params.init_metastore_schema: create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_bin}/schematool -initSchema " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord {hive_metastore_user_passwd!p}") 
check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_bin}/schematool -info " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord {hive_metastore_user_passwd!p}"), params.hive_user) # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords. # Fixing it with the hack below: quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd)) if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \ or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"': quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1] Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace( format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING)) Execute(create_schema_cmd, not_if = check_schema_created_cmd, user = params.hive_user ) elif name == 'hiveserver2': File(params.start_hiveserver2_path, mode=0755, content=Template(format('{start_hiveserver2_script}')) ) if name != "client": crt_directory(params.hive_pid_dir) crt_directory(params.hive_log_dir) crt_directory(params.hive_var_lib)
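# A standalone sketch (not part of the stack scripts) of the password-masking hack used above for
# the schematool commands: the password is shell-quoted twice (because as_user quotes the whole
# command again), the outer quotes are stripped, and the logged command gets the password replaced
# with a placeholder. shlex/pipes.quote stands in for Ambari's quote_bash_args, and the
# PASSWORD_HIDE_STRING constant is an assumed stand-in for utils.PASSWORDS_HIDE_STRING.
try:
    from shlex import quote as shell_quote   # Python 3
except ImportError:
    from pipes import quote as shell_quote   # Python 2

PASSWORD_HIDE_STRING = "[PROTECTED]"

def hide_schematool_password(logged_cmd, password):
    # quote twice, then drop one layer of outer quotes so the value matches what
    # actually appears inside the wrapped command string
    quoted = shell_quote(shell_quote(password))
    if quoted[:1] in ("'", '"') and quoted[:1] == quoted[-1:]:
        quoted = quoted[1:-1]
    return logged_cmd.replace("-passWord " + quoted, "-passWord " + PASSWORD_HIDE_STRING)

# hide_schematool_password("schematool -info -dbType mysql -userName hive -passWord s3cret", "s3cret")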
def spark_service(name, action): import params if action == 'start': if params.security_enabled: spark_kinit_cmd = format( "{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; " ) Execute(spark_kinit_cmd, user=params.spark_user) # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not # need to copy the tarball, otherwise, copy it. if params.hdp_stack_version and compare_versions( params.hdp_stack_version, '2.3.0.0') < 0: resource_created = copy_to_hdfs( "tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) if resource_created: params.HdfsResource(None, action="execute") if name == 'jobhistoryserver': historyserver_no_op_test = format( 'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1' ) Execute(format('{spark_history_server_start}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home}, not_if=historyserver_no_op_test) elif name == 'sparkthriftserver': if params.security_enabled: hive_principal = params.hive_kerberos_principal.replace( '_HOST', socket.getfqdn().lower()) hive_kinit_cmd = format( "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; " ) Execute(hive_kinit_cmd, user=params.hive_user) thriftserver_no_op_test = format( 'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1' ) Execute(format( '{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}' ), user=params.hive_user, environment={'JAVA_HOME': params.java_home}, not_if=thriftserver_no_op_test) elif action == 'stop': if name == 'jobhistoryserver': Execute(format('{spark_history_server_stop}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home}) File(params.spark_history_server_pid_file, action="delete") elif name == 'sparkthriftserver': Execute(format('{spark_thrift_server_stop}'), user=params.hive_user, environment={'JAVA_HOME': params.java_home}) File(params.spark_thrift_server_pid_file, action="delete")
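# A minimal, standalone sketch of the "no-op test" guard used above: a daemon start is skipped when
# its pid file exists and the recorded process is still alive. This only illustrates the idea; the
# real scripts shell out to `ls <pid_file> && ps -p $(cat <pid_file>)` via Execute's not_if.
import errno
import os

def daemon_is_running(pid_file):
    try:
        with open(pid_file) as f:
            pid = int(f.read().strip())
    except (IOError, OSError, ValueError):
        return False
    try:
        os.kill(pid, 0)                    # signal 0 only checks process existence/permission
    except OSError as e:
        return e.errno == errno.EPERM      # the process exists but belongs to another user
    return True

# if not daemon_is_running(spark_history_server_pid_file): start the history server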
def service_check(self, env): import params env.set_params(params) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: path_to_distributed_shell_jar = "/usr/hdp/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar" else: path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar" yarn_distrubuted_shell_check_cmd = format( "yarn org.apache.hadoop.yarn.applications.distributedshell.Client " "-shell_command ls -num_containers {number_of_nm} -jar {path_to_distributed_shell_jar}" ) if params.security_enabled: kinit_cmd = format( "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};" ) smoke_cmd = format( "{kinit_cmd} {yarn_distrubuted_shell_check_cmd}") else: smoke_cmd = yarn_distrubuted_shell_check_cmd return_code, out = shell.checked_call( smoke_cmd, path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin', user=params.smokeuser, ) m = re.search("appTrackingUrl=(.*),\s", out) app_url = m.group(1) splitted_app_url = str(app_url).split('/') for item in splitted_app_url: if "application" in item: application_name = item json_response_received = False for rm_host in params.rm_hosts: info_app_url = "http://" + rm_host + ":" + params.rm_port + "/ws/v1/cluster/apps/" + application_name get_app_info_cmd = "curl --negotiate -u : -sL --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url return_code, stdout = shell.checked_call( get_app_info_cmd, path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin', user=params.smokeuser, ) try: json_response = json.loads(stdout) json_response_received = True if json_response['app']['state'] != "FINISHED" or json_response[ 'app']['finalStatus'] != "SUCCEEDED": raise Exception( "Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED." ) except Exception as e: pass if not json_response_received: raise Exception("Could not get json response from YARN API")
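# A rough sketch of the verification step above: pull the application id out of the
# distributed-shell output and ask the ResourceManager REST API (/ws/v1/cluster/apps/<app_id>)
# whether the application finished successfully. rm_hosts and rm_port are assumed to come from
# params as in the original check; on a Kerberized cluster the request would additionally need
# SPNEGO (the original uses `curl --negotiate`), which this sketch omits.
import json
import re
try:
    from urllib.request import urlopen    # Python 3
except ImportError:
    from urllib2 import urlopen           # Python 2

def application_succeeded(shell_output, rm_hosts, rm_port, timeout=10):
    match = re.search(r"appTrackingUrl=(.*),\s", shell_output)
    if not match:
        raise Exception("Could not find appTrackingUrl in distributed-shell output")
    app_id = None
    for part in match.group(1).split('/'):
        if "application" in part:
            app_id = part
    if app_id is None:
        raise Exception("Could not find an application id in the tracking URL")
    for rm_host in rm_hosts:
        url = "http://{0}:{1}/ws/v1/cluster/apps/{2}".format(rm_host, rm_port, app_id)
        try:
            app = json.loads(urlopen(url, timeout=timeout).read().decode("utf-8"))["app"]
        except Exception:
            continue                        # try the next ResourceManager host
        return app["state"] == "FINISHED" and app["finalStatus"] == "SUCCEEDED"
    raise Exception("Could not get a JSON response from the YARN REST API")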
def service_check(self, env): import params env.set_params(params) input_file = format('/user/{smokeuser}/passwd') output_dir = format('/user/{smokeuser}/pigsmoke.out') params.HdfsResource( output_dir, type="directory", action="delete_on_execute", owner=params.smokeuser, ) params.HdfsResource( input_file, type="file", source="/etc/passwd", action="create_on_execute", owner=params.smokeuser, ) params.HdfsResource(None, action="execute") if params.security_enabled: kinit_cmd = format( "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};" ) Execute(kinit_cmd, user=params.smokeuser) File(format("{tmp_dir}/pigSmoke.sh"), content=StaticFile("pigSmoke.sh"), mode=0755) # check for Pig-on-M/R Execute( format("pig {tmp_dir}/pigSmoke.sh"), tries=3, try_sleep=5, path=format( '{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'), user=params.smokeuser, logoutput=True) test_cmd = format("fs -test -e {output_dir}") ExecuteHadoop(test_cmd, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir) if params.stack_version_formatted != "" and compare_versions( params.stack_version_formatted, '2.2') >= 0: # cleanup results from previous test params.HdfsResource( output_dir, type="directory", action="delete_on_execute", owner=params.smokeuser, ) params.HdfsResource( input_file, type="file", source="/etc/passwd", action="create_on_execute", owner=params.smokeuser, ) # Check for Pig-on-Tez resource_created = copy_to_hdfs( "tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) if resource_created: params.HdfsResource(None, action="execute") Execute( format("pig -x tez {tmp_dir}/pigSmoke.sh"), tries=3, try_sleep=5, path=format( '{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin' ), user=params.smokeuser, logoutput=True) ExecuteHadoop(test_cmd, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir)
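# A small standalone sketch of the tries/try_sleep behaviour that Execute() provides for the pig
# smoke commands above: run a shell command, retry a fixed number of times with a pause between
# attempts, and fail only when every attempt has failed. Illustrative only.
import subprocess
import time

def run_with_retries(cmd, tries=3, try_sleep=5):
    last_error = None
    for attempt in range(1, tries + 1):
        try:
            return subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            last_error = e
            if attempt < tries:
                time.sleep(try_sleep)
    raise last_error

# run_with_retries("pig /tmp/pigSmoke.sh", tries=3, try_sleep=5)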
def oozie_server_specific(): import params File( params.pid_file, action="delete", not_if=format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" )) oozie_server_directories = [ format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir ] Directory( oozie_server_directories, owner=params.oozie_user, group=params.user_group, mode=0755, recursive=True, cd_access="a", ) Directory( params.oozie_libext_dir, recursive=True, ) no_op_test = format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ) hashcode_file = format("{oozie_home}/.hashcode") hashcode = hashlib.md5( format('{oozie_home}/oozie-sharelib.tar.gz')).hexdigest() skip_recreate_sharelib = format( "test -f {hashcode_file} && test -d {oozie_home}/share && [[ `cat {hashcode_file}` == '{hashcode}' ]]" ) untar_sharelib = ('tar', '-xvf', format('{oozie_home}/oozie-sharelib.tar.gz'), '-C', params.oozie_home) Execute( untar_sharelib, # time-expensive not_if=format("{no_op_test} || {skip_recreate_sharelib}"), sudo=True, ) configure_cmds = [] configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir)) configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}'))) configure_cmds.append(('chown', '-RL', format('{oozie_user}:{user_group}'), params.oozie_webapps_conf_dir)) Execute( configure_cmds, not_if=no_op_test, sudo=True, ) if params.jdbc_driver_name=="com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name=="oracle.jdbc.driver.OracleDriver": File( params.downloaded_custom_connector, content=DownloadSource(params.driver_curl_source), ) Execute( ('cp', '--remove-destination', params.downloaded_custom_connector, params.target), #creates=params.target, TODO: uncomment after ranger_hive_plugin will not provide jdbc path=["/bin", "/usr/bin/"], sudo=True) File(params.target, owner=params.oozie_user, group=params.user_group) #falcon el extension if params.has_falcon_host: Execute( format( '{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}' ), not_if=no_op_test, ) Execute( format( '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar' ), not_if=no_op_test, ) if params.lzo_enabled and len(params.all_lzo_packages) > 0: Package(params.all_lzo_packages) Execute( format( '{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'), not_if=no_op_test, ) Execute( format( "cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure}" ), # time-expensive user=params.oozie_user, not_if=format("{no_op_test} || {skip_recreate_sharelib}")) File( hashcode_file, content=hashcode, mode=0644, ) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: # Create hive-site and tez-site configs for oozie Directory(params.hive_conf_dir, recursive=True, owner=params.oozie_user, group=params.user_group) if 'hive-site' in params.config['configurations']: XmlConfig( "hive-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['hive-site'], configuration_attributes=params. 
config['configuration_attributes']['hive-site'], owner=params.oozie_user, group=params.user_group, mode=0644) if 'tez-site' in params.config['configurations']: XmlConfig( "tez-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['tez-site'], configuration_attributes=params. config['configuration_attributes']['tez-site'], owner=params.oozie_user, group=params.user_group, mode=0664) Execute(('chown', '-R', format("{oozie_user}:{user_group}"), params.oozie_server_dir), sudo=True)
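# A standalone sketch of the share-lib guard used in oozie_server_specific() above. Note that the
# original feeds the *path string* of oozie-sharelib.tar.gz to hashlib.md5, not the file contents,
# so the stored hashcode effectively changes only when the path (i.e. the installed layout) changes.
import hashlib
import os

def sharelib_needs_extract(oozie_home):
    tarball_path = os.path.join(oozie_home, "oozie-sharelib.tar.gz")
    hashcode_file = os.path.join(oozie_home, ".hashcode")
    hashcode = hashlib.md5(tarball_path.encode("utf-8")).hexdigest()
    share_dir_ok = os.path.isdir(os.path.join(oozie_home, "share"))
    stored = None
    if os.path.isfile(hashcode_file):
        with open(hashcode_file) as f:
            stored = f.read().strip()
    # skip the time-expensive untar when the share dir exists and the hashcode matches
    return not (share_dir_ok and stored == hashcode)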
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False):
  """
  :param action: Either "start" or "stop"
  :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc"
  :param user: User to run the command as
  :param options: Additional options to pass to command as a string
  :param create_pid_dir: Create PID directory
  :param create_log_dir: Create log file directory
  """
  import params

  options = options if options else ""
  pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
  pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
  hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir}
  log_dir = format("{hdfs_log_dir_prefix}/{user}")

  # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
  # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
  if name == "nfs3":
    pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid")
    custom_export = {
      'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user,
      'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir,
      'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir
    }
    hadoop_env_exports.update(custom_export)

  process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])

  # on STOP the directories shouldn't be created,
  # since during stop the old dirs (created during the previous start) are still in use
  if action != "stop":
    if name == "nfs3":
      Directory(params.hadoop_pid_dir_prefix,
                mode=0755,
                owner=params.root_user,
                group=params.root_group)
    else:
      Directory(params.hadoop_pid_dir_prefix,
                mode=0755,
                owner=params.hdfs_user,
                group=params.user_group)

    if create_pid_dir:
      Directory(pid_dir, owner=user, recursive=True)

    if create_log_dir:
      if name == "nfs3":
        Directory(log_dir, mode=0775, owner=params.root_user, group=params.user_group)
      else:
        Directory(log_dir, owner=user, recursive=True)

  if params.security_enabled and name == "datanode":
    ## The directory where pid files are stored in the secure data environment.
    hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
    hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")

    # At Champlain stack and further, we may start datanode as a non-root even in a secure cluster
    if not (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) or params.secure_dn_ports_are_in_use:
      user = "root"
      pid_file = format("{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid")

    if action == 'stop' and (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) and \
        os.path.isfile(hadoop_secure_dn_pid_file):
      # We need special handling for this case to handle the situation
      # when we configure a non-root secure DN and then restart it
      # to pick up new configs. Otherwise we will not be able to stop
      # a running instance.
      user = "root"

      try:
        check_process_status(hadoop_secure_dn_pid_file)

        custom_export = {'HADOOP_SECURE_DN_USER': params.hdfs_user}
        hadoop_env_exports.update(custom_export)
      except ComponentIsNotRunning:
        pass

  hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh")

  if user == "root":
    cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name]
    if options:
      cmd += [options, ]
    daemon_cmd = as_sudo(cmd)
  else:
    cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}")
    if options:
      cmd += " " + options
    daemon_cmd = as_user(cmd, user)

  if action == "start":
    # remove pid file from dead process
    File(pid_file, action="delete", not_if=process_id_exists_command)

    Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
  elif action == "stop":
    Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports)
    File(pid_file, action="delete")
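# A simplified, standalone sketch of how the daemon command above is assembled: root commands are
# built as an argument list for sudo, everything else becomes a single shell string run under su.
# The sudo/su wrapping here is illustrative only; the real scripts use Ambari's as_sudo()/as_user()
# helpers, and ulimit_cmd comes from params.
def build_daemon_cmd(hadoop_bin, hadoop_conf_dir, action, name, user, options="", ulimit_cmd=""):
    daemon = "{0}/hadoop-daemon.sh".format(hadoop_bin)
    if user == "root":
        cmd = ["sudo", daemon, "--config", hadoop_conf_dir, action, name]
        if options:
            cmd.append(options)
        return cmd
    cmd = "{0} {1} --config {2} {3} {4}".format(ulimit_cmd, daemon, hadoop_conf_dir, action, name).strip()
    if options:
        cmd += " " + options
    return ["su", "-", user, "-c", cmd]

# build_daemon_cmd("/usr/hdp/current/hadoop-client/sbin", "/etc/hadoop/conf", "start", "datanode", "hdfs")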
def oozie(is_server=False): import params if is_server: params.HdfsResource(params.oozie_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) params.HdfsResource(None, action="execute") Directory(params.conf_dir, recursive=True, owner=params.oozie_user, group=params.user_group) XmlConfig( "oozie-site.xml", conf_dir=params.conf_dir, configurations=params.oozie_site, configuration_attributes=params.config['configuration_attributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) File(format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template)) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=params.log4j_props) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: File(format("{params.conf_dir}/adminusers.txt"), mode=0644, group=params.user_group, owner=params.oozie_user, content=Template('adminusers.txt.j2', oozie_user=params.oozie_user)) else: File(format("{params.conf_dir}/adminusers.txt"), owner=params.oozie_user, group=params.user_group) if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), ) pass oozie_ownership() if is_server: oozie_server_specific()
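# A rough sketch of what XmlConfig does for oozie-site.xml above: render a dict of properties into
# a Hadoop-style configuration file. The real resource also handles configuration_attributes
# (e.g. "final"), file ownership and mode, all of which are omitted here.
import xml.etree.ElementTree as ET

def write_site_xml(path, properties):
    root = ET.Element("configuration")
    for key in sorted(properties):
        prop = ET.SubElement(root, "property")
        ET.SubElement(prop, "name").text = key
        ET.SubElement(prop, "value").text = str(properties[key])
    ET.ElementTree(root).write(path, encoding="utf-8", xml_declaration=True)

# write_site_xml("/tmp/oozie-site.xml", {"oozie.base.url": "http://host:11000/oozie"})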
import os # server configurations config = Script.get_config() tmp_dir = Script.get_tmp_dir() sudo = AMBARI_SUDO_BINARY stack_name = default("/hostLevelParams/stack_name", None) # node hostname hostname = config["hostname"] # This is expected to be of the form #.#.#.# stack_version_unformatted = str(config['hostLevelParams']['stack_version']) hdp_stack_version = format_hdp_stack_version(stack_version_unformatted) stack_is_hdp21 = hdp_stack_version != "" and compare_versions( hdp_stack_version, '2.1') >= 0 and compare_versions( hdp_stack_version, '2.2') < 0 # New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade version = default("/commandParams/version", None) # Hadoop params # TODO, this logic should initialize these parameters in a file inside the HDP 2.2 stack. if hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2') >= 0: # start out with client libraries hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin" hadoop_home = '/usr/hdp/current/hadoop-client' hive_bin = '/usr/hdp/current/hive-client/bin' hive_lib = '/usr/hdp/current/hive-client/lib' # if this is a server action, then use the server binaries; smoke tests
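# Almost every branch in these scripts is gated on compare_versions(format_hdp_stack_version(...)).
# A standalone sketch of the same idea: normalise dotted versions (dropping a build suffix such as
# "-1234") to integer tuples of equal length and compare them. This mirrors the behaviour the
# scripts rely on, not Ambari's exact implementation.
def normalize_version(version, parts=4):
    nums = [int(p) for p in version.split("-")[0].split(".")[:parts]]
    return tuple(nums + [0] * (parts - len(nums)))

def compare_stack_versions(v1, v2):
    a, b = normalize_version(v1), normalize_version(v2)
    return (a > b) - (a < b)

assert compare_stack_versions("2.3.0.0-2557", "2.2") >= 0
assert compare_stack_versions("2.1.7", "2.2.0.0") < 0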
security_enabled = config['configurations']['cluster-env']['security_enabled']
hdfs_user = status_params.hdfs_user
root_user = "root"
hadoop_pid_dir_prefix = status_params.hadoop_pid_dir_prefix

# Some datanode settings
dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
dfs_dn_http_addr = default('/configurations/hdfs-site/dfs.datanode.http.address', None)
dfs_dn_https_addr = default('/configurations/hdfs-site/dfs.datanode.https.address', None)
dfs_http_policy = default('/configurations/hdfs-site/dfs.http.policy', None)
dfs_dn_ipc_address = config['configurations']['hdfs-site']['dfs.datanode.ipc.address']
secure_dn_ports_are_in_use = False

# hadoop params
if hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2') >= 0:
  mapreduce_libs_path = "/usr/hdp/current/hadoop-mapreduce-client/*"
  hadoop_libexec_dir = "/usr/hdp/current/hadoop-client/libexec"
  hadoop_bin = "/usr/hdp/current/hadoop-client/sbin"
  hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin"
  hadoop_home = "/usr/hdp/current/hadoop-client"

if not security_enabled:
  # an empty (quoted) value so hadoop-env.sh does not export a dedicated secure DN user
  hadoop_secure_dn_user = '""'
else:
  dfs_dn_port = utils.get_port(dfs_dn_addr)
  dfs_dn_http_port = utils.get_port(dfs_dn_http_addr)
  dfs_dn_https_port = utils.get_port(dfs_dn_https_addr)
  # We try to avoid inability to start the datanode as a plain user due to usage of root-owned ports
  if dfs_http_policy == "HTTPS_ONLY":
    secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_https_port)
  elif dfs_http_policy == "HTTP_AND_HTTPS":
config = Script.get_config() tmp_dir = Script.get_tmp_dir() stack_name = default("/hostLevelParams/stack_name", None) # This is expected to be of the form #.#.#.# stack_version_unformatted = str(config['hostLevelParams']['stack_version']) hdp_stack_version = format_hdp_stack_version(stack_version_unformatted) # New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade version = default("/commandParams/version", None) hostname = config['hostname'] #hadoop params if hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2') >= 0: yarn_role_root = "hadoop-yarn-client" mapred_role_root = "hadoop-mapreduce-client" command_role = default("/role", "") if command_role == "APP_TIMELINE_SERVER": yarn_role_root = "hadoop-yarn-timelineserver" elif command_role == "HISTORYSERVER": mapred_role_root = "hadoop-mapreduce-historyserver" elif command_role == "MAPREDUCE2_CLIENT": mapred_role_root = "hadoop-mapreduce-client" elif command_role == "NODEMANAGER": yarn_role_root = "hadoop-yarn-nodemanager" elif command_role == "RESOURCEMANAGER": yarn_role_root = "hadoop-yarn-resourcemanager" elif command_role == "YARN_CLIENT":
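# A compact, table-driven sketch of the role-to-directory selection above: each Ambari command role
# maps to a /usr/hdp/current/<component> root, defaulting to the client directories when the role
# is not listed (the YARN_CLIENT branch of the original behaves like the default).
ROLE_ROOTS = {
    "APP_TIMELINE_SERVER": ("hadoop-yarn-timelineserver", "hadoop-mapreduce-client"),
    "HISTORYSERVER":       ("hadoop-yarn-client",          "hadoop-mapreduce-historyserver"),
    "MAPREDUCE2_CLIENT":   ("hadoop-yarn-client",          "hadoop-mapreduce-client"),
    "NODEMANAGER":         ("hadoop-yarn-nodemanager",     "hadoop-mapreduce-client"),
    "RESOURCEMANAGER":     ("hadoop-yarn-resourcemanager", "hadoop-mapreduce-client"),
}

def role_dirs(command_role):
    yarn_root, mapred_root = ROLE_ROOTS.get(command_role, ("hadoop-yarn-client", "hadoop-mapreduce-client"))
    return "/usr/hdp/current/" + yarn_root, "/usr/hdp/current/" + mapred_root

# role_dirs("NODEMANAGER") -> ("/usr/hdp/current/hadoop-yarn-nodemanager", "/usr/hdp/current/hadoop-mapreduce-client")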
import os import itertools import re config = Script.get_config() tmp_dir = Script.get_tmp_dir() print config stack_version_unformatted = str(config['hostLevelParams']['stack_version']) hdp_stack_version = format_hdp_stack_version(stack_version_unformatted) security_enabled = config['configurations']['cluster-env']['security_enabled'] kerberos_cache_file = default( '/configurations/cluster-env/kerberos_cache_file', '/tmp/ccache_keytab') kerberos_domain = config['configurations']['cluster-env']['kerberos_domain'] stack_is_hdp22_or_further = hdp_stack_version != "" and compare_versions( hdp_stack_version, '2.2') >= 0 hdfs_user = status_params.hdfs_user hadoop_pid_dir_prefix = status_params.hadoop_pid_dir_prefix # Some datanode settings dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None) dfs_dn_http_addr = default( '/configurations/hdfs-site/dfs.datanode.http.address', None) dfs_dn_https_addr = default( '/configurations/hdfs-site/dfs.datanode.https.address', None) dfs_http_policy = default('/configurations/hdfs-site/dfs.http.policy', None) secure_dn_ports_are_in_use = False #hadoop params if stack_is_hdp22_or_further: mapreduce_libs_path = "/usr/hdp/current/hadoop-mapreduce-client/*"
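# A standalone sketch of the "are secure ports in use" decision that the datanode params build up:
# a port below 1024 is privileged, and which ports matter depends on dfs.http.policy. This mirrors
# the intent of utils.get_port / utils.is_secure_port rather than their exact implementation.
def get_port(address):
    return int(address.split(":")[-1]) if address else None

def is_secure_port(port):
    return port is not None and port < 1024

def secure_dn_ports_in_use(dfs_dn_addr, dfs_dn_http_addr, dfs_dn_https_addr, dfs_http_policy):
    dn_port = get_port(dfs_dn_addr)
    http_port = get_port(dfs_dn_http_addr)
    https_port = get_port(dfs_dn_https_addr)
    if dfs_http_policy == "HTTPS_ONLY":
        return is_secure_port(dn_port) or is_secure_port(https_port)
    # otherwise the plain HTTP port is still bound, so it has to be checked as well
    return is_secure_port(dn_port) or is_secure_port(http_port)

# secure_dn_ports_in_use("0.0.0.0:1019", "0.0.0.0:1022", None, "HTTP_ONLY") -> True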
ambari_db_rca_password = config['hostLevelParams']['ambari_db_rca_password'][0] if has_namenode and 'rca_enabled' in config['configurations']['hadoop-env']: rca_enabled = config['configurations']['hadoop-env']['rca_enabled'] else: rca_enabled = False rca_disabled_prefix = "###" if rca_enabled == True: rca_prefix = "" else: rca_prefix = rca_disabled_prefix #hadoop-env.sh java_home = config['hostLevelParams']['java_home'] if hdp_stack_version != "" and compare_versions( hdp_stack_version, '2.0') >= 0 and compare_versions( hdp_stack_version, '2.1') < 0 and System.get_instance().os_family != "suse": # deprecated rhel jsvc_path jsvc_path = "/usr/libexec/bigtop-utils" else: jsvc_path = "/usr/lib/bigtop-utils" hadoop_heapsize = config['configurations']['hadoop-env']['hadoop_heapsize'] namenode_heapsize = config['configurations']['hadoop-env']['namenode_heapsize'] namenode_opt_newsize = config['configurations']['hadoop-env'][ 'namenode_opt_newsize'] namenode_opt_maxnewsize = config['configurations']['hadoop-env'][ 'namenode_opt_maxnewsize'] namenode_opt_permsize = format_jvm_option( "/configurations/hadoop-env/namenode_opt_permsize", "128m")
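# A small sketch of the two toggles above: the RCA appender lines in hadoop-env are commented out
# by prefixing them when rca_enabled is false, and JVM sizing options fall back to a default when
# the configuration key is absent or empty (the role format_jvm_option plays here). Illustrative
# only; not Ambari's implementation.
def rca_prefix(rca_enabled, disabled_prefix="###"):
    return "" if rca_enabled else disabled_prefix

def jvm_option(configurations, key, default_value):
    value = configurations.get(key)
    return value if value not in (None, "") else default_value

# jvm_option({"namenode_opt_permsize": ""}, "namenode_opt_permsize", "128m") -> "128m"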
def kafka(upgrade_type=None): import params ensure_base_directories() kafka_server_config = mutable_config_dict( params.config['configurations']['kafka-broker']) # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2. # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to. effective_version = params.hdp_stack_version if upgrade_type is None else format_hdp_stack_version( params.version) Logger.info(format("Effective stack version: {effective_version}")) if effective_version is not None and effective_version != "" and compare_versions( effective_version, '2.2.0.0') >= 0 and compare_versions( effective_version, '2.3.0.0') < 0: if len(params.kafka_hosts ) > 0 and params.hostname in params.kafka_hosts: brokerid = str(sorted(params.kafka_hosts).index(params.hostname)) kafka_server_config['broker.id'] = brokerid Logger.info(format("Calculating broker.id as {brokerid}")) # listeners and advertised.listeners are only added in 2.3.0.0 onwards. if effective_version is not None and effective_version != "" and compare_versions( effective_version, '2.3.0.0') >= 0: listeners = kafka_server_config['listeners'].replace( "localhost", params.hostname) Logger.info(format("Kafka listeners: {listeners}")) if params.security_enabled and params.kafka_kerberos_enabled: Logger.info("Kafka kerberos security is enabled.") if "SASL" not in listeners: listeners = listeners.replace("PLAINTEXT", "PLAINTEXTSASL") kafka_server_config['listeners'] = listeners kafka_server_config['advertised.listeners'] = listeners Logger.info(format("Kafka advertised listeners: {listeners}")) else: kafka_server_config['listeners'] = listeners if 'advertised.listeners' in kafka_server_config: advertised_listeners = kafka_server_config[ 'advertised.listeners'].replace("localhost", params.hostname) kafka_server_config[ 'advertised.listeners'] = advertised_listeners Logger.info( format( "Kafka advertised listeners: {advertised_listeners}")) else: kafka_server_config['host.name'] = params.hostname if params.has_metric_collector: kafka_server_config[ 'kafka.timeline.metrics.host'] = params.metric_collector_host kafka_server_config[ 'kafka.timeline.metrics.port'] = params.metric_collector_port kafka_server_config[ 'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol kafka_server_config[ 'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path kafka_server_config[ 'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type kafka_server_config[ 'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password kafka_data_dir = kafka_server_config['log.dirs'] kafka_data_dirs = filter(None, kafka_data_dir.split(",")) Directory( kafka_data_dirs, mode=0755, cd_access='a', owner=params.kafka_user, group=params.user_group, create_parents=True, recursive_ownership=True, ) PropertiesFile( "server.properties", dir=params.conf_dir, properties=kafka_server_config, owner=params.kafka_user, group=params.user_group, ) File(format("{conf_dir}/kafka-env.sh"), owner=params.kafka_user, content=InlineTemplate(params.kafka_env_sh_template)) if (params.log4j_props != None): File(format("{conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.kafka_user, content=params.log4j_props) if params.security_enabled and params.kafka_kerberos_enabled: TemplateConfig(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user) TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user) # On 
some operating systems this folder may not exist, so we create it before placing files there. Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'kafka.conf'), owner='root', group='root', mode=0644, content=Template("kafka.conf.j2")) setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir) setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
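# A standalone sketch of the two pieces of kafka() above that carry the most logic: deriving a
# stable broker.id from the broker's position in the sorted host list (the HDP 2.2 behaviour, since
# Kafka generates broker.id itself from HDP 2.3 onwards), and rewriting the listeners for the local
# hostname, switching the protocol to PLAINTEXTSASL when Kerberos is enabled.
def broker_id(kafka_hosts, hostname):
    return str(sorted(kafka_hosts).index(hostname)) if hostname in kafka_hosts else None

def effective_listeners(listeners, hostname, kerberos_enabled):
    listeners = listeners.replace("localhost", hostname)
    if kerberos_enabled and "SASL" not in listeners:
        listeners = listeners.replace("PLAINTEXT", "PLAINTEXTSASL")
    return listeners

assert broker_id(["kafka2.example.com", "kafka1.example.com"], "kafka2.example.com") == "1"
assert effective_listeners("PLAINTEXT://localhost:6667", "kafka1.example.com", True) == \
    "PLAINTEXTSASL://kafka1.example.com:6667"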
xml_configurations_supported = config['configurations']['ranger-env'][ 'xml_configurations_supported'] create_db_dbuser = config['configurations']['ranger-env']['create_db_dbuser'] stack_is_hdp22_or_further = Script.is_hdp_stack_greater_or_equal("2.2") stack_is_hdp23_or_further = Script.is_hdp_stack_greater_or_equal("2.3") downgrade_from_version = default("/commandParams/downgrade_from_version", None) upgrade_direction = default("/commandParams/upgrade_direction", None) ranger_conf = '/etc/ranger/admin/conf' ranger_ugsync_conf = '/etc/ranger/usersync/conf' if upgrade_direction == Direction.DOWNGRADE and compare_versions( format_hdp_stack_version(version), '2.3') < 0: stack_is_hdp22_or_further = True stack_is_hdp23_or_further = False ranger_home = '/usr/lib/ranger-admin' ranger_conf = '/etc/ranger/admin/conf' ranger_stop = '/usr/lib/ranger-admin/ews/stop-ranger-admin.sh' ranger_start = '/usr/lib/ranger-admin/ews/start-ranger-admin.sh' usersync_home = '/usr/lib/ranger-usersync' usersync_start = '/usr/lib/ranger-usersync/start.sh' usersync_stop = '/usr/lib/ranger-usersync/stop.sh' ranger_ugsync_conf = '/etc/ranger/usersync/conf' ranger_conf = '/usr/lib/ranger-admin/conf' ranger_ugsync_conf = '/usr/lib/ranger-usersync/conf' hadoop_conf = '/etc/hadoop/conf' hadoop_conf_dir = '/etc/hadoop/conf'
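# A condensed sketch of the path switching above: only a downgrade whose destination version is
# below HDP 2.3 flips Ranger back to the legacy /usr/lib layout. The "DOWNGRADE" string here is a
# plain-text stand-in for Direction.DOWNGRADE, and the legacy paths are the ones listed in the
# excerpt above (where ranger_conf is assigned twice, the later /usr/lib value wins).
def downgrade_uses_legacy_ranger_layout(upgrade_direction, target_version):
    major_minor = tuple(int(p) for p in target_version.split("-")[0].split(".")[:2])
    return upgrade_direction == "DOWNGRADE" and major_minor < (2, 3)

LEGACY_RANGER_PATHS = {
    "ranger_home": "/usr/lib/ranger-admin",
    "ranger_conf": "/usr/lib/ranger-admin/conf",
    "ranger_stop": "/usr/lib/ranger-admin/ews/stop-ranger-admin.sh",
    "ranger_start": "/usr/lib/ranger-admin/ews/start-ranger-admin.sh",
    "usersync_home": "/usr/lib/ranger-usersync",
    "usersync_start": "/usr/lib/ranger-usersync/start.sh",
    "usersync_stop": "/usr/lib/ranger-usersync/stop.sh",
    "ranger_ugsync_conf": "/usr/lib/ranger-usersync/conf",
}

# downgrade_uses_legacy_ranger_layout("DOWNGRADE", "2.2.9.0-3393") -> True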
def service_check(self, env): import params env.set_params(params) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: hdp_version = functions.get_hdp_version("hadoop-client") path_to_tez_jar = format(params.path_to_tez_examples_jar) copy_test_file_to_hdfs_cmd = format( "fs -put {tmp_dir}/sample-tez-test /tmp/tezsmokeinput/") create_input_dir_cmd = format("fs -mkdir /tmp/tezsmokeinput") wordcount_command = format( "jar {path_to_tez_jar} orderedwordcount " "/tmp/tezsmokeinput/sample-tez-test /tmp/tezsmokeoutput/") test_command = format("fs -test -e /tmp/tezsmokeoutput/_SUCCESS") remove_output_input_dirs_cmd = "fs -rm -r -f /tmp/tezsmokeinput /tmp/tezsmokeoutput" ExecuteHadoop( remove_output_input_dirs_cmd, tries=3, try_sleep=5, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, # for kinit run keytab=params.smoke_user_keytab, principal=params.smokeuser_principal, security_enabled=params.security_enabled, kinit_path_local=params.kinit_path_local, bin_dir=params.hadoop_bin_dir) ExecuteHadoop(create_input_dir_cmd, tries=3, try_sleep=5, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir) File(format("{tmp_dir}/sample-tez-test"), content="foo\nbar\nfoo\nbar\nfoo", mode=0755) ExecuteHadoop(copy_test_file_to_hdfs_cmd, tries=3, try_sleep=5, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir) ExecuteHadoop(wordcount_command, tries=3, try_sleep=5, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir) ExecuteHadoop(test_command, tries=10, try_sleep=6, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir)
def pre_upgrade_restart(self, env, upgrade_type=None): Logger.info("Executing DataNode Stack Upgrade pre-restart") import params env.set_params(params) if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0: stack_select.select_packages(params.version)