def tez(config_dir):
  """
  Write out tez-site.xml and tez-env.sh to the config directory.
  :param config_dir: Which config directory to save configs to, which is different during rolling upgrade.
  """
  import params

  # ensure that matching LZO libraries are installed for Tez
  lzo_utils.install_lzo_if_needed()

  # parent config dir (e.g. /etc/tez); world-readable
  Directory(params.tez_etc_dir, mode=0755)

  Directory(config_dir,
            owner=params.tez_user,
            group=params.user_group,
            create_parents=True)

  # tez-site.xml is written group-writable (0664)
  # NOTE(review): presumably intentional so group members can refresh it -- verify
  XmlConfig(
    "tez-site.xml",
    conf_dir=config_dir,
    configurations=params.config['configurations']['tez-site'],
    configuration_attributes=params.config['configuration_attributes']
    ['tez-site'],
    owner=params.tez_user,
    group=params.user_group,
    mode=0664)

  # env script is rendered from the configured template; read/execute only (0555)
  tez_env_file_path = os.path.join(config_dir, "tez-env.sh")
  File(tez_env_file_path,
       owner=params.tez_user,
       content=InlineTemplate(params.tez_env_sh_template),
       mode=0555)
def mahout(): import params # ensure that matching LZO libraries are installed for Mahout lzo_utils.install_lzo_if_needed() Directory(params.mahout_conf_dir, create_parents=True, owner=params.mahout_user, group=params.user_group) XmlConfig( "yarn-site.xml", conf_dir=params.hadoop_conf_dir, configurations=params.config['configurations']['yarn-site'], configuration_attributes=params.config['configuration_attributes'] ['yarn-site'], owner=params.yarn_user, group=params.user_group, mode=0644) if not is_empty(params.log4j_props): File(format("{params.mahout_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.mahout_user, content=params.log4j_props) elif (os.path.exists(format("{params.mahout_conf_dir}/log4j.properties"))): File(format("{params.mahout_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.mahout_user)
def pig(): import params # ensure that matching LZO libraries are installed for Pig lzo_utils.install_lzo_if_needed() Directory(params.pig_conf_dir, create_parents=True, owner=params.hdfs_user, group=params.user_group) File(format("{pig_conf_dir}/pig-env.sh"), owner=params.hdfs_user, mode=0755, content=InlineTemplate(params.pig_env_sh_template)) # pig_properties is always set to a default even if it's not in the payload File(format("{params.pig_conf_dir}/pig.properties"), mode=0644, group=params.user_group, owner=params.hdfs_user, content=params.pig_properties) if (params.log4j_props is not None): File(format("{params.pig_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hdfs_user, content=params.log4j_props) elif (os.path.exists(format("{params.pig_conf_dir}/log4j.properties"))): File(format("{params.pig_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hdfs_user)
def hdfs(name=None):
  """
  Write out HDFS configuration files (core-site, hdfs-site, ssl configs,
  limits, JAAS files on secure clusters) to the Hadoop conf directory.
  :param name: component name; currently unused by the body, kept for caller compatibility
  """
  import params

  if params.create_lib_snappy_symlinks:
    install_snappy()

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hdfs.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hdfs.conf.j2"))

  if params.security_enabled:
    # JAAS configs for DataNode / NameNode (and JournalNode when HA is enabled)
    File(os.path.join(params.hadoop_conf_dir, 'hdfs_dn_jaas.conf'),
         owner=params.hdfs_user,
         group=params.user_group,
         content=Template("hdfs_dn_jaas.conf.j2"))
    File(os.path.join(params.hadoop_conf_dir, 'hdfs_nn_jaas.conf'),
         owner=params.hdfs_user,
         group=params.user_group,
         content=Template("hdfs_nn_jaas.conf.j2"))
    if params.dfs_ha_enabled:
      File(os.path.join(params.hadoop_conf_dir, 'hdfs_jn_jaas.conf'),
           owner=params.hdfs_user,
           group=params.user_group,
           content=Template("hdfs_jn_jaas.conf.j2"))

    # secure cluster: the 'slaves' file below is root-owned
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  if "hadoop-policy" in params.config['configurations']:
    XmlConfig(
      "hadoop-policy.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['hadoop-policy'],
      configuration_attributes=params.config['configurationAttributes']
      ['hadoop-policy'],
      owner=params.hdfs_user,
      group=params.user_group)

  if "ssl-client" in params.config['configurations']:
    XmlConfig(
      "ssl-client.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['ssl-client'],
      configuration_attributes=params.config['configurationAttributes']
      ['ssl-client'],
      owner=params.hdfs_user,
      group=params.user_group)

    Directory(
      params.hadoop_conf_secure_dir,
      create_parents=True,
      owner='root',
      group=params.user_group,
      cd_access='a',
    )

    # a second copy of ssl-client.xml goes into the secure conf dir
    XmlConfig(
      "ssl-client.xml",
      conf_dir=params.hadoop_conf_secure_dir,
      configurations=params.config['configurations']['ssl-client'],
      configuration_attributes=params.config['configurationAttributes']
      ['ssl-client'],
      owner=params.hdfs_user,
      group=params.user_group)

  if "ssl-server" in params.config['configurations']:
    XmlConfig(
      "ssl-server.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['ssl-server'],
      configuration_attributes=params.config['configurationAttributes']
      ['ssl-server'],
      owner=params.hdfs_user,
      group=params.user_group)

  XmlConfig("hdfs-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['hdfs-site'],
            configuration_attributes=params.config['configurationAttributes']
            ['hdfs-site'],
            owner=params.hdfs_user,
            group=params.user_group)

  XmlConfig("core-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configurationAttributes']
            ['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644)

  # owner depends on security mode (see tc_owner above)
  File(os.path.join(params.hadoop_conf_dir, 'slaves'),
       owner=tc_owner,
       content=Template("slaves.j2"))

  # ensure that matching LZO libraries are installed for HDFS
  install_lzo_if_needed()
def hive(name=None):
  """
  Write out Hive configuration for both client and server components.
  :param name: component name ('hiveserver2', 'metastore', 'client' or None);
               selects the component-specific setup performed at the end
  """
  import params

  # ensure that matching LZO libraries are installed for Hive
  install_lzo_if_needed()

  hive_client_conf_path = format("{stack_root}/current/{component_directory}/conf")
  # Permissions 644 for conf dir (client) files, and 600 for conf.server
  mode_identified = 0644 if params.hive_config_dir == hive_client_conf_path else 0600

  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for client as well as for server.
  # The reason is that stale-configs are service-level, not component.
  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  # move plaintext passwords out of hive-site into a JCEKS credential store
  params.hive_site_config = update_credential_provider_path(params.hive_site_config,
                                                            'hive-site',
                                                            os.path.join(params.hive_config_dir, 'hive-site.jceks'),
                                                            params.hive_user,
                                                            params.user_group
  )

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configurationAttributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=mode_identified)

  # Generate atlas-application.properties.xml file
  if params.enable_atlas_hook:
    atlas_hook_filepath = os.path.join(params.hive_config_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.hive_user, params.user_group)

  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template),
       mode=mode_identified
  )

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents = True,
            owner='root',
            group='root'
  )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
  )

  if params.security_enabled:
    # JAAS config used by the ZooKeeper migration utility
    File(os.path.join(params.hive_config_dir, 'zkmigrator_jaas.conf'),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("zkmigrator_jaas.conf.j2")
    )

  # helper jar used by the Ambari agent to verify DB connectivity
  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}/{check_db_connection_jar_name}")),
       mode = 0644,
  )

  # component-specific setup; clients only need the config files above
  if name != "client":
    setup_non_client()
  if name == 'hiveserver2':
    setup_hiveserver2()
  if name == 'metastore':
    setup_metastore()
def oozie(is_server=False, upgrade_type=None): import params if is_server: params.HdfsResource(params.oozie_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) params.HdfsResource(None, action="execute") Directory(params.conf_dir, create_parents=True, owner=params.oozie_user, group=params.user_group) params.oozie_site = update_credential_provider_path( params.oozie_site, 'oozie-site', os.path.join(params.conf_dir, 'oozie-site.jceks'), params.oozie_user, params.user_group, use_local_jceks=True) XmlConfig("oozie-site.xml", conf_dir=params.conf_dir, configurations=params.oozie_site, configuration_attributes=params.config['configurationAttributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) File( format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template), group=params.user_group, ) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'oozie.conf'), owner='root', group='root', mode=0644, content=Template("oozie.conf.j2")) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=InlineTemplate(params.log4j_props)) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) if params.stack_version_formatted and check_stack_feature( StackFeature.OOZIE_ADMIN_USER, params.stack_version_formatted): File(format("{params.conf_dir}/adminusers.txt"), mode=0644, group=params.user_group, owner=params.oozie_user, content=Template('adminusers.txt.j2', oozie_admin_users=params.oozie_admin_users)) else: 
File(format("{params.conf_dir}/adminusers.txt"), owner=params.oozie_user, group=params.user_group) if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), ) pass oozie_ownership() if params.lzo_enabled: install_lzo_if_needed() Execute( format( '{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'), ) if is_server: oozie_server_specific(upgrade_type)
def yarn(name=None):
  """
  Write out YARN/MapReduce configuration files and create the working
  directories for the requested component.
  :param name: component name, one of 'historyserver', 'nodemanager',
               'resourcemanager', 'apptimelineserver', or None (client defaults)
  """
  import params

  # ensure that matching LZO libraries are installed for YARN
  install_lzo_if_needed()

  if name == "historyserver":
    if params.yarn_log_aggregation_enabled:
      params.HdfsResource(params.yarn_nm_app_log_dir,
                          action="create_on_execute",
                          type="directory",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)

    params.HdfsResource(params.entity_file_history_directory,
                        action="create_on_execute",
                        type="directory",
                        owner=params.yarn_user,
                        group=params.user_group)
    params.HdfsResource("/mapred",
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user)
    params.HdfsResource("/mapred/system",
                        type="directory",
                        action="create_on_execute",
                        owner=params.hdfs_user)
    params.HdfsResource(params.mapreduce_jobhistory_intermediate_done_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user,
                        group=params.user_group,
                        mode=0777)
    params.HdfsResource(params.mapreduce_jobhistory_done_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user,
                        group=params.user_group,
                        change_permissions_for_parents=True,
                        mode=0777)
    params.HdfsResource(None, action="execute")

    Directory(
      params.jhs_leveldb_state_store_dir,
      owner=params.mapred_user,
      group=params.user_group,
      create_parents=True,
      cd_access="a",
    )
    # the leveldb store may already exist with wrong ownership; fix it recursively
    Execute(
      ("chown", "-R", format("{mapred_user}:{user_group}"),
       params.jhs_leveldb_state_store_dir),
      sudo=True,
    )

  if name == "nodemanager":
    # First start after enabling/disabling security
    if params.toggle_nm_security:
      Directory(params.nm_local_dirs_list + params.nm_log_dirs_list,
                action='delete')

      # If yarn.nodemanager.recovery.dir exists, remove this dir
      if params.yarn_nodemanager_recovery_dir:
        Directory(InlineTemplate(
          params.yarn_nodemanager_recovery_dir).get_content(),
                  action='delete')

      # Setting NM marker file
      if params.security_enabled:
        Directory(params.nm_security_marker_dir)
        File(
          params.nm_security_marker,
          content=
          "Marker file to track first start after enabling/disabling security. "
          "During first start yarn local, log dirs are removed and recreated")
      elif not params.security_enabled:
        File(params.nm_security_marker, action="delete")

    if not params.security_enabled or params.toggle_nm_security:
      # handle_mounted_dirs ensures that we don't create dirs which are temporary unavailable (unmounted), and intended to reside on a different mount.
      nm_log_dir_to_mount_file_content = handle_mounted_dirs(
        create_log_dir, params.nm_log_dirs, params.nm_log_dir_to_mount_file,
        params)
      # create a history file used by handle_mounted_dirs
      File(params.nm_log_dir_to_mount_file,
           owner=params.hdfs_user,
           group=params.user_group,
           mode=0644,
           content=nm_log_dir_to_mount_file_content)
      nm_local_dir_to_mount_file_content = handle_mounted_dirs(
        create_local_dir, params.nm_local_dirs,
        params.nm_local_dir_to_mount_file, params)
      File(params.nm_local_dir_to_mount_file,
           owner=params.hdfs_user,
           group=params.user_group,
           mode=0644,
           content=nm_local_dir_to_mount_file_content)

  if params.yarn_nodemanager_recovery_dir:
    Directory(
      InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
      owner=params.yarn_user,
      group=params.user_group,
      create_parents=True,
      mode=0755,
      cd_access='a',
    )

  if params.security_enabled:
    # the smoke-test user's usercache dirs must be owned by the smoke user
    smokeuser_directories = [
      os.path.join(dir, 'usercache', params.smokeuser)
      for dir in params.nm_local_dirs.split(',')
    ]
    for directory in smokeuser_directories:
      Execute(
        ('chown', '-R', params.smokeuser, directory),
        only_if=format("test -d {directory}"),
        sudo=True,
      )

  Directory(
    [params.yarn_pid_dir_prefix, params.yarn_pid_dir, params.yarn_log_dir],
    owner=params.yarn_user,
    group=params.user_group,
    create_parents=True,
    cd_access='a',
  )
  Directory(
    [
      params.mapred_pid_dir_prefix, params.mapred_pid_dir,
      params.mapred_log_dir_prefix, params.mapred_log_dir
    ],
    owner=params.mapred_user,
    group=params.user_group,
    create_parents=True,
    cd_access='a',
  )
  Directory(
    [params.yarn_log_dir_prefix],
    owner=params.yarn_user,
    create_parents=True,
    ignore_failures=True,
    cd_access='a',
  )

  XmlConfig(
    "core-site.xml",
    conf_dir=params.hadoop_conf_dir,
    configurations=params.config['configurations']['core-site'],
    configuration_attributes=params.config['configuration_attributes']
    ['core-site'],
    owner=params.hdfs_user,
    group=params.user_group,
    mode=0644)

  # During RU, Core Masters and Slaves need hdfs-site.xml
  # TODO, instead of specifying individual configs, which is susceptible to breaking when new configs are added,
  # RU should rely on all available in /usr/hdp/<version>/hadoop/conf
  XmlConfig(
    "hdfs-site.xml",
    conf_dir=params.hadoop_conf_dir,
    configurations=params.config['configurations']['hdfs-site'],
    configuration_attributes=params.config['configuration_attributes']
    ['hdfs-site'],
    owner=params.hdfs_user,
    group=params.user_group,
    mode=0644)

  XmlConfig(
    "mapred-site.xml",
    conf_dir=params.hadoop_conf_dir,
    configurations=params.config['configurations']['mapred-site'],
    configuration_attributes=params.config['configuration_attributes']
    ['mapred-site'],
    owner=params.yarn_user,
    group=params.user_group,
    mode=0644)

  XmlConfig(
    "yarn-site.xml",
    conf_dir=params.hadoop_conf_dir,
    configurations=params.config['configurations']['yarn-site'],
    configuration_attributes=params.config['configuration_attributes']
    ['yarn-site'],
    owner=params.yarn_user,
    group=params.user_group,
    mode=0644)

  XmlConfig(
    "capacity-scheduler.xml",
    conf_dir=params.hadoop_conf_dir,
    configurations=params.config['configurations']['capacity-scheduler'],
    configuration_attributes=params.config['configuration_attributes']
    ['capacity-scheduler'],
    owner=params.yarn_user,
    group=params.user_group,
    mode=0644)

  if name == 'resourcemanager':
    File(params.yarn_job_summary_log,
         owner=params.yarn_user,
         group=params.user_group)

    # create the node-labels dir when labels are explicitly enabled, or when
    # the enable flag is absent but a labels dir is configured
    if not is_empty(
        params.node_label_enable
    ) and params.node_label_enable or is_empty(
        params.node_label_enable) and params.node_labels_dir:
      params.HdfsResource(params.node_labels_dir,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0700)
      params.HdfsResource(None, action="execute")

  elif name == 'apptimelineserver':
    Directory(
      params.ats_leveldb_dir,
      owner=params.yarn_user,
      group=params.user_group,
      create_parents=True,
      cd_access="a",
    )

    Directory(
      params.ats_leveldb_state_store_dir,
      owner=params.yarn_user,
      group=params.user_group,
      create_parents=True,
      cd_access="a",
    )
    # app timeline server 1.5 directories
    if not is_empty(params.entity_groupfs_store_dir):
      parent_path = os.path.dirname(params.entity_groupfs_store_dir)
      params.HdfsResource(parent_path,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0755)
      params.HdfsResource(params.entity_groupfs_store_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=params.entity_groupfs_store_dir_mode)
    if not is_empty(params.entity_groupfs_active_dir):
      parent_path = os.path.dirname(params.entity_groupfs_active_dir)
      params.HdfsResource(parent_path,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0755)
      params.HdfsResource(params.entity_groupfs_active_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=params.entity_groupfs_active_dir_mode)
    params.HdfsResource(None, action="execute")

  File(params.rm_nodes_exclude_path,
       owner=params.yarn_user,
       group=params.user_group)

  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2'))

  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2'))

  File(format("{hadoop_conf_dir}/yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template))

  # mode of the container-executor binary comes from params (typically setuid)
  container_executor = format("{yarn_container_bin}/container-executor")
  File(container_executor,
       group=params.yarn_executor_container_group,
       mode=params.container_executor_mode)

  File(format("{hadoop_conf_dir}/container-executor.cfg"),
       group=params.user_group,
       mode=0644,
       content=Template('container-executor.cfg.j2'))

  Directory(params.cgroups_dir,
            group=params.user_group,
            create_parents=True,
            mode=0755,
            cd_access="a")

  if params.security_enabled:
    # secure cluster: taskcontroller.cfg is root-owned and world-readable
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  File(format("{hadoop_conf_dir}/mapred-env.sh"),
       owner=tc_owner,
       mode=0755,
       content=InlineTemplate(params.mapred_env_sh_template))

  if params.security_enabled:
    # 06050: setuid+setgid with group read/execute, required by the secure task-controller
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050)
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         mode=tc_mode,
         group=params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2"))
  else:
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2"))

  if "mapred-site" in params.config['configurations']:
    XmlConfig(
      "mapred-site.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['mapred-site'],
      configuration_attributes=params.config['configuration_attributes']
      ['mapred-site'],
      owner=params.mapred_user,
      group=params.user_group)

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig(
      "capacity-scheduler.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']
      ['capacity-scheduler'],
      configuration_attributes=params.config['configuration_attributes']
      ['capacity-scheduler'],
      owner=params.hdfs_user,
      group=params.user_group)

  if "ssl-client" in params.config['configurations']:
    XmlConfig(
      "ssl-client.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['ssl-client'],
      configuration_attributes=params.config['configuration_attributes']
      ['ssl-client'],
      owner=params.hdfs_user,
      group=params.user_group)

    Directory(
      params.hadoop_conf_secure_dir,
      create_parents=True,
      owner='root',
      group=params.user_group,
      cd_access='a',
    )

    # a second copy of ssl-client.xml goes into the secure conf dir
    XmlConfig(
      "ssl-client.xml",
      conf_dir=params.hadoop_conf_secure_dir,
      configurations=params.config['configurations']['ssl-client'],
      configuration_attributes=params.config['configuration_attributes']
      ['ssl-client'],
      owner=params.hdfs_user,
      group=params.user_group)

  if "ssl-server" in params.config['configurations']:
    XmlConfig(
      "ssl-server.xml",
      conf_dir=params.hadoop_conf_dir,
      configurations=params.config['configurations']['ssl-server'],
      configuration_attributes=params.config['configuration_attributes']
      ['ssl-server'],
      owner=params.hdfs_user,
      group=params.user_group)

  # fix ownership of optional/example files if the package laid them down
  if os.path.exists(
      os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
    File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group)
  if os.path.exists(
      os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
  if os.path.exists(
      os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
def yarn(name=None, config_dir=None):
  """
  :param name: Component name, apptimelineserver, nodemanager, resourcemanager, or None (defaults for client)
  :param config_dir: Which config directory to write configs to, which could be different during rolling upgrade.
  """
  import params

  # ensure that matching LZO libraries are installed for YARN
  install_lzo_if_needed()

  if config_dir is None:
    config_dir = params.hadoop_conf_dir

  if params.yarn_nodemanager_recovery_dir:
    Directory(InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
              owner=params.yarn_user,
              group=params.user_group,
              create_parents=True,
              mode=0755,
              cd_access='a',
    )

  Directory([params.yarn_pid_dir_prefix, params.yarn_pid_dir, params.yarn_log_dir],
            owner=params.yarn_user,
            group=params.user_group,
            create_parents=True,
            cd_access='a',
  )

  Directory([params.mapred_pid_dir_prefix, params.mapred_pid_dir, params.mapred_log_dir_prefix, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            create_parents=True,
            cd_access='a',
  )

  Directory([params.yarn_log_dir_prefix],
            owner=params.yarn_user,
            group=params.user_group,
            create_parents=True,
            ignore_failures=True,
            cd_access='a',
  )

  # Some of these function calls depend on the directories above being created first.
  if name == 'resourcemanager':
    setup_resourcemanager()
  elif name == 'nodemanager':
    setup_nodemanager()
  elif name == 'apptimelineserver':
    setup_ats()
  elif name == 'historyserver':
    setup_historyserver()

  XmlConfig("core-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644
  )

  # During RU, Core Masters and Slaves need hdfs-site.xml
  # TODO, instead of specifying individual configs, which is susceptible to breaking when new configs are added,
  # RU should rely on all available in <stack-root>/<version>/hadoop/conf
  XmlConfig("hdfs-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['hdfs-site'],
            configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("mapred-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("yarn-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("capacity-scheduler.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2')
  )

  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2')
  )

  File(os.path.join(config_dir, "yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template)
  )

  # mode of the container-executor binary comes from params (typically setuid)
  File(format("{yarn_container_bin}/container-executor"),
       group=params.yarn_executor_container_group,
       mode=params.container_executor_mode
  )

  File(os.path.join(config_dir, "container-executor.cfg"),
       group=params.user_group,
       mode=0644,
       content=Template('container-executor.cfg.j2')
  )

  Directory(params.cgroups_dir,
            group=params.user_group,
            create_parents = True,
            mode=0755,
            cd_access="a")

  # tc_owner/tc_mode come from params in this flavor (security-dependent)
  File(os.path.join(config_dir, "mapred-env.sh"),
       owner=params.tc_owner,
       mode=0755,
       content=InlineTemplate(params.mapred_env_sh_template)
  )

  if params.security_enabled:
    # 06050: setuid+setgid with group read/execute, required by the secure task-controller
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050
    )
    File(os.path.join(config_dir, 'taskcontroller.cfg'),
         owner = params.tc_owner,
         mode = params.tc_mode,
         group = params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2")
    )
    # JAAS configs for RM/NM (and ATS / history server when present)
    File(os.path.join(config_dir, 'yarn_jaas.conf'),
         owner=params.yarn_user,
         group=params.user_group,
         content=Template("yarn_jaas.conf.j2")
    )
    if params.has_ats:
      File(os.path.join(config_dir, 'yarn_ats_jaas.conf'),
           owner=params.yarn_user,
           group=params.user_group,
           content=Template("yarn_ats_jaas.conf.j2")
      )
    File(os.path.join(config_dir, 'yarn_nm_jaas.conf'),
         owner=params.yarn_user,
         group=params.user_group,
         content=Template("yarn_nm_jaas.conf.j2")
    )
    if params.has_hs:
      File(os.path.join(config_dir, 'mapred_jaas.conf'),
           owner=params.mapred_user,
           group=params.user_group,
           content=Template("mapred_jaas.conf.j2")
      )
  else:
    File(os.path.join(config_dir, 'taskcontroller.cfg'),
         owner=params.tc_owner,
         content=Template("taskcontroller.cfg.j2")
    )

  XmlConfig("mapred-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.mapred_user,
            group=params.user_group
  )

  XmlConfig("capacity-scheduler.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations'][
              'capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.hdfs_user,
            group=params.user_group
  )

  if "ssl-client" in params.config['configurations']:
    XmlConfig("ssl-client.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group
    )

    Directory(params.hadoop_conf_secure_dir,
              create_parents = True,
              owner='root',
              group=params.user_group,
              cd_access='a',
    )

    # a second copy of ssl-client.xml goes into the secure conf dir
    XmlConfig("ssl-client.xml",
              conf_dir=params.hadoop_conf_secure_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group
    )

  if "ssl-server" in params.config['configurations']:
    XmlConfig("ssl-server.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['ssl-server'],
              configuration_attributes=params.config['configuration_attributes']['ssl-server'],
              owner=params.hdfs_user,
              group=params.user_group
    )

  # fix ownership of optional/example files if the package laid them down
  if os.path.exists(os.path.join(config_dir, 'fair-scheduler.xml')):
    File(os.path.join(config_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group
    )

  if os.path.exists(
    os.path.join(config_dir, 'ssl-client.xml.example')):
    File(os.path.join(config_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group
    )

  if os.path.exists(
    os.path.join(config_dir, 'ssl-server.xml.example')):
    File(os.path.join(config_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group
    )
def hive(name=None):
  """
  Windows flavor: write hive-site.xml, override the Windows service logon
  user, and initialize the metastore schema / Tez app dir as needed.
  :param name: 'hiveserver2', 'metastore' or None (client)
  """
  import params

  # ensure that matching LZO libraries are installed for Hive
  install_lzo_if_needed()

  XmlConfig(
    "hive-site.xml",
    conf_dir=params.hive_conf_dir,
    configurations=params.config['configurations']['hive-site'],
    owner=params.hive_user,
    configuration_attributes=params.config['configuration_attributes']
    ['hive-site'])

  if name in ["hiveserver2", "metastore"]:
    # Manually overriding service logon user & password set by the installation package
    service_name = params.service_map[name]
    ServiceConfig(service_name,
                  action="change_user",
                  username=params.hive_user,
                  password=Script.get_password(params.hive_user))

    # make sure the warehouse directory exists in HDFS
    Execute(format("cmd /c hadoop fs -mkdir -p {hive_warehouse_dir}"),
            logoutput=True,
            user=params.hadoop_user)

  if name == 'metastore':
    if params.init_metastore_schema:
      check_schema_created_cmd = format(
        'cmd /c "{hive_bin}\\hive.cmd --service schematool -info '
        '-dbType {hive_metastore_db_type} '
        '-userName {hive_metastore_user_name} '
        '-passWord {hive_metastore_user_passwd!p}'
        '&set EXITCODE=%ERRORLEVEL%&exit /B %EXITCODE%"',  #cmd "feature", propagate the process exit code manually
        hive_bin=params.hive_bin,
        hive_metastore_db_type=params.hive_metastore_db_type,
        hive_metastore_user_name=params.hive_metastore_user_name,
        hive_metastore_user_passwd=params.hive_metastore_user_passwd)
      try:
        # schematool -info fails (non-zero exit) when the schema does not exist yet
        Execute(check_schema_created_cmd)
      except Fail:
        # schema missing: create it
        create_schema_cmd = format(
          'cmd /c {hive_bin}\\hive.cmd --service schematool -initSchema '
          '-dbType {hive_metastore_db_type} '
          '-userName {hive_metastore_user_name} '
          '-passWord {hive_metastore_user_passwd!p}',
          hive_bin=params.hive_bin,
          hive_metastore_db_type=params.hive_metastore_db_type,
          hive_metastore_user_name=params.hive_metastore_user_name,
          hive_metastore_user_passwd=params.hive_metastore_user_passwd)
        Execute(create_schema_cmd,
                user=params.hive_user,
                logoutput=True)

  if name == "hiveserver2":
    if params.hive_execution_engine == "tez":
      # Init the tez app dir in hadoop
      script_file = __file__.replace('/', os.sep)
      cmd_file = os.path.normpath(
        os.path.join(os.path.dirname(script_file), "..", "files",
                     "hiveTezSetup.cmd"))

      Execute("cmd /c " + cmd_file,
              logoutput=True,
              user=params.hadoop_user)
def setup_spark(env, type, upgrade_type = None, action = None):
  """
  Write out the Spark2 configuration files and create runtime/HDFS directories.

  :param env: execution environment passed by the caller (unused here)
  :param type: component type; 'server' triggers HDFS directory creation
  :param upgrade_type: non-None while a stack upgrade is in progress
  :param action: HDFS resources are only created when action == 'config'
  """
  import params

  # ensure that matching LZO libraries are installed for Spark
  lzo_utils.install_lzo_if_needed()

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0o775,
            create_parents=True)

  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0o775)
    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
      # Create Spark Warehouse Dir
      params.HdfsResource(params.spark_warehouse_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          mode=0o777)
    params.HdfsResource(None, action="execute")

  generate_logfeeder_input_config('spark2', Template("input.config-spark2.json.j2", extra_imports=[default]))

  spark2_defaults = dict(params.config['configurations']['spark2-defaults'])

  if params.security_enabled:
    # pop() with a default: a cluster whose spark2-defaults never contained
    # the SPNEGO keys must not blow up with KeyError here.
    spark2_defaults.pop("history.server.spnego.kerberos.principal", None)
    spark2_defaults.pop("history.server.spnego.keytab.file", None)
    # Guard the _HOST substitution the same way the thrift-sparkconf branch
    # below does, so a missing principal property is tolerated.
    if 'spark.history.kerberos.principal' in spark2_defaults:
      spark2_defaults['spark.history.kerberos.principal'] = \
        spark2_defaults['spark.history.kerberos.principal'].replace('_HOST', socket.getfqdn().lower())

  PropertiesFile(format("{spark_conf}/spark-defaults.conf"),
                 properties=spark2_defaults,
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group,
                 mode=0o644)

  # create spark-env.sh in etc/conf dir
  File(os.path.join(params.spark_conf, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0o644)

  # create log4j.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0o644)

  # create metrics.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties),
       mode=0o644)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=params.spark_conf,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0o644)

  if params.has_spark_thriftserver:
    spark2_thrift_sparkconf = dict(params.config['configurations']['spark2-thrift-sparkconf'])
    if params.security_enabled and 'spark.yarn.principal' in spark2_thrift_sparkconf:
      spark2_thrift_sparkconf['spark.yarn.principal'] = \
        spark2_thrift_sparkconf['spark.yarn.principal'].replace('_HOST', socket.getfqdn().lower())
    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=spark2_thrift_sparkconf,
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ",
                   mode=0o644)

  # During an upgrade the target version drives feature checks; otherwise
  # use the currently formatted stack version.
  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"),
         owner=params.spark_user,
         group=params.spark_group,
         mode=0o755,
         content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def prepare_libext_directory(upgrade_type=None):
  """
  Performs the following actions on libext:
    - creates <stack-root>/current/oozie/libext and recursively
      sets 777 permissions on it and its parents.
    - copies the hadoop-lzo JARs into libext when LZO is enabled
    - copies the ExtJS ZIP for the Oozie web UI when available
    - downloads the JDBC driver JAR if needed
    - copies the Falcon JAR for the Oozie WAR if needed

  :param upgrade_type: rolling/non-rolling upgrade type, or None
  """
  import params

  # some stack versions don't need the lzo compression libraries
  target_version_needs_compression_libraries = check_stack_feature(
    StackFeature.LZO, params.version_for_stack_feature_checks)

  # ensure the directory exists
  Directory(params.oozie_libext_dir, mode=0o777)

  # get all hadooplzo* JAR files
  # <stack-selector-tool> set hadoop-client has not run yet, therefore we cannot use
  # <stack-root>/current/hadoop-client ; we must use params.version directly
  # however, this only works when upgrading beyond 2.2.0.0; don't do this
  # for downgrade to 2.2.0.0 since hadoop-lzo will not be present
  # This can also be called during a Downgrade.
  # When a version is Installed, it is responsible for downloading the hadoop-lzo packages
  # if lzo is enabled.
  if params.lzo_enabled and (
      params.upgrade_direction == Direction.UPGRADE or target_version_needs_compression_libraries):
    # ensure that the LZO files are installed for this version of Oozie
    lzo_utils.install_lzo_if_needed()

    hadoop_lzo_pattern = 'hadoop-lzo*.jar'
    hadoop_client_new_lib_dir = format("{stack_root}/{version}/hadoop/lib")

    # glob.glob (not iglob): the original truthiness check on an iglob
    # iterator was always false, so the "no files" failure below could
    # never fire; a materialized list makes the emptiness check real.
    files = glob.glob(os.path.join(hadoop_client_new_lib_dir, hadoop_lzo_pattern))

    if not files:
      raise Fail("There are no files at {0} matching {1}".format(
        hadoop_client_new_lib_dir, hadoop_lzo_pattern))

    # copy files into libext
    files_copied = False
    for jar_file in files:
      if os.path.isfile(jar_file):
        Logger.info("Copying {0} to {1}".format(str(jar_file), params.oozie_libext_dir))
        shutil.copy2(jar_file, params.oozie_libext_dir)
        files_copied = True

    # the glob matched only non-regular files (e.g. directories)
    if not files_copied:
      raise Fail("There are no files at {0} matching {1}".format(
        hadoop_client_new_lib_dir, hadoop_lzo_pattern))

  # ExtJS is used to build a working Oozie Web UI - without it, Oozie will startup and work
  # but will not have a functioning user interface - Some stacks no longer ship ExtJS,
  # so it's optional now. On an upgrade, we should make sure that if it's not found, that's OK.
  # However, if it is found on the system (from an earlier install) then it should be used.
  extjs_included = check_stack_feature(
    StackFeature.OOZIE_EXTJS_INCLUDED, params.version_for_stack_feature_checks)

  # something like <stack-root>/current/oozie-server/libext/ext-2.2.zip
  oozie_ext_zip_target_path = os.path.join(params.oozie_libext_dir, params.ext_js_file)

  # Copy ext ZIP to libext dir
  # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
  source_ext_zip_paths = oozie.get_oozie_ext_zip_source_paths(upgrade_type, params)

  found_at_least_one_oozie_ext_file = False

  # Copy the first oozie ext-2.2.zip file that is found.
  # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
  if source_ext_zip_paths is not None:
    for source_ext_zip_path in source_ext_zip_paths:
      if os.path.isfile(source_ext_zip_path):
        found_at_least_one_oozie_ext_file = True
        Logger.info("Copying {0} to {1}".format(source_ext_zip_path, params.oozie_libext_dir))
        Execute(("cp", source_ext_zip_path, params.oozie_libext_dir), sudo=True)
        Execute(("chown", format("{oozie_user}:{user_group}"), oozie_ext_zip_target_path),
                sudo=True)
        File(oozie_ext_zip_target_path, mode=0o644)
        break

  # ExtJS was expected to be on the system, but was not found
  if extjs_included and not found_at_least_one_oozie_ext_file:
    raise Fail(
      "Unable to find any Oozie source extension files from the following paths {0}"
      .format(source_ext_zip_paths))

  # ExtJS is not expected, so it's OK - just log a warning
  if not found_at_least_one_oozie_ext_file:
    Logger.warning(
      "Unable to find ExtJS in any of the following paths. The Oozie UI will not be available. Source Paths: {0}"
      .format(source_ext_zip_paths))

  # Redownload jdbc driver to a new current location
  oozie.download_database_library_if_needed()

  # get the upgrade version in the event that it's needed
  upgrade_stack = stack_select._get_upgrade_stack()
  if upgrade_stack is None or len(upgrade_stack) < 2 or upgrade_stack[1] is None:
    raise Fail(
      "Unable to determine the stack that is being upgraded to or downgraded to."
    )

  stack_version = upgrade_stack[1]

  # copy the Falcon JAR if needed; falcon has not upgraded yet, so we must
  # use the versioned falcon directory
  if params.has_falcon_host:
    versioned_falcon_jar_directory = "{0}/{1}/falcon/oozie/ext/falcon-oozie-el-extension-*.jar".format(
      params.stack_root, stack_version)
    Logger.info("Copying {0} to {1}".format(
      versioned_falcon_jar_directory, params.oozie_libext_dir))

    # sudo + shell so the wildcard is expanded at execution time
    Execute(
      format('{sudo} cp {versioned_falcon_jar_directory} {oozie_libext_dir}'))
    Execute(
      format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'))
def oozie(is_server=False):
  """
  Write out the Oozie configuration files (oozie-site.xml, oozie-env.sh,
  oozie-log4j.properties, adminusers.txt), fix ownership, and stage LZO
  and DB-connection-check artifacts as needed.

  :param is_server: True when configuring the Oozie server; also creates
                    the Oozie HDFS user directory and runs the
                    server-specific setup.
  """
  import params

  if is_server:
    params.HdfsResource(params.oozie_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.oozie_user,
                        mode=params.oozie_hdfs_user_mode)
    params.HdfsResource(None, action="execute")

  Directory(params.conf_dir,
            create_parents=True,
            owner=params.oozie_user,
            group=params.user_group)

  XmlConfig(
    "oozie-site.xml",
    conf_dir=params.conf_dir,
    configurations=params.oozie_site,
    configuration_attributes=params.config['configuration_attributes']['oozie-site'],
    owner=params.oozie_user,
    group=params.user_group,
    mode=0o660)

  File(
    format("{conf_dir}/oozie-env.sh"),
    owner=params.oozie_user,
    content=InlineTemplate(params.oozie_env_sh_template),
    group=params.user_group,
  )

  # "is not None" instead of "!= None": identity test for the None singleton
  if params.log4j_props is not None:
    File(format("{params.conf_dir}/oozie-log4j.properties"),
         mode=0o644,
         group=params.user_group,
         owner=params.oozie_user,
         content=params.log4j_props)
  elif os.path.exists(format("{params.conf_dir}/oozie-log4j.properties")):
    # no managed content -- only enforce ownership/permissions on the existing file
    File(format("{params.conf_dir}/oozie-log4j.properties"),
         mode=0o644,
         group=params.user_group,
         owner=params.oozie_user)

  File(format("{params.conf_dir}/adminusers.txt"),
       mode=0o644,
       group=params.user_group,
       owner=params.oozie_user,
       content=Template('adminusers.txt.j2', oozie_user=params.oozie_user))

  # membership test instead of a chained "or" of equality comparisons
  if params.jdbc_driver_name in ("com.mysql.jdbc.Driver",
                                 "org.postgresql.Driver",
                                 "oracle.jdbc.driver.OracleDriver"):
    # stage the helper JAR used to verify DB connectivity
    File(
      format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
      content=DownloadSource(
        format("{jdk_location}{check_db_connection_jar_name}")),
    )

  oozie_ownership()

  if params.lzo_enabled:
    install_lzo_if_needed()
    Execute(
      format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if=no_op_test,
    )

  if is_server:
    oozie_server_specific()
def hive(name=None):
  """
  Configure Hive on Linux: create HDFS directories (webhcat, warehouse,
  user/scratch/replication dirs), copy tarballs to HDFS, and write all Hive
  config files (hive-site.xml, hive-env.sh, metrics properties, jaas confs,
  start scripts) with the correct ownership and permissions.

  :param name: component being configured -- 'hiveserver2', 'metastore',
               'client', or None.
  """
  import params

  install_lzo_if_needed()

  hive_client_conf_path = format("{stack_root}/current/{component_directory}/conf")
  # Permissions 644 for conf dir (client) files, and 600 for conf.server
  mode_identified = 0o644 if params.hive_config_dir == hive_client_conf_path else 0o600

  if name == 'hiveserver2':
    # copy tarball to HDFS feature not supported
    if not (params.stack_version_formatted_major and
            check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major)):
      params.HdfsResource(params.webhcat_apps_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.webhcat_user,
                          mode=0o755)

    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
      params.HdfsResource(params.hcat_hdfs_user_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hcat_user,
                          mode=params.hcat_hdfs_user_mode)

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.webhcat_user,
                        mode=params.webhcat_hdfs_user_mode)

    # ****** Begin Copy Tarballs ******
    # *********************************
    # if copy tarball to HDFS feature supported copy mapreduce.tar.gz and tez.tar.gz to HDFS
    if params.stack_version_formatted_major and check_stack_feature(
        StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major):
      copy_tarball.copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user,
                                skip=params.sysprep_skip_copy_tarballs_hdfs)
      copy_tarball.copy_to_hdfs("tez", params.user_group, params.hdfs_user,
                                skip=params.sysprep_skip_copy_tarballs_hdfs)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account
    copy_tarball.copy_to_hdfs("pig", params.user_group, params.hdfs_user,
                              file_mode=params.tarballs_mode,
                              custom_source_file=params.pig_tar_source,
                              custom_dest_file=params.pig_tar_dest_file,
                              skip=params.sysprep_skip_copy_tarballs_hdfs)
    copy_tarball.copy_to_hdfs("hive", params.user_group, params.hdfs_user,
                              file_mode=params.tarballs_mode,
                              custom_source_file=params.hive_tar_source,
                              custom_dest_file=params.hive_tar_dest_file,
                              skip=params.sysprep_skip_copy_tarballs_hdfs)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
      # getattr instead of eval(): same attribute lookup on the params
      # module without the code-execution hazard and opacity of eval
      source_file_pattern = getattr(params, tarball_name + "_tar_source")
      dest_dir = getattr(params, tarball_name + "_tar_dest_dir")

      if source_file_pattern is None or dest_dir is None:
        continue

      source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern]
      for source_file in source_files:
        src_filename = os.path.basename(source_file)
        dest_file = os.path.join(dest_dir, src_filename)
        copy_tarball.copy_to_hdfs(tarball_name, params.user_group, params.hdfs_user,
                                  file_mode=params.tarballs_mode,
                                  custom_source_file=source_file,
                                  custom_dest_file=dest_file,
                                  skip=params.sysprep_skip_copy_tarballs_hdfs)
    # ******* End Copy Tarballs *******
    # *********************************

    # if warehouse directory is in DFS
    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
      # Create Hive Metastore Warehouse Dir
      params.HdfsResource(params.hive_apps_whs_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.user_group,
                          mode=params.hive_apps_whs_mode)
    else:
      Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."))

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        mode=params.hive_hdfs_user_mode)

    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
      # Hive expects this dir to be writeable by everyone as it is used as a temp dir
      params.HdfsResource(params.hive_exec_scratchdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.hdfs_user,
                          mode=0o777)

    if params.hive_repl_cmrootdir:
      params.HdfsResource(params.hive_repl_cmrootdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.user_group,
                          mode=0o1777)
    if params.hive_repl_rootdir:
      params.HdfsResource(params.hive_repl_rootdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.user_group,
                          mode=0o700)

    params.HdfsResource(None, action="execute")

  Directory(params.hive_etc_dir_prefix, mode=0o755)

  # We should change configurations for client as well as for server.
  # The reason is that stale-configs are service-level, not component.
  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  params.hive_site_config = update_credential_provider_path(
    params.hive_site_config, 'hive-site',
    os.path.join(params.hive_conf_dir, 'hive-site.jceks'),
    params.hive_user, params.user_group)

  XmlConfig(
    "hive-site.xml",
    conf_dir=params.hive_config_dir,
    configurations=params.hive_site_config,
    configuration_attributes=params.config['configuration_attributes']['hive-site'],
    owner=params.hive_user,
    group=params.user_group,
    mode=mode_identified)

  # Generate atlas-application.properties.xml file
  if params.enable_atlas_hook:
    atlas_hook_filepath = os.path.join(params.hive_config_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties,
                     atlas_hook_filepath, params.hive_user, params.user_group)

  if name == 'hiveserver2':
    XmlConfig(
      "hiveserver2-site.xml",
      conf_dir=params.hive_server_conf_dir,
      configurations=params.config['configurations']['hiveserver2-site'],
      configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
      owner=params.hive_user,
      group=params.user_group,
      mode=0o600)

  if params.hive_metastore_site_supported and name == 'metastore':
    XmlConfig(
      "hivemetastore-site.xml",
      conf_dir=params.hive_server_conf_dir,
      configurations=params.config['configurations']['hivemetastore-site'],
      configuration_attributes=params.config['configuration_attributes']['hivemetastore-site'],
      owner=params.hive_user,
      group=params.user_group,
      mode=0o600)

  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       mode=mode_identified,
       content=InlineTemplate(params.hive_env_sh_template))

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0o644,
       content=Template("hive.conf.j2"))

  if params.security_enabled:
    File(os.path.join(params.hive_config_dir, 'zkmigrator_jaas.conf'),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("zkmigrator_jaas.conf.j2"))

  if name == 'metastore' or name == 'hiveserver2':
    if params.hive_jdbc_target is not None and not os.path.exists(params.hive_jdbc_target):
      jdbc_connector(params.hive_jdbc_target, params.hive_previous_jdbc_jar)
    if params.hive2_jdbc_target is not None and not os.path.exists(params.hive2_jdbc_target):
      jdbc_connector(params.hive2_jdbc_target, params.hive2_previous_jdbc_jar)

  # helper JAR used by Ambari to verify DB connectivity
  File(
    format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
    content=DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
    mode=0o644,
  )

  if name == 'metastore':
    File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hivemetastore.properties"),
         owner=params.hive_user,
         group=params.user_group,
         mode=0o600,
         content=Template("hadoop-metrics2-hivemetastore.properties.j2"))

    File(params.start_metastore_path,
         mode=0o755,
         content=StaticFile('startMetastore.sh'))

    if not is_empty(params.hive_exec_scratchdir):
      dirPathStr = urlparse(params.hive_exec_scratchdir).path
      pathComponents = dirPathStr.split("/")
      # only manage a local scratch dir that lives under /tmp
      if dirPathStr.startswith("/tmp") and len(pathComponents) > 2:
        Directory(params.hive_exec_scratchdir,
                  owner=params.hive_user,
                  create_parents=True,
                  mode=0o777)

    if params.hive_repl_cmrootdir:
      params.HdfsResource(params.hive_repl_cmrootdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.user_group,
                          mode=0o1777)
    if params.hive_repl_rootdir:
      params.HdfsResource(params.hive_repl_rootdir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.user_group,
                          mode=0o700)
    if params.hive_repl_cmrootdir or params.hive_repl_rootdir:
      params.HdfsResource(None, action="execute")

  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0o755,
         content=Template(format('{start_hiveserver2_script}')))

    File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hiveserver2.properties"),
         owner=params.hive_user,
         group=params.user_group,
         mode=0o600,
         content=Template("hadoop-metrics2-hiveserver2.properties.j2"))

  if name != "client":
    Directory(params.hive_pid_dir,
              create_parents=True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0o755)
    Directory(params.hive_log_dir,
              create_parents=True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0o755)
    Directory(params.hive_var_lib,
              create_parents=True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0o755)
def setup_spark(env, type, upgrade_type = None, action = None):
  """
  Lay down the Spark (v1) configuration files and runtime directories.

  :param env: execution environment handed in by the caller (not used here)
  :param type: component type; 'server' triggers the HDFS user-dir creation
  :param upgrade_type: non-None while a stack upgrade is in progress
  :param action: HDFS resources are only created when action == 'config'
  """
  import params

  # Matching LZO libraries must be present before configs are written.
  lzo_utils.install_lzo_if_needed()

  # Runtime directories for PIDs and logs.
  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0o775,
            create_parents=True,
            cd_access='a')

  if type == 'server' and action == 'config':
    # The Spark user's HDFS home must exist before the server starts.
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0o775)
    params.HdfsResource(None, action="execute")

  spark_defaults = params.config['configurations']['spark-defaults']
  PropertiesFile(format("{spark_conf}/spark-defaults.conf"),
                 properties=spark_defaults,
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group,
                 mode=0o644)

  conf_dir = params.spark_conf

  # spark-env.sh in etc/conf dir
  File(os.path.join(conf_dir, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0o644)

  # log4j.properties in etc/conf dir
  File(os.path.join(conf_dir, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0o644)

  # metrics.properties in etc/conf dir
  File(os.path.join(conf_dir, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties),
       mode=0o644)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=conf_dir,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0o644)

  if params.has_spark_thriftserver:
    thrift_conf = params.config['configurations']['spark-thrift-sparkconf']
    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=thrift_conf,
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ",
                   mode=0o644)

  # During an upgrade, feature checks run against the target version;
  # otherwise against the currently formatted stack version.
  if upgrade_type is not None:
    effective_version = params.version
  else:
    effective_version = params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  fairscheduler_wanted = (params.spark_thrift_fairscheduler_content and
                          effective_version and
                          check_stack_feature(StackFeature.SPARK_16PLUS, effective_version))
  if fairscheduler_wanted:
    # spark-thrift-fairscheduler.xml for the thrift server
    File(os.path.join(conf_dir, "spark-thrift-fairscheduler.xml"),
         owner=params.spark_user,
         group=params.spark_group,
         mode=0o755,
         content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def actionexecute(self, env):
  """
  Distribute repository files and install the packages for a repository
  version, writing structured output describing the result.

  :param env: execution environment passed by the Ambari agent
  :raises Fail: on missing command parameters or any installation error
  """
  num_errors = 0

  # Parse parameters
  config = Script.get_config()

  try:
    command_repository = CommandRepository(config['repositoryFile'])
  except KeyError:
    raise Fail("The command repository indicated by 'repositoryFile' was not found")

  # Handle a SIGTERM and SIGINT gracefully
  signal.signal(signal.SIGTERM, self.abort_handler)
  signal.signal(signal.SIGINT, self.abort_handler)

  self.repository_version = command_repository.version_string

  # Select dict that contains parameters
  try:
    package_list = json.loads(config['roleParams']['package_list'])
    stack_id = config['roleParams']['stack_id']
  except KeyError as key_err:
    # Both values are required below; failing fast with a clear message is
    # better than the NameError a silent "pass" would produce later when
    # package_list / stack_id are referenced.
    raise Fail("Missing required role parameter: {0}".format(str(key_err)))

  self.stack_name = Script.get_stack_name()
  if self.stack_name is None:
    raise Fail("Cannot determine the stack name")

  self.stack_root_folder = Script.get_stack_root()
  if self.stack_root_folder is None:
    raise Fail("Cannot determine the stack's root directory")

  if self.repository_version is None:
    raise Fail("Cannot determine the repository version to install")

  self.repository_version = self.repository_version.strip()

  try:
    if not command_repository.items:
      Logger.warning(
        "Repository list is empty. Ambari may not be managing the repositories for {0}.".format(
          self.repository_version))
    else:
      Logger.info(
        "Will install packages for repository version {0}".format(self.repository_version))
      new_repo_files = Script.repository_util.create_repo_files()
      self.repo_files.update(new_repo_files)
  except Exception as err:
    Logger.logger.exception("Cannot install repository files. Error: {0}".format(str(err)))
    num_errors += 1

  # Build structured output with initial values
  self.structured_output = {
    'package_installation_result': 'FAIL',
    'repository_version_id': command_repository.version_id
  }
  self.put_structured_out(self.structured_output)

  try:
    # check package manager non-completed transactions
    if self.repo_mgr.check_uncompleted_transactions():
      self.repo_mgr.print_uncompleted_transaction_hint()
      num_errors += 1
  except Exception as e:
    # we need to ignore any exception
    Logger.warning("Failed to check for uncompleted package manager transactions: " + str(e))

  if num_errors > 0:
    raise Fail("Failed to distribute repositories/install packages")

  # Initial list of versions, used to compute the new version installed
  self.old_versions = get_stack_versions(self.stack_root_folder)

  try:
    is_package_install_successful = False
    ret_code = self.install_packages(package_list)
    if ret_code == 0:
      self.structured_output['package_installation_result'] = 'SUCCESS'
      self.put_structured_out(self.structured_output)
      is_package_install_successful = True
    else:
      num_errors += 1
  except Exception as err:
    num_errors += 1
    Logger.logger.exception("Could not install packages. Error: {0}".format(str(err)))

  try:
    lzo_utils.install_lzo_if_needed()
  except Exception as err:
    num_errors += 1
    Logger.logger.exception("Could not install LZO packages. Error: {0}".format(str(err)))

  # Provide correct exit code
  if num_errors > 0:
    raise Fail("Failed to distribute repositories/install packages")

  self._fix_default_links_for_current()

  # if installing a version of HDP that needs some symlink love, then create them
  if is_package_install_successful and 'actual_version' in self.structured_output:
    self._relink_configurations_with_conf_select(stack_id, self.structured_output['actual_version'])
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None):
  """
  :param env: Python environment
  :param type: Spark component type
  :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param action: Action to perform, such as generate configs
  :param config_dir: Optional config directory to write configs to.
  """
  import params

  # Matching LZO libraries must be present before Spark configs are written.
  lzo_utils.install_lzo_if_needed()

  # Default to the regular conf dir unless the caller (e.g. a rolling
  # upgrade) supplied an alternate location.
  target_conf_dir = params.spark_conf if config_dir is None else config_dir

  # Runtime directories for PIDs and logs.
  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0o775,
            create_parents=True,
            cd_access='a')

  if type == 'server' and action == 'config':
    # The Spark user's HDFS home must exist before the server starts.
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0o775)
    params.HdfsResource(None, action="execute")

  PropertiesFile(os.path.join(target_conf_dir, "spark-defaults.conf"),
                 properties=params.config['configurations']['spark-defaults'],
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group,
                 mode=0o644)

  # spark-env.sh in the target conf dir
  File(os.path.join(target_conf_dir, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0o644)

  # log4j.properties in the target conf dir
  File(os.path.join(target_conf_dir, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0o644)

  # metrics.properties in the target conf dir
  File(os.path.join(target_conf_dir, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties),
       mode=0o644)

  Directory(params.spark_logs_dir,
            owner=params.spark_user,
            group=params.spark_group,
            mode=0o755)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=target_conf_dir,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0o644)

  if params.has_spark_thriftserver:
    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=params.config['configurations']['spark-thrift-sparkconf'],
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ",
                   mode=0o644)

  # Feature checks run against the target version during an upgrade,
  # otherwise against the version used for stack feature checks.
  if upgrade_type is not None:
    effective_version = params.version
  else:
    effective_version = params.version_for_stack_feature_checks

  if effective_version:
    effective_version = format_stack_version(effective_version)

    # NOTE(review): java-opts is written under params.spark_conf rather than
    # the (possibly different) target conf dir -- confirm this is intended
    # during a rolling upgrade.
    if check_stack_feature(StackFeature.SPARK_JAVA_OPTS_SUPPORT, effective_version):
      File(os.path.join(params.spark_conf, 'java-opts'),
           owner=params.spark_user,
           group=params.spark_group,
           content=InlineTemplate(params.spark_javaopts_properties),
           mode=0o644)
    else:
      File(os.path.join(params.spark_conf, 'java-opts'),
           action="delete")

    if params.spark_thrift_fairscheduler_content and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # spark-thrift-fairscheduler.xml for the thrift server
      File(os.path.join(target_conf_dir, "spark-thrift-fairscheduler.xml"),
           owner=params.spark_user,
           group=params.spark_group,
           mode=0o755,
           content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def yarn(name=None, config_dir=None): """ :param name: Component name, apptimelinereader, apptimelineserver, nodemanager, resourcemanager, or None (defaults for client) :param config_dir: Which config directory to write configs to, which could be different during rolling upgrade. """ import params install_lzo_if_needed() if config_dir is None: config_dir = params.hadoop_conf_dir Directory( [params.yarn_log_dir_prefix], owner=params.yarn_user, group=params.user_group, create_parents=True, ignore_failures=True, cd_access='a', mode=0775, ) Directory( [params.yarn_pid_dir_prefix, params.yarn_pid_dir, params.yarn_log_dir], owner=params.yarn_user, group=params.user_group, create_parents=True, cd_access='a', ) Directory( [ params.mapred_pid_dir_prefix, params.mapred_pid_dir, params.mapred_log_dir_prefix, params.mapred_log_dir ], owner=params.mapred_user, group=params.user_group, create_parents=True, cd_access='a', ) Directory( params.yarn_hbase_conf_dir, owner=params.yarn_hbase_user, group=params.user_group, create_parents=True, cd_access='a', ) # Some of these function calls depend on the directories above being created first. 
if name == 'resourcemanager': setup_resourcemanager() elif name == 'nodemanager': setup_nodemanager() elif name == 'apptimelineserver': setup_ats() elif name == 'historyserver': setup_historyserver() elif name == 'apptimelinereader': if not params.is_hbase_system_service_launch: setup_atsv2_hbase_directories() setup_atsv2_hbase_files() generate_logfeeder_input_config( 'yarn', Template("input.config-yarn.json.j2", extra_imports=[default])) # if there is the viewFS mount table content, create separate xml config and include in in the core-site # else just create core-site if params.mount_table_content: XmlConfig( "core-site.xml", conf_dir=config_dir, configurations=params.config['configurations']['core-site'], configuration_attributes=params.config['configurationAttributes'] ['core-site'], owner=params.hdfs_user, group=params.user_group, mode=0644, xml_include_file=os.path.join(config_dir, params.xml_inclusion_file_name)) File(os.path.join(config_dir, params.xml_inclusion_file_name), owner=params.hdfs_user, group=params.user_group, content=params.mount_table_content, mode=0644) else: XmlConfig( "core-site.xml", conf_dir=config_dir, configurations=params.config['configurations']['core-site'], configuration_attributes=params.config['configurationAttributes'] ['core-site'], owner=params.hdfs_user, group=params.user_group, mode=0644) # During RU, Core Masters and Slaves need hdfs-site.xml # TODO, instead of specifying individual configs, which is susceptible to breaking when new configs are added, # RU should rely on all available in <stack-root>/<version>/hadoop/conf XmlConfig("hdfs-site.xml", conf_dir=config_dir, configurations=params.config['configurations']['hdfs-site'], configuration_attributes=params.config['configurationAttributes'] ['hdfs-site'], owner=params.hdfs_user, group=params.user_group, mode=0644) XmlConfig("mapred-site.xml", conf_dir=config_dir, configurations=params.config['configurations']['mapred-site'], 
configuration_attributes=params.config['configurationAttributes'] ['mapred-site'], owner=params.yarn_user, group=params.user_group, mode=0644) configs = {} configs.update(params.config['configurations']['yarn-site']) configs["hadoop.registry.dns.bind-port"] = params.config['configurations'][ 'yarn-env']['registry.dns.bind-port'] XmlConfig("yarn-site.xml", conf_dir=config_dir, configurations=configs, configuration_attributes=params.config['configurationAttributes'] ['yarn-site'], owner=params.yarn_user, group=params.user_group, mode=0644) XmlConfig( "capacity-scheduler.xml", conf_dir=config_dir, configurations=params.config['configurations']['capacity-scheduler'], configuration_attributes=params.config['configurationAttributes'] ['capacity-scheduler'], owner=params.yarn_user, group=params.user_group, mode=0644) XmlConfig( "hbase-site.xml", conf_dir=params.yarn_hbase_conf_dir, configurations=params.config['configurations']['yarn-hbase-site'], configuration_attributes=params.config['configurationAttributes'] ['yarn-hbase-site'], owner=params.yarn_hbase_user, group=params.user_group, mode=0644) XmlConfig("resource-types.xml", conf_dir=config_dir, configurations=params.config['configurations']['resource-types'], configuration_attributes=params.config['configurationAttributes'] ['resource-types'], owner=params.yarn_user, group=params.user_group, mode=0644) File(format("{limits_conf_dir}/yarn.conf"), mode=0644, content=Template('yarn.conf.j2')) File(format("{limits_conf_dir}/mapreduce.conf"), mode=0644, content=Template('mapreduce.conf.j2')) File(os.path.join(config_dir, "yarn-env.sh"), owner=params.yarn_user, group=params.user_group, mode=0755, content=InlineTemplate(params.yarn_env_sh_template)) File(format("{yarn_bin}/container-executor"), group=params.yarn_executor_container_group, mode=params.container_executor_mode) File(os.path.join(config_dir, "container-executor.cfg"), group=params.user_group, mode=0644, 
content=InlineTemplate(params.container_executor_cfg_template)) Directory(params.cgroups_dir, group=params.user_group, create_parents=True, mode=0755, cd_access="a") File(os.path.join(config_dir, "mapred-env.sh"), owner=params.tc_owner, mode=0755, content=InlineTemplate(params.mapred_env_sh_template)) if params.yarn_nodemanager_recovery_dir: Directory( InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(), owner=params.yarn_user, group=params.user_group, create_parents=True, mode=0755, cd_access='a', ) if params.security_enabled: File(os.path.join(params.hadoop_bin, "task-controller"), owner="root", group=params.mapred_tt_group, mode=06050) File(os.path.join(config_dir, 'taskcontroller.cfg'), owner=params.tc_owner, mode=params.tc_mode, group=params.mapred_tt_group, content=Template("taskcontroller.cfg.j2")) File(os.path.join(config_dir, 'yarn_jaas.conf'), owner=params.yarn_user, group=params.user_group, content=Template("yarn_jaas.conf.j2")) if params.has_ats: File(os.path.join(config_dir, 'yarn_ats_jaas.conf'), owner=params.yarn_user, group=params.user_group, content=Template("yarn_ats_jaas.conf.j2")) if params.has_registry_dns: File(os.path.join(config_dir, 'yarn_registry_dns_jaas.conf'), owner=params.yarn_user, group=params.user_group, content=Template("yarn_registry_dns_jaas.conf.j2")) File(os.path.join(config_dir, 'yarn_nm_jaas.conf'), owner=params.yarn_user, group=params.user_group, content=Template("yarn_nm_jaas.conf.j2")) if params.has_hs: File(os.path.join(config_dir, 'mapred_jaas.conf'), owner=params.mapred_user, group=params.user_group, content=Template("mapred_jaas.conf.j2")) else: File(os.path.join(config_dir, 'taskcontroller.cfg'), owner=params.tc_owner, content=Template("taskcontroller.cfg.j2")) XmlConfig("mapred-site.xml", conf_dir=config_dir, configurations=params.config['configurations']['mapred-site'], configuration_attributes=params.config['configurationAttributes'] ['mapred-site'], owner=params.mapred_user, group=params.user_group) 
XmlConfig( "capacity-scheduler.xml", conf_dir=config_dir, configurations=params.config['configurations']['capacity-scheduler'], configuration_attributes=params.config['configurationAttributes'] ['capacity-scheduler'], owner=params.hdfs_user, group=params.user_group) if "ssl-client" in params.config['configurations']: XmlConfig( "ssl-client.xml", conf_dir=config_dir, configurations=params.config['configurations']['ssl-client'], configuration_attributes=params.config['configurationAttributes'] ['ssl-client'], owner=params.hdfs_user, group=params.user_group) Directory( params.hadoop_conf_secure_dir, create_parents=True, owner='root', group=params.user_group, cd_access='a', ) XmlConfig( "ssl-client.xml", conf_dir=params.hadoop_conf_secure_dir, configurations=params.config['configurations']['ssl-client'], configuration_attributes=params.config['configurationAttributes'] ['ssl-client'], owner=params.hdfs_user, group=params.user_group) if "ssl-server" in params.config['configurations']: XmlConfig( "ssl-server.xml", conf_dir=config_dir, configurations=params.config['configurations']['ssl-server'], configuration_attributes=params.config['configurationAttributes'] ['ssl-server'], owner=params.hdfs_user, group=params.user_group) if os.path.exists(os.path.join(config_dir, 'fair-scheduler.xml')): File(os.path.join(config_dir, 'fair-scheduler.xml'), owner=params.mapred_user, group=params.user_group) if os.path.exists(os.path.join(config_dir, 'ssl-client.xml.example')): File(os.path.join(config_dir, 'ssl-client.xml.example'), owner=params.mapred_user, group=params.user_group) if os.path.exists(os.path.join(config_dir, 'ssl-server.xml.example')): File(os.path.join(config_dir, 'ssl-server.xml.example'), owner=params.mapred_user, group=params.user_group) setup_atsv2_backend(name, config_dir)
def hbase(name=None):
  """
  Write out HBase configuration files and create the directories HBase needs.

  :param name: Component being configured: "master", "regionserver", "client",
               or None. Drives the metrics template tag, whether JAAS/pid/log
               resources are created, and which HDFS directories are provisioned.
  """
  import params

  # ensure that matching LZO libraries are installed for HBase
  lzo_utils.install_lzo_if_needed()

  Directory(params.etc_prefix_dir, mode=0o755)

  Directory(params.hbase_conf_dir,
            owner=params.hbase_user,
            group=params.user_group,
            create_parents=True)

  Directory(params.java_io_tmpdir, create_parents=True, mode=0o777)

  # If a file location is specified in ioengine parameter,
  # ensure that directory exists. Otherwise create the
  # directory with permissions assigned to hbase:hadoop.
  ioengine_input = params.ioengine_param
  if ioengine_input is not None:
    if ioengine_input.startswith("file:/"):
      # Strip the "file:" scheme prefix; slicing at index 5 keeps the
      # leading "/" so the result remains an absolute path.
      ioengine_fullpath = ioengine_input[5:]
      ioengine_dir = os.path.dirname(ioengine_fullpath)
      Directory(ioengine_dir,
                owner=params.hbase_user,
                group=params.user_group,
                create_parents=True,
                mode=0o755)

  parent_dir = os.path.dirname(params.tmp_dir)
  # In case if we have several placeholders in path
  while "${" in parent_dir:
    parent_dir = os.path.dirname(parent_dir)

  # only create/chmod the parent when it is not the filesystem root
  if parent_dir != os.path.abspath(os.sep):
    Directory(parent_dir,
              create_parents=True,
              cd_access="a",
    )
    Execute(("chmod", "1777", parent_dir), sudo=True)

  XmlConfig("hbase-site.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['hbase-site'],
            configuration_attributes=params.config['configurationAttributes']['hbase-site'],
            owner=params.hbase_user,
            group=params.user_group)

  if check_stack_feature(StackFeature.PHOENIX_CORE_HDFS_SITE_REQUIRED, params.version_for_stack_feature_checks):
    XmlConfig("core-site.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['core-site'],
              configuration_attributes=params.config['configurationAttributes']['core-site'],
              owner=params.hbase_user,
              group=params.user_group)
    if 'hdfs-site' in params.config['configurations']:
      XmlConfig("hdfs-site.xml",
                conf_dir=params.hbase_conf_dir,
                configurations=params.config['configurations']['hdfs-site'],
                configuration_attributes=params.config['configurationAttributes']['hdfs-site'],
                owner=params.hbase_user,
                group=params.user_group)
  else:
    # stacks that do not need these files for Phoenix must not keep stale copies
    File(format("{params.hbase_conf_dir}/hdfs-site.xml"), action="delete")
    File(format("{params.hbase_conf_dir}/core-site.xml"), action="delete")

  if 'hbase-policy' in params.config['configurations']:
    XmlConfig("hbase-policy.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hbase-policy'],
              configuration_attributes=params.config['configurationAttributes']['hbase-policy'],
              owner=params.hbase_user,
              group=params.user_group)
  # Manually overriding ownership of file installed by hadoop package
  else:
    File(format("{params.hbase_conf_dir}/hbase-policy.xml"),
         owner=params.hbase_user,
         group=params.user_group)

  File(format("{hbase_conf_dir}/hbase-env.sh"),
       owner=params.hbase_user,
       content=InlineTemplate(params.hbase_env_sh_template),
       group=params.user_group,
  )

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hbase.conf'),
       owner='root',
       group='root',
       mode=0o644,
       content=Template("hbase.conf.j2"))

  hbase_TemplateConfig(params.metric_prop_file_name,
                       tag='GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS')

  hbase_TemplateConfig('regionservers')

  if params.security_enabled:
    hbase_TemplateConfig(format("hbase_{name}_jaas.conf"))

  if name != "client":
    Directory(params.pid_dir,
              owner=params.hbase_user,
              create_parents=True,
              cd_access="a",
              mode=0o755,
    )
    Directory(params.log_dir,
              owner=params.hbase_user,
              create_parents=True,
              cd_access="a",
              mode=0o755,
    )

  if params.log4j_props is not None:
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0o644,
         group=params.user_group,
         owner=params.hbase_user,
         content=InlineTemplate(params.log4j_props))
  elif os.path.exists(format("{params.hbase_conf_dir}/log4j.properties")):
    # keep a pre-existing log4j.properties, but enforce its ownership/mode
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0o644,
         group=params.user_group,
         owner=params.hbase_user)

  if name == "master" and params.default_fs:
    # only create the root dir when its protocol matches the default FS scheme
    if not params.hbase_hdfs_root_dir_protocol or params.hbase_hdfs_root_dir_protocol == urlparse(params.default_fs).scheme:
      params.HdfsResource(params.hbase_hdfs_root_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user)
    params.HdfsResource(params.hbase_staging_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hbase_user,
                        mode=0o711)
    if params.create_hbase_home_directory:
      params.HdfsResource(params.hbase_home_directory,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user,
                          mode=0o755)
    params.HdfsResource(None, action="execute")

  # without a default FS the staging dir lives on the local filesystem
  if name in ('master', 'regionserver') and not params.default_fs:
    Directory(params.hbase_staging_dir,
              owner=params.hbase_user,
              create_parents=True,
              cd_access="a",
              mode=0o711,
    )

  if params.phoenix_enabled:
    Package(params.phoenix_package,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
def yarn(name=None, config_dir=None):
  """
  Write out YARN/MapReduce configuration files and create the local and HDFS
  directories each component needs.

  :param name: Component name, apptimelineserver, nodemanager, resourcemanager, or None (defaults for client)
  :param config_dir: Which config directory to write configs to, which could be different during rolling upgrade.
  """
  import params

  # NOTE(review): called unqualified here, unlike lzo_utils.install_lzo_if_needed()
  # used elsewhere in this file — assumes a direct import at module level; verify.
  install_lzo_if_needed()

  if config_dir is None:
    config_dir = params.hadoop_conf_dir

  # ---- History Server: HDFS directories for log aggregation and job history ----
  if name == "historyserver":
    if params.yarn_log_aggregation_enabled:
      params.HdfsResource(params.yarn_nm_app_log_dir,
                          action="create_on_execute",
                          type="directory",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)

    # create the /tmp folder with proper permissions if it doesn't exist yet
    if params.entity_file_history_directory.startswith('/tmp'):
      params.HdfsResource(params.hdfs_tmp_dir,
                          action="create_on_execute",
                          type="directory",
                          owner=params.hdfs_user,
                          mode=0777,
      )

    params.HdfsResource(params.entity_file_history_directory,
                        action="create_on_execute",
                        type="directory",
                        owner=params.yarn_user,
                        group=params.user_group)
    params.HdfsResource("/mapred",
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user)
    params.HdfsResource("/mapred/system",
                        type="directory",
                        action="create_on_execute",
                        owner=params.hdfs_user)
    params.HdfsResource(params.mapreduce_jobhistory_done_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user,
                        group=params.user_group,
                        change_permissions_for_parents=True,
                        mode=0777)
    # flush all pending HdfsResource operations in one batch
    params.HdfsResource(None, action="execute")

    # local leveldb state store for the job history server
    Directory(params.jhs_leveldb_state_store_dir,
              owner=params.mapred_user,
              group=params.user_group,
              create_parents=True,
              cd_access="a",
              recursive_ownership=True,
    )

  #<editor-fold desc="Node Manager Section">
  if name == "nodemanager":

    # First start after enabling/disabling security
    if params.toggle_nm_security:
      # local/log dirs must be recreated from scratch when security toggles
      Directory(params.nm_local_dirs_list + params.nm_log_dirs_list,
                action='delete')

      # If yarn.nodemanager.recovery.dir exists, remove this dir
      if params.yarn_nodemanager_recovery_dir:
        Directory(InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
                  action='delete')

      # Setting NM marker file
      if params.security_enabled:
        Directory(params.nm_security_marker_dir)
        File(params.nm_security_marker,
             content="Marker file to track first start after enabling/disabling security. "
                     "During first start yarn local, log dirs are removed and recreated")
      elif not params.security_enabled:
        File(params.nm_security_marker, action="delete")

    if not params.security_enabled or params.toggle_nm_security:
      # handle_mounted_dirs ensures that we don't create dirs which are temporary unavailable (unmounted), and intended to reside on a different mount.
      nm_log_dir_to_mount_file_content = handle_mounted_dirs(create_log_dir, params.nm_log_dirs, params.nm_log_dir_to_mount_file, params)
      # create a history file used by handle_mounted_dirs
      File(params.nm_log_dir_to_mount_file,
           owner=params.hdfs_user,
           group=params.user_group,
           mode=0644,
           content=nm_log_dir_to_mount_file_content)
      nm_local_dir_to_mount_file_content = handle_mounted_dirs(create_local_dir, params.nm_local_dirs, params.nm_local_dir_to_mount_file, params)
      File(params.nm_local_dir_to_mount_file,
           owner=params.hdfs_user,
           group=params.user_group,
           mode=0644,
           content=nm_local_dir_to_mount_file_content)
  #</editor-fold>

  # recovery dir is created for every component when configured
  if params.yarn_nodemanager_recovery_dir:
    Directory(InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
              owner=params.yarn_user,
              group=params.user_group,
              create_parents=True,
              mode=0755,
              cd_access='a',
    )

  # pid/log directories for the yarn and mapred users
  Directory([params.yarn_pid_dir_prefix, params.yarn_pid_dir, params.yarn_log_dir],
            owner=params.yarn_user,
            group=params.user_group,
            create_parents=True,
            cd_access='a',
  )

  Directory([params.mapred_pid_dir_prefix, params.mapred_pid_dir, params.mapred_log_dir_prefix, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            create_parents=True,
            cd_access='a',
  )
  Directory([params.yarn_log_dir_prefix],
            owner=params.yarn_user,
            group=params.user_group,
            create_parents=True,
            ignore_failures=True,
            cd_access='a',
  )

  # ---- common client configuration files ----
  XmlConfig("core-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644)

  # During RU, Core Masters and Slaves need hdfs-site.xml
  # TODO, instead of specifying individual configs, which is susceptible to breaking when new configs are added,
  # RU should rely on all available in <stack-root>/<version>/hadoop/conf
  if 'hdfs-site' in params.config['configurations']:
    XmlConfig("hdfs-site.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['hdfs-site'],
              configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
              owner=params.hdfs_user,
              group=params.user_group,
              mode=0644)

  XmlConfig("mapred-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  XmlConfig("yarn-site.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  XmlConfig("capacity-scheduler.xml",
            conf_dir=config_dir,
            configurations=params.config['configurations']['capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  # ---- ResourceManager: exclude/include host lists and node labels ----
  if name == 'resourcemanager':
    Directory(params.rm_nodes_exclude_dir,
              mode=0755,
              create_parents=True,
              cd_access='a',
    )
    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.yarn_user,
         group=params.user_group)
    if params.include_hosts:
      Directory(params.rm_nodes_include_dir,
                mode=0755,
                create_parents=True,
                cd_access='a',
      )
      File(params.include_file_path,
           content=Template("include_hosts_list.j2"),
           owner=params.yarn_user,
           group=params.user_group)
    File(params.yarn_job_summary_log,
         owner=params.yarn_user,
         group=params.user_group)
    # create node-labels dir when labels are explicitly enabled, or when the
    # enable flag is absent but a labels dir is configured
    if not is_empty(params.node_label_enable) and params.node_label_enable or is_empty(params.node_label_enable) and params.node_labels_dir:
      params.HdfsResource(params.node_labels_dir,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0700)
      params.HdfsResource(None, action="execute")

  # ---- Application Timeline Server: leveldb stores and ATS 1.5 HDFS dirs ----
  elif name == 'apptimelineserver':
    Directory(params.ats_leveldb_dir,
              owner=params.yarn_user,
              group=params.user_group,
              create_parents=True,
              cd_access="a",
    )

    # if stack support application timeline-service state store property (timeline_state_store stack feature)
    if params.stack_supports_timeline_state_store:
      Directory(params.ats_leveldb_state_store_dir,
                owner=params.yarn_user,
                group=params.user_group,
                create_parents=True,
                cd_access="a",
      )
    # app timeline server 1.5 directories
    if not is_empty(params.entity_groupfs_store_dir):
      parent_path = os.path.dirname(params.entity_groupfs_store_dir)
      params.HdfsResource(parent_path,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0755)
      params.HdfsResource(params.entity_groupfs_store_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=params.entity_groupfs_store_dir_mode)
    if not is_empty(params.entity_groupfs_active_dir):
      parent_path = os.path.dirname(params.entity_groupfs_active_dir)
      params.HdfsResource(parent_path,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0755)
      params.HdfsResource(params.entity_groupfs_active_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=params.entity_groupfs_active_dir_mode)
    params.HdfsResource(None, action="execute")

  # ---- ulimit configs, env scripts, container-executor ----
  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2'))

  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2'))

  File(os.path.join(config_dir, "yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template))

  container_executor = format("{yarn_container_bin}/container-executor")
  # container-executor binary: group and mode come from params (setuid in secure mode)
  File(container_executor,
       group=params.yarn_executor_container_group,
       mode=params.container_executor_mode)

  File(os.path.join(config_dir, "container-executor.cfg"),
       group=params.user_group,
       mode=0644,
       content=Template('container-executor.cfg.j2'))

  Directory(params.cgroups_dir,
            group=params.user_group,
            create_parents=True,
            mode=0755,
            cd_access="a")

  # taskcontroller.cfg ownership/mode differ between secure and non-secure clusters
  if params.security_enabled:
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  File(os.path.join(config_dir, "mapred-env.sh"),
       owner=tc_owner,
       mode=0755,
       content=InlineTemplate(params.mapred_env_sh_template))

  # ---- security artifacts: task-controller, taskcontroller.cfg, JAAS files ----
  if params.security_enabled:
    # setuid+setgid binary so it can switch to the job-owner's uid
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050)
    File(os.path.join(config_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         mode=tc_mode,
         group=params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2"))
    File(os.path.join(config_dir, 'yarn_jaas.conf'),
         owner=params.yarn_user,
         group=params.user_group,
         content=Template("yarn_jaas.conf.j2"))
    if params.has_ats:
      File(os.path.join(config_dir, 'yarn_ats_jaas.conf'),
           owner=params.yarn_user,
           group=params.user_group,
           content=Template("yarn_ats_jaas.conf.j2"))
    File(os.path.join(config_dir, 'yarn_nm_jaas.conf'),
         owner=params.yarn_user,
         group=params.user_group,
         content=Template("yarn_nm_jaas.conf.j2"))
    if params.has_hs:
      File(os.path.join(config_dir, 'mapred_jaas.conf'),
           owner=params.mapred_user,
           group=params.user_group,
           content=Template("mapred_jaas.conf.j2"))
  else:
    File(os.path.join(config_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2"))

  # ---- optional configs: only written when present in the cluster payload ----
  if "mapred-site" in params.config['configurations']:
    XmlConfig("mapred-site.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configuration_attributes']['mapred-site'],
              owner=params.mapred_user,
              group=params.user_group)

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig("capacity-scheduler.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['capacity-scheduler'],
              configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
              owner=params.hdfs_user,
              group=params.user_group)

  if "ssl-client" in params.config['configurations']:
    XmlConfig("ssl-client.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group)

    # a second, root-owned secure copy of ssl-client.xml
    Directory(params.hadoop_conf_secure_dir,
              create_parents=True,
              owner='root',
              group=params.user_group,
              cd_access='a',
    )

    XmlConfig("ssl-client.xml",
              conf_dir=params.hadoop_conf_secure_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group)

  if "ssl-server" in params.config['configurations']:
    XmlConfig("ssl-server.xml",
              conf_dir=config_dir,
              configurations=params.config['configurations']['ssl-server'],
              configuration_attributes=params.config['configuration_attributes']['ssl-server'],
              owner=params.hdfs_user,
              group=params.user_group)

  # fix ownership of package-installed files if they already exist on disk
  if os.path.exists(os.path.join(config_dir, 'fair-scheduler.xml')):
    File(os.path.join(config_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group)
  if os.path.exists(os.path.join(config_dir, 'ssl-client.xml.example')):
    File(os.path.join(config_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
  if os.path.exists(os.path.join(config_dir, 'ssl-server.xml.example')):
    File(os.path.join(config_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)