def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs):
  """
  Verify that the given DFS directory exists, or raise Fail.

  Uses WebHDFS when available (fast path); otherwise shells out to the
  time-expensive 'hdfs dfs -test -d' command. Directories listed in
  ignored_dfs_dirs are skipped without any check.
  """
  import params

  # Nothing to verify for an empty/unset path.
  if is_empty(dir_path):
    return

  dir_path = HdfsResourceProvider.parse_path(dir_path)

  if dir_path in ignored_dfs_dirs:
    Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.")
    return

  Logger.info("Verifying if DFS directory '" + dir_path + "' exists.")

  # Resolve the nameservice for HA-aware WebHDFS access (last one wins).
  nameservices = namenode_ha_utils.get_nameservices(params.hdfs_site)
  nameservice = nameservices[-1] if nameservices else None

  if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.dfs_type):
    # A WebHDFS status call is much faster than executing 'hdfs dfs -test'.
    webhdfs = WebHDFSUtil(params.hdfs_site, nameservice, params.hdfs_user, params.security_enabled)
    status = webhdfs.run_command(dir_path, 'GETFILESTATUS', method='GET',
                                 ignore_status_codes=['404'],
                                 assertable_result=False)
    dir_exists = 'FileStatus' in status
  else:
    # Fall back to the slow shell check; 'dfs -test -d' exits 0 when
    # the directory exists.
    ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path),
                          user=params.livy2_user)[0]
    dir_exists = not ret_code

  if dir_exists:
    Logger.info("DFS directory '" + dir_path + "' exists.")
  else:
    raise Fail("DFS directory '" + dir_path + "' does not exist !")
def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs):
  """
  Verify that the given DFS directory exists, or raise Fail.

  Prefers a WebHDFS status call (fast); otherwise falls back to the
  time-expensive 'hdfs dfs -test -d' shell command. Directories listed
  in ignored_dfs_dirs are skipped without any check.
  """
  import params

  # Nothing to verify for an empty/unset path.
  if is_empty(dir_path):
    return

  dir_path = HdfsResourceProvider.parse_path(dir_path)

  if dir_path in ignored_dfs_dirs:
    Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.")
    return

  Logger.info("Verifying if DFS directory '" + dir_path + "' exists.")

  if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
    # A WebHDFS status call is much faster than executing 'hdfs dfs -test'.
    webhdfs = WebHDFSUtil(params.hdfs_site, params.yarn_user, params.security_enabled)
    status = webhdfs.run_command(dir_path, 'GETFILESTATUS', method='GET',
                                 ignore_status_codes=['404'],
                                 assertable_result=False)
    dir_exists = 'FileStatus' in status
  else:
    # Fall back to the slow shell check; 'dfs -test -d' exits 0 when
    # the directory exists.
    ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path),
                          user=params.yarn_user)[0]
    dir_exists = not ret_code

  if dir_exists:
    Logger.info("DFS directory '" + dir_path + "' exists.")
  else:
    raise Fail("DFS directory '" + dir_path + "' does not exist !")
def setup_hadoop(): """ Setup hadoop files and directories """ import params Execute( ("setenforce", "0"), only_if="test -f /selinux/enforce", not_if= "(! which getenforce ) || (which getenforce && getenforce | grep -q Disabled)", sudo=True, ) #directories if params.has_namenode: Directory( params.hdfs_log_dir_prefix, recursive=True, owner='root', group=params.user_group, mode=0775, cd_access='a', ) Directory( params.hadoop_pid_dir_prefix, recursive=True, owner='root', group='root', cd_access='a', ) Directory( params.hadoop_tmp_dir, recursive=True, owner=params.hdfs_user, cd_access='a', ) #files if params.security_enabled: tc_owner = "root" else: tc_owner = params.hdfs_user # if WebHDFS is not enabled we need this jar to create hadoop folders. if not WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): # for source-code of jar goto contrib/fast-hdfs-resource File(format("{ambari_libs_dir}/fast-hdfs-resource.jar"), mode=0644, content=StaticFile("fast-hdfs-resource.jar")) if os.path.exists(params.hadoop_conf_dir): File(os.path.join(params.hadoop_conf_dir, 'commons-logging.properties'), owner=tc_owner, content=Template('commons-logging.properties.j2')) health_check_template_name = "health_check" File(os.path.join(params.hadoop_conf_dir, health_check_template_name), owner=tc_owner, content=Template(health_check_template_name + ".j2")) log4j_filename = os.path.join(params.hadoop_conf_dir, "log4j.properties") if (params.log4j_props != None): File(log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user, content=params.log4j_props) elif (os.path.exists( format("{params.hadoop_conf_dir}/log4j.properties"))): File( log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user, ) File(os.path.join(params.hadoop_conf_dir, "hadoop-metrics2.properties"), owner=params.hdfs_user, content=Template("hadoop-metrics2.properties.j2"))
def setup_hadoop(): """ Setup hadoop files and directories """ import params Execute( ("setenforce", "0"), only_if="test -f /selinux/enforce", not_if= "(! which getenforce ) || (which getenforce && getenforce | grep -q Disabled)", sudo=True, ) #directories if params.has_namenode or params.dfs_type == 'HCFS': Directory( params.hdfs_log_dir_prefix, create_parents=True, owner='root', group=params.user_group, mode=0775, cd_access='a', ) if params.has_namenode: Directory( params.hadoop_pid_dir_prefix, create_parents=True, owner='root', group='root', cd_access='a', ) Directory( params.hadoop_tmp_dir, create_parents=True, owner=params.hdfs_user, cd_access='a', ) #files if params.security_enabled: tc_owner = "root" else: tc_owner = params.hdfs_user # if WebHDFS is not enabled we need this jar to create hadoop folders and copy tarballs to HDFS. if params.sysprep_skip_copy_fast_jar_hdfs: print "Skipping copying of fast-hdfs-resource.jar as host is sys prepped" elif params.dfs_type == 'HCFS' or not WebHDFSUtil.is_webhdfs_available( params.is_webhdfs_enabled, params.default_fs): # for source-code of jar goto contrib/fast-hdfs-resource File(format("{ambari_libs_dir}/fast-hdfs-resource.jar"), mode=0644, content=StaticFile("fast-hdfs-resource.jar")) if os.path.exists(params.hadoop_conf_dir): File(os.path.join(params.hadoop_conf_dir, 'commons-logging.properties'), owner=tc_owner, content=Template('commons-logging.properties.j2')) health_check_template_name = "health_check" File(os.path.join(params.hadoop_conf_dir, health_check_template_name), owner=tc_owner, content=Template(health_check_template_name + ".j2")) log4j_filename = os.path.join(params.hadoop_conf_dir, "log4j.properties") if (params.log4j_props != None): File(log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user, content=params.log4j_props) elif (os.path.exists( format("{params.hadoop_conf_dir}/log4j.properties"))): File( log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user, ) 
File(os.path.join(params.hadoop_conf_dir, "hadoop-metrics2.properties"), owner=params.hdfs_user, group=params.user_group, content=InlineTemplate( params.hadoop_metrics2_properties_content)) if params.dfs_type == 'HCFS' and params.has_core_site and 'ECS_CLIENT' in params.component_list: create_dirs() create_microsoft_r_dir()
def oozie_service(action='start', upgrade_type=None): """ Starts or stops the Oozie service :param action: 'start' or 'stop' :param upgrade_type: type of upgrade, either "rolling" or "non_rolling" skipped since a variation of them was performed during the rolling upgrade :return: """ import params environment = {'OOZIE_CONFIG': params.conf_dir} if params.security_enabled: if params.oozie_principal is None: oozie_principal_with_host = 'missing_principal' else: oozie_principal_with_host = params.oozie_principal.replace( "_HOST", params.hostname) kinit_if_needed = format( "{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};" ) else: kinit_if_needed = "" no_op_test = as_user(format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ), user=params.oozie_user) if action == 'start': start_cmd = format( "cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh") path_to_jdbc = params.target if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": if not params.jdbc_driver_jar: path_to_jdbc = format("{oozie_libext_dir}/") + \ params.default_connectors_map[params.jdbc_driver_name] if params.jdbc_driver_name in params.default_connectors_map else None if not os.path.isfile(path_to_jdbc): path_to_jdbc = format("{oozie_libext_dir}/") + "*" error_message = "Error! Sorry, but we can't find jdbc driver with default name " + params.default_connectors_map[params.jdbc_driver_name] + \ " in oozie lib dir. So, db connection check can fail. 
Please run 'ambari-server setup --jdbc-db={db_name} --jdbc-driver={path_to_jdbc} on server host.'" Logger.error(error_message) db_connection_check_command = format( "{java_home}/bin/java -cp {check_db_connection_jar}:{path_to_jdbc} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}" ) else: db_connection_check_command = None if upgrade_type is None: if not os.path.isfile( path_to_jdbc ) and params.jdbc_driver_name == "org.postgresql.Driver": print format("ERROR: jdbc file {target} is unavailable. Please, follow next steps:\n" \ "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \ "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \ "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \ "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n") exit(1) if db_connection_check_command: sudo.chmod(params.check_db_connection_jar, 0755) Execute( db_connection_check_command, tries=5, try_sleep=10, user=params.oozie_user, ) Execute(format( "cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run" ), user=params.oozie_user, not_if=no_op_test, ignore_failures=True) if params.security_enabled: Execute( kinit_if_needed, user=params.oozie_user, ) if params.host_sys_prepped: print "Skipping creation of oozie sharelib as host is sys prepped" hdfs_share_dir_exists = True # skip time-expensive hadoop fs -ls check elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): # check with webhdfs is much faster than executing hadoop fs -ls. 
util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled) list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False) hdfs_share_dir_exists = ('FileStatus' in list_status) else: # have to do time expensive hadoop fs -ls check. hdfs_share_dir_exists = shell.call(format( "{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'" ), user=params.oozie_user)[0] if not hdfs_share_dir_exists: Execute(params.put_shared_lib_to_hdfs_cmd, user=params.oozie_user, path=params.execute_path) params.HdfsResource( format("{oozie_hdfs_user_dir}/share"), type="directory", action="create_on_execute", mode=0755, recursive_chmod=True, ) params.HdfsResource(None, action="execute") try: # start oozie Execute(start_cmd, environment=environment, user=params.oozie_user, not_if=no_op_test) copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type, params.upgrade_direction) except: show_logs(params.oozie_log_dir, params.oozie_user) raise elif action == 'stop': stop_cmd = format( "cd {oozie_tmp_dir} && {oozie_home}/bin/oozied.sh stop 60 -force") try: # stop oozie Execute(stop_cmd, environment=environment, only_if=no_op_test, user=params.oozie_user) except: show_logs(params.oozie_log_dir, params.oozie_user) raise File(params.pid_file, action="delete")
def setup_hadoop(): """ Setup hadoop files and directories """ import params Execute( ("setenforce", "0"), only_if="test -f /selinux/enforce", not_if="(! which getenforce ) || (which getenforce && getenforce | grep -q Disabled)", sudo=True, ) # directories if params.has_namenode: Directory( params.hdfs_log_dir_prefix, recursive=True, owner="root", group=params.user_group, mode=0775, cd_access="a" ) Directory(params.hadoop_pid_dir_prefix, recursive=True, owner="root", group="root", cd_access="a") Directory(params.hadoop_tmp_dir, recursive=True, owner=params.hdfs_user, cd_access="a") # files if params.security_enabled: tc_owner = "root" else: tc_owner = params.hdfs_user # if WebHDFS is not enabled we need this jar to create hadoop folders. if not WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): # for source-code of jar goto contrib/fast-hdfs-resource File( format("{ambari_libs_dir}/fast-hdfs-resource.jar"), mode=0644, content=StaticFile("fast-hdfs-resource.jar"), ) if os.path.exists(params.hadoop_conf_dir): File( os.path.join(params.hadoop_conf_dir, "commons-logging.properties"), owner=tc_owner, content=Template("commons-logging.properties.j2"), ) health_check_template_name = "health_check" File( os.path.join(params.hadoop_conf_dir, health_check_template_name), owner=tc_owner, content=Template(health_check_template_name + ".j2"), ) log4j_filename = os.path.join(params.hadoop_conf_dir, "log4j.properties") if params.log4j_props != None: File( log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user, content=params.log4j_props, ) elif os.path.exists(format("{params.hadoop_conf_dir}/log4j.properties")): File(log4j_filename, mode=0644, group=params.user_group, owner=params.hdfs_user) File( os.path.join(params.hadoop_conf_dir, "hadoop-metrics2.properties"), owner=params.hdfs_user, content=Template("hadoop-metrics2.properties.j2"), )
def oozie_service(action = 'start', rolling_restart=False): """ Starts or stops the Oozie service :param action: 'start' or 'stop' :param rolling_restart: if True, then most of the pre-startup checks are skipped since a variation of them was performed during the rolling upgrade :return: """ import params environment={'OOZIE_CONFIG': params.conf_dir} if params.security_enabled: if params.oozie_principal is None: oozie_principal_with_host = 'missing_principal' else: oozie_principal_with_host = params.oozie_principal.replace("_HOST", params.hostname) kinit_if_needed = format("{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};") else: kinit_if_needed = "" no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user) if action == 'start': start_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh") if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": db_connection_check_command = format("{java_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}") else: db_connection_check_command = None if not rolling_restart: if not os.path.isfile(params.target) and params.jdbc_driver_name == "org.postgresql.Driver": print format("ERROR: jdbc file {target} is unavailable. 
Please, follow next steps:\n" \ "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \ "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \ "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \ "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n") exit(1) if db_connection_check_command: Execute( db_connection_check_command, tries=5, try_sleep=10, user=params.oozie_user, ) Execute( format("cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run"), user = params.oozie_user, not_if = no_op_test, ignore_failures = True ) if params.security_enabled: Execute(kinit_if_needed, user = params.oozie_user, ) if params.host_sys_prepped: print "Skipping creation of oozie sharelib as host is sys prepped" hdfs_share_dir_exists = True # skip time-expensive hadoop fs -ls check elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): # check with webhdfs is much faster than executing hadoop fs -ls. util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled) list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False) hdfs_share_dir_exists = ('FileStatus' in list_status) else: # have to do time expensive hadoop fs -ls check. 
hdfs_share_dir_exists = shell.call(format("{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'"), user=params.oozie_user)[0] if not hdfs_share_dir_exists: Execute( params.put_shared_lib_to_hdfs_cmd, user = params.oozie_user, path = params.execute_path ) params.HdfsResource(format("{oozie_hdfs_user_dir}/share"), type="directory", action="create_on_execute", mode=0755, recursive_chmod=True, ) params.HdfsResource(None, action="execute") # start oozie Execute( start_cmd, environment=environment, user = params.oozie_user, not_if = no_op_test ) elif action == 'stop': stop_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-stop.sh") # stop oozie Execute(stop_cmd, environment=environment, only_if = no_op_test, user = params.oozie_user) File(params.pid_file, action = "delete")
def oozie_service(action='start', rolling_restart=False): """ Starts or stops the Oozie service :param action: 'start' or 'stop' :param rolling_restart: if True, then most of the pre-startup checks are skipped since a variation of them was performed during the rolling upgrade :return: """ import params environment = {'OOZIE_CONFIG': params.conf_dir} if params.security_enabled: if params.oozie_principal is None: oozie_principal_with_host = 'missing_principal' else: oozie_principal_with_host = params.oozie_principal.replace( "_HOST", params.hostname) kinit_if_needed = format( "{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};" ) else: kinit_if_needed = "" no_op_test = format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ) if action == 'start': start_cmd = format( "cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh") if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": db_connection_check_command = format( "{java_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}" ) else: db_connection_check_command = None if not rolling_restart: if not os.path.isfile( params.target ) and params.jdbc_driver_name == "org.postgresql.Driver": print format("ERROR: jdbc file {target} is unavailable. 
Please, follow next steps:\n" \ "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \ "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \ "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \ "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n") exit(1) if db_connection_check_command: Execute(db_connection_check_command, tries=5, try_sleep=10) Execute(format( "cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run" ), user=params.oozie_user, not_if=no_op_test, ignore_failures=True) if params.security_enabled: Execute( kinit_if_needed, user=params.oozie_user, ) if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs): # check with webhdfs is much faster than executing hadoop fs -ls. util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled) list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False) hdfs_share_dir_exists = ('FileStatus' in list_status) else: # have to do time expensive hadoop fs -ls check. 
hdfs_share_dir_exists = shell.call(format( "{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'" ), user=params.oozie_user)[0] if not hdfs_share_dir_exists: Execute(params.put_shared_lib_to_hdfs_cmd, user=params.oozie_user, path=params.execute_path) params.HdfsResource( format("{oozie_hdfs_user_dir}/share"), type="directory", action="create_on_execute", mode=0755, recursive_chmod=True, ) params.HdfsResource(None, action="execute") # start oozie Execute(start_cmd, environment=environment, user=params.oozie_user, not_if=no_op_test) elif action == 'stop': stop_cmd = format( "cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-stop.sh") # stop oozie Execute(stop_cmd, environment=environment, only_if=no_op_test, user=params.oozie_user) File(params.pid_file, action="delete")