def is_active_namenode(hdfs_binary):
  """
  Determine whether the NameNode on this host currently holds the active role.

  Polls the HA admin state up to five times, sleeping 6 seconds between
  polls (roughly 30 seconds total). Returns True when this NameNode reports
  "active", False when the peer NameNode is active or neither reports
  active within the retry window. In non-HA deployments the single
  NameNode is always considered active.

  :param hdfs_binary: name/path of the HDFS binary to use
  :return: True if current NameNode is active, False otherwise
  """
  import params

  if not params.dfs_ha_enabled:
    return True

  check_this_nn_cmd = as_user(
      format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"),
      params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  check_other_nn_cmd = as_user(
      format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {other_namenode_id} | grep active"),
      params.hdfs_user, env={'PATH': params.hadoop_bin_dir})

  attempts = 5
  for attempt in range(attempts):
    # grep exits 0 when the queried NameNode reported "active".
    rc, _ = shell.call(check_this_nn_cmd)
    if rc == 0:
      return True

    rc, _ = shell.call(check_other_nn_cmd)
    if rc == 0:
      return False

    # Sleep between polls, but not after the final one.
    if attempt < attempts - 1:
      time.sleep(6)

  Logger.info("Active NameNode is not found.")
  return False
def knox():
  """
  Lay down the Knox gateway configuration and bootstrap its secrets.

  Creates the Knox data/log/pid/conf directories, writes gateway-site.xml,
  the log4j properties and the default topology, optionally installs the
  Kerberos JAAS login config, recursively chowns everything to the Knox
  user, then generates the master secret and the gateway certificate when
  they do not exist yet.
  """
  import params

  directories = [params.knox_data_dir, params.knox_logs_dir, params.knox_pid_dir, params.knox_conf_dir, os.path.join(params.knox_conf_dir, "topologies")]
  for directory in directories:
    Directory(directory,
              owner = params.knox_user,
              group = params.knox_group,
              recursive = True
    )

  XmlConfig("gateway-site.xml",
            conf_dir=params.knox_conf_dir,
            configurations=params.config['configurations']['gateway-site'],
            configuration_attributes=params.config['configuration_attributes']['gateway-site'],
            owner=params.knox_user,
            group=params.knox_group,
  )

  File(format("{params.knox_conf_dir}/gateway-log4j.properties"),
       mode=0644,
       group=params.knox_group,
       owner=params.knox_user,
       content=params.gateway_log4j
  )

  File(format("{params.knox_conf_dir}/topologies/default.xml"),
       group=params.knox_group,
       owner=params.knox_user,
       content=InlineTemplate(params.topology_template)
  )

  if params.security_enabled:
    # Kerberos JAAS login configuration for the gateway process.
    TemplateConfig( format("{knox_conf_dir}/krb5JAASLogin.conf"),
        owner = params.knox_user,
        template_tag = None
    )

  # Fix ownership recursively on everything created above; runs via sudo
  # because the agent user may not own the files yet.
  dirs_to_chown = tuple(directories)
  cmd = ('chown','-R',format('{knox_user}:{knox_group}')) + dirs_to_chown
  Execute(cmd,
          sudo = True,
  )

  # Generate the master secret once; skipped when the secret file exists.
  # The !p marker masks the secret in logged output.
  cmd = format('{knox_client_bin} create-master --master {knox_master_secret!p}')
  master_secret_exist = as_user(format('test -f {knox_master_secret_path}'), params.knox_user)
  Execute(cmd,
          user=params.knox_user,
          environment={'JAVA_HOME': params.java_home},
          not_if=master_secret_exist,
  )

  # Generate the gateway certificate once; skipped when the keystore exists.
  cmd = format('{knox_client_bin} create-cert --hostname {knox_host_name_in_cluster}')
  cert_store_exist = as_user(format('test -f {knox_cert_store_path}'), params.knox_user)
  Execute(cmd,
          user=params.knox_user,
          environment={'JAVA_HOME': params.java_home},
          not_if=cert_store_exist,
  )
def webhcat_service(action='start', upgrade_type=None):
  """
  Start or stop the WebHCat (Templeton) server daemon.

  :param action: 'start' or 'stop'
  :param upgrade_type: non-None while a stack upgrade is in progress; in
                       that case HADOOP_HOME points at the versioned install
  """
  import params

  environ = {'HADOOP_HOME': params.hadoop_home}
  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')

  if action == 'start':
    if upgrade_type is not None and params.version and params.stack_root:
      environ['HADOOP_HOME'] = format("{stack_root}/{version}/hadoop")

    daemon_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
    # Skip the start when the pid file already points at a live process.
    no_op_test = as_user(format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'), user=params.webhcat_user)
    try:
      Execute(daemon_cmd, user=params.webhcat_user, not_if=no_op_test, environment=environ)
    except:
      # Surface the server logs before re-raising the failure.
      show_logs(params.hcat_log_dir, params.webhcat_user)
      raise
  elif action == 'stop':
    # A graceful-stop failure is logged but not fatal: the hard kill below
    # finishes the job.
    try:
      graceful_stop(cmd, environ)
    except Fail:
      show_logs(params.hcat_log_dir, params.webhcat_user)
      Logger.info(traceback.format_exc())

    # Shell expression that expands to the daemon pid, read as the webhcat user.
    pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
    process_id_exists_command = format("ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1")
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
    wait_time = 10
    # Hard kill only when the process is still alive after the grace period.
    Execute(daemon_hard_kill_cmd,
            not_if=format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"),
            ignore_failures=True)

    try:
      # check if stopped the process, else fail the task
      Execute(format("! ({process_id_exists_command})"),
              tries=20,
              try_sleep=3,
      )
    except:
      show_logs(params.hcat_log_dir, params.webhcat_user)
      raise

    File(params.webhcat_pid_file,
         action="delete",
    )
def knox():
  """
  Lay down the Knox gateway configuration and bootstrap its secrets.

  Creates the Knox data/log/pid/conf directories (with recursive ownership),
  writes gateway-site.xml, the log4j properties and the default topology,
  optionally installs the Kerberos JAAS login config, then generates the
  master secret and the gateway certificate when they do not exist yet.
  """
  import params

  # One Directory resource handles all required directories, including
  # creating parents and fixing ownership recursively.
  Directory([params.knox_data_dir, params.knox_logs_dir, params.knox_pid_dir, params.knox_conf_dir, os.path.join(params.knox_conf_dir, "topologies")],
            owner = params.knox_user,
            group = params.knox_group,
            create_parents = True,
            cd_access = "a",
            mode = 0755,
            recursive_ownership = True,
            recursion_follow_links = True,
  )

  XmlConfig("gateway-site.xml",
            conf_dir=params.knox_conf_dir,
            configurations=params.config['configurations']['gateway-site'],
            configuration_attributes=params.config['configuration_attributes']['gateway-site'],
            owner=params.knox_user,
            group=params.knox_group,
  )

  File(format("{params.knox_conf_dir}/gateway-log4j.properties"),
       mode=0644,
       group=params.knox_group,
       owner=params.knox_user,
       content=params.gateway_log4j
  )

  File(format("{params.knox_conf_dir}/topologies/default.xml"),
       group=params.knox_group,
       owner=params.knox_user,
       content=InlineTemplate(params.topology_template)
  )

  if params.security_enabled:
    # Kerberos JAAS login configuration for the gateway process.
    TemplateConfig( format("{knox_conf_dir}/krb5JAASLogin.conf"),
        owner = params.knox_user,
        template_tag = None
    )

  # Generate the master secret once; skipped when the secret file exists.
  # The !p marker masks the secret in logged output.
  cmd = format('{knox_client_bin} create-master --master {knox_master_secret!p}')
  master_secret_exist = as_user(format('test -f {knox_master_secret_path}'), params.knox_user)
  Execute(cmd,
          user=params.knox_user,
          environment={'JAVA_HOME': params.java_home},
          not_if=master_secret_exist,
  )

  # Generate the gateway certificate once; skipped when the keystore exists.
  cmd = format('{knox_client_bin} create-cert --hostname {knox_host_name_in_cluster}')
  cert_store_exist = as_user(format('test -f {knox_cert_store_path}'), params.knox_user)
  Execute(cmd,
          user=params.knox_user,
          environment={'JAVA_HOME': params.java_home},
          not_if=cert_store_exist,
  )
def accumulo_service(name, action='start'):  # 'start' or 'stop' or 'status'
  """
  Start or stop an Accumulo role daemon (e.g. master, tserver, monitor).

  :param name: role to act on; also used in the pid file name
  :param action: 'start' or 'stop' or 'status'
  """
  import params

  role = name
  pid_file = format("{pid_dir}/accumulo-{accumulo_user}-{role}.pid")
  # Shell test that succeeds when the pid file names a live process.
  pid_exists = format(
    "ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")

  if action == 'start':
    Directory(os.path.expanduser(format("~{accumulo_user}")),
              owner=params.accumulo_user,
              group=params.user_group,
              recursive_ownership=True)

    # Non-tserver roles first reset the master goal state to NORMAL,
    # unless this role is already running.
    if name != 'tserver':
      Execute(format(
          "{daemon_script} org.apache.accumulo.master.state.SetGoalState NORMAL"
        ),
        not_if=as_user(pid_exists, params.accumulo_user),
        user=params.accumulo_user)
    address = params.hostname
    # The monitor can be configured to listen on all interfaces.
    if name == 'monitor' and params.accumulo_monitor_bind_all:
      address = '0.0.0.0'

    # Daemonize in the shell and capture the background pid into pid_file.
    daemon_cmd = format(
      "{daemon_script} {role} --address {address} > {log_dir}/accumulo-{role}.out 2>{log_dir}/accumulo-{role}.err & echo $! > {pid_file}"
    )
    try:
      Execute(daemon_cmd,
              not_if=as_user(pid_exists, params.accumulo_user),
              user=params.accumulo_user)
    except:
      # Surface the daemon logs before re-raising the failure.
      show_logs(params.log_dir, params.accumulo_user)
      raise
  elif action == 'stop':
    no_pid_exists = format("! ({pid_exists})")
    pid = format("`cat {pid_file}` >/dev/null 2>&1")
    # Polite termination first; skipped when the process is already gone.
    Execute(format("kill {pid}"),
            not_if=as_user(no_pid_exists, params.accumulo_user),
            user=params.accumulo_user)
    # Escalate to SIGKILL only if the process survives the grace periods.
    Execute(
      format("kill -9 {pid}"),
      not_if=as_user(
        format(
          "sleep 2; {no_pid_exists} || sleep 20; {no_pid_exists}"),
        params.accumulo_user),
      ignore_failures=True,
      user=params.accumulo_user)
    Execute(format("rm -f {pid_file}"),
            user=params.accumulo_user)
def webhcat_service(action='start', upgrade_type=None):
  """
  Start or stop the WebHCat (Templeton) server daemon.

  On 'start' the daemon is launched unless its pid file already points at a
  live process. On 'stop' the daemon script is asked to stop, the process is
  hard-killed if it survives a 10 second grace period, the task fails when
  it still will not die, and the pid file is removed.

  :param action: 'start' or 'stop'
  :param upgrade_type: non-None while a stack upgrade is in progress
  """
  import params

  # Hive/HCat locations are exported inline; {env} is spliced into the
  # shell command lines built below.
  env = format(
    'export HIVE_HOME=/usr/lib/hive; export HCAT_HOME=/usr/lib/hive-hcatalog'
  )
  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')
  environ = {'HADOOP_HOME': "/usr/lib/hadoop"}

  if action == 'start':
    if upgrade_type is not None and params.version:
      environ['HADOOP_HOME'] = format("/usr/lib/hadoop")

    start_cmd = format('{env} ; cd {hcat_pid_dir} ; {cmd} start')
    # Skip the start when the pid file already names a live process.
    already_running = as_user(
      format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'),
      user=params.webhcat_user)
    Execute(start_cmd,
            user=params.webhcat_user,
            not_if=already_running,
            environment=environ)
  elif action == 'stop':
    stop_cmd = format('{env} ; {cmd} stop')
    Execute(stop_cmd, user=params.webhcat_user, environment=environ)

    # Shell expression expanding to the daemon pid, read as the webhcat user.
    pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
    process_id_exists_command = format(
      "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
    )

    # Hard kill only when the process survives the grace period.
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
    wait_time = 10
    Execute(daemon_hard_kill_cmd,
            not_if=format(
              "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ))

    # check if stopped the process, else fail the task
    Execute(format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
    )

    File(params.webhcat_pid_file,
         action="delete",
    )
def create_user(user, password):
  """
  Create an Accumulo user with CREATE_TABLE permission via the Accumulo shell.

  In secure deployments with secure user auth the shell is driven through
  kinit; otherwise the root password and the new user's password are piped
  in via temporary files. The creation is skipped when "userpermissions"
  already succeeds for the user. Temp files are always cleaned up.

  :param user: user name to create
  :param password: password for the new user (unused in the secure branch)
  """
  import params

  # Temporary files: root password (for the not_if check), interactive
  # password input, and the shell command script.
  rpassfile = format("{params.exec_tmp_dir}/pass0")
  passfile = format("{params.exec_tmp_dir}/pass")
  cmdfile = format("{params.exec_tmp_dir}/cmds")
  try:
    File(cmdfile,
         mode=0600,
         group=params.user_group,
         owner=params.accumulo_user,
         content=InlineTemplate(
           format("createuser {user}\n"
                  "grant -s System.CREATE_TABLE -u {user}\n\n")))
    if params.security_enabled and params.has_secure_user_auth:
      # Kerberos path: authenticate with kinit, no passwords needed.
      Execute(format(
        "{params.kinit_cmd} {params.daemon_script} shell -f "
        "{cmdfile}"),
        not_if=as_user(
          format("{params.kinit_cmd} "
                 "{params.daemon_script} shell "
                 "-e \"userpermissions -u {user}\""),
          params.accumulo_user),
        user=params.accumulo_user)
    else:
      # Password path: the shell reads the root password, then the new
      # password twice (entry + confirmation).
      File(rpassfile,
           mode=0600,
           group=params.user_group,
           owner=params.accumulo_user,
           content=InlineTemplate('{{root_password}}\n\n'))
      File(passfile,
           mode=0600,
           group=params.user_group,
           owner=params.accumulo_user,
           content=InlineTemplate(
             format("{params.root_password}\n"
                    "{password}\n"
                    "{password}\n\n")))
      Execute(format(
        "cat {passfile} | {params.daemon_script} shell -u root "
        "-f {cmdfile}"),
        not_if=as_user(
          format("cat {rpassfile} | "
                 "{params.daemon_script} shell -u root "
                 "-e \"userpermissions -u {user}\""),
          params.accumulo_user),
        user=params.accumulo_user)
  finally:
    # Never leave password material behind, even on failure.
    try_remove(rpassfile)
    try_remove(passfile)
    try_remove(cmdfile)
def prepare_upgrade_save_namespace():
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires
  saving the namespace.

  Builds the "dfsadmin -saveNamespace" command for the HDFS user and raises
  Fail with a remediation hint when the checkpoint cannot be taken.

  :raises Fail: if saving the namespace fails
  """
  import params

  dfsadmin_base_command = get_dfsadmin_base_command('hdfs')
  save_namespace_cmd = dfsadmin_base_command + " -saveNamespace"
  try:
    Logger.info("Checkpoint the current namespace.")
    # NOTE(review): as_user builds/wraps the command for the hdfs user; it
    # is not obvious from here that this also executes it -- confirm
    # against the as_user implementation.
    as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  except Exception:
    # Fixed: "except Exception, e" bound a name that was never used and is
    # Python-2-only syntax; the no-binding form is portable and equivalent.
    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
    Logger.error(message)
    raise Fail(message)
def kill_zkfc(zkfc_user):
  """
  There are two potential methods for failing over the namenode, especially
  during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up
  and has zkfc running on it.
  Option 2. Silent failover
  :param zkfc_user: User that started the ZKFC process.
  :return: Return True if ZKFC was killed, otherwise, false.
  """
  import params

  # Nothing to do outside HA, or when no pid file is configured.
  if not params.dfs_ha_enabled:
    return False
  if not params.zkfc_pid_file:
    return False

  # Succeeds only when the pid file names a live process.
  check_process = as_user(format(
    "ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"
  ), user=zkfc_user)
  rc, _ = shell.call(check_process)
  if rc != 0:
    return False

  Logger.debug("ZKFC is running and will be killed.")
  # SIGTERM the daemon, then drop its stale pid file.
  Execute(format("kill -15 `cat {zkfc_pid_file}`"), user=zkfc_user)
  File(params.zkfc_pid_file, action="delete")
  return True
def upload_configuration_to_zk(zookeeper_quorum, solr_znode, config_set, config_set_dir, tmp_config_set_dir, java64_home, user, retry=5, interval=10):
  """
  Upload a configuration set to zookeeper with solrCloudCli.sh.

  When the configuration set already exists in zookeeper it is first
  downloaded into a temporary location, so that changes made there are
  kept rather than redefined by the user. The upload from config_set_dir
  only happens when no downloaded copy is present.
  """
  solr_cli_prefix = __create_solr_cloud_cli_prefix(zookeeper_quorum, solr_znode, java64_home)

  download_cmd = format(
    '{solr_cli_prefix} --download-config --config-dir {tmp_config_set_dir} --config-set {config_set} --retry {retry} --interval {interval}'
  )
  check_cmd = format(
    "{solr_cli_prefix} --check-config --config-set {config_set} --retry {retry} --interval {interval}"
  )
  upload_cmd = format(
    '{solr_cli_prefix} --upload-config --config-dir {config_set_dir} --config-set {config_set} --retry {retry} --interval {interval}'
  )

  # Download only when the config set is already present in zookeeper.
  Execute(download_cmd, only_if=as_user(check_cmd, user), user=user)

  # Upload the packaged config set unless a downloaded copy exists.
  Execute(upload_cmd,
          not_if=format("test -d {tmp_config_set_dir}"),
          user=user)
def kill_zkfc(zkfc_user):
  """
  There are two potential methods for failing over the namenode, especially
  during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up
  and has zkfc running on it.
  Option 2. Silent failover (not supported as of HDP 2.2.0.0)
  :param zkfc_user: User that started the ZKFC process.
  :return: Return True if ZKFC was killed, otherwise, false.
  """
  import params

  # ZKFC only exists in HA deployments.
  if not params.dfs_ha_enabled:
    return False

  zkfc_pid_file = get_service_pid_file("zkfc", zkfc_user)
  if not zkfc_pid_file:
    return False

  # Succeeds only when the pid file names a live process.
  check_process = as_user(
    format("ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"),
    user=zkfc_user)
  rc, _ = shell.call(check_process)
  if rc != 0:
    return False

  Logger.debug("ZKFC is running and will be killed.")
  # SIGTERM the daemon, then drop its stale pid file.
  Execute(format("kill -15 `cat {zkfc_pid_file}`"), user=zkfc_user)
  File(zkfc_pid_file, action="delete")
  return True
def create_metastore_schema():
  """
  Initialize the Hive metastore database schema via schematool.

  The "-initSchema" run is skipped when "schematool -info" already succeeds
  for the configured metastore database.
  """
  import params

  create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                             "{hive_schematool_bin}/schematool -initSchema "
                             "-dbType {hive_metastore_db_type} "
                             "-userName {hive_metastore_user_name} "
                             "-passWord {hive_metastore_user_passwd!p} -verbose")

  check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                            "{hive_schematool_bin}/schematool -info "
                                            "-dbType {hive_metastore_db_type} "
                                            "-userName {hive_metastore_user_name} "
                                            "-passWord {hive_metastore_user_passwd!p} -verbose"),
                                     params.hive_user)

  # HACK: in cases with quoted passwords and as_user (which does the quoting
  # as well) !p won't work for hiding passwords. Strip the outer quoting
  # layer and register the masked form with the logger instead.
  quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd))
  wrapped_in_quotes = (quoted_hive_metastore_user_passwd[0] == quoted_hive_metastore_user_passwd[-1]
                       and quoted_hive_metastore_user_passwd[0] in ("'", '"'))
  if wrapped_in_quotes:
    quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]

  Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace(
    format("-passWord {quoted_hive_metastore_user_passwd}"),
    "-passWord " + utils.PASSWORDS_HIDE_STRING))

  Execute(create_schema_cmd,
          not_if=check_schema_created_cmd,
          user=params.hive_user)
def test_start_secured(self, isfile_mock):
  """
  Verify the secured start sequence of the Oozie server: DB creation,
  kinit + sharelib upload to HDFS, and the daemon start script.
  """
  # Pretend all checked files exist so configure proceeds.
  isfile_mock.return_value = True
  self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/oozie_server.py",
                     classname = "OozieServer",
                     command = "start",
                     config_file="secured.json",
                     hdp_stack_version = self.STACK_VERSION,
                     target = RMFTestCase.TARGET_COMMON_SERVICES
  )
  self.assert_configure_secured()
  # Oozie DB is created only when no live pid exists; failures are tolerated.
  self.assertResourceCalled('Execute', 'cd /var/tmp/oozie && /usr/lib/oozie/bin/ooziedb.sh create -sqlfile oozie.sql -run',
      not_if = 'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
      ignore_failures = True,
      user = '******',
  )
  # Sharelib upload runs under a Kerberos ticket and is skipped when the
  # share dir is already populated in HDFS.
  self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -put /usr/lib/oozie/share /user/oozie ; hadoop --config /etc/hadoop/conf dfs -chmod -R 755 /user/oozie/share',
      not_if = shell.as_user("/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -ls /user/oozie/share | awk 'BEGIN {count=0;} /share/ {count++} END {if (count > 0) {exit 0} else {exit 1}}'", "oozie"),
      user = '******',
      path = ['/usr/bin:/usr/bin'],
  )
  # Daemon start is skipped when the pid file already names a live process.
  self.assertResourceCalled('Execute', 'cd /var/tmp/oozie && /usr/lib/oozie/bin/oozie-start.sh',
      not_if = 'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
      user = '******',
  )
  self.assertNoMoreResources()
def startRebalancingProcess(threshold, rebalance_env):
  """
  Build the HDFS balancer command, wrapped to run as the HDFS user.

  :param threshold: balancer disk-usage threshold (percent)
  :param rebalance_env: environment dict passed through to as_user
  :return: the shell command string produced by as_user
  """
  balancer_cmd = format(
    'hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}'
  )
  return as_user(balancer_cmd, params.hdfs_user, env=rebalance_env)
def get_user_call_output(command, user, is_checked_call=True, **call_kwargs): """ This function eliminates only output of command inside the su, ignoring the su ouput itself. This is useful since some users have motd messages setup by default on su -l. @return: code, stdout, stderr """ command_string = shell.string_cmd_from_args_list(command) if isinstance(command, (list, tuple)) else command out_files = [] try: out_files.append(tempfile.NamedTemporaryFile()) out_files.append(tempfile.NamedTemporaryFile()) # other user should be able to write to it for f in out_files: os.chmod(f.name, 0666) command_string += " 1>" + out_files[0].name command_string += " 2>" + out_files[1].name func = shell.checked_call if is_checked_call else shell.call func_result = func(shell.as_user(command_string, user), **call_kwargs) files_output = [] for f in out_files: files_output.append(f.read()) return func_result[0], files_output[0], files_output[1] finally: for f in out_files: f.close()
def prepare_upgrade_save_namespace(hdfs_binary):
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires
  saving the namespace.

  :param hdfs_binary: name/path of the HDFS binary to use
  :raises Fail: if saving the namespace fails
  """
  import params

  dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
  save_namespace_cmd = dfsadmin_base_command + " -saveNamespace"
  try:
    Logger.info("Checkpoint the current namespace.")
    # NOTE(review): as_user builds/wraps the command for the hdfs user; it
    # is not obvious from here that this also executes it -- confirm
    # against the as_user implementation.
    as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  except Exception:
    # Fixed: "except Exception, e" bound a name that was never used and is
    # Python-2-only syntax; the no-binding form is portable and equivalent.
    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
    Logger.error(message)
    raise Fail(message)
def create_metastore_schema():
  """
  Initialize the Hive metastore database schema via schematool.

  The init run is skipped when "schematool -info" already succeeds against
  the configured metastore database.
  """
  import params

  init_cmd = format(
    "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
    "{hive_schematool_bin}/schematool -initSchema "
    "-dbType {hive_metastore_db_type} "
    "-userName {hive_metastore_user_name} "
    "-passWord {hive_metastore_user_passwd!p} -verbose")

  info_cmd = as_user(
    format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
           "{hive_schematool_bin}/schematool -info "
           "-dbType {hive_metastore_db_type} "
           "-userName {hive_metastore_user_name} "
           "-passWord {hive_metastore_user_passwd!p} -verbose"),
    params.hive_user)

  # as_user adds its own layer of quoting, which defeats the !p password
  # masking; strip the outer quotes and teach the logger to mask the
  # password in the logged command instead.
  quoted_hive_metastore_user_passwd = quote_bash_args(
    quote_bash_args(params.hive_metastore_user_passwd))
  first = quoted_hive_metastore_user_passwd[0]
  last = quoted_hive_metastore_user_passwd[-1]
  if (first == "'" and last == "'") or (first == '"' and last == '"'):
    quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]

  Logger.sensitive_strings[repr(info_cmd)] = repr(info_cmd.replace(
    format("-passWord {quoted_hive_metastore_user_passwd}"),
    "-passWord " + utils.PASSWORDS_HIDE_STRING))

  Execute(init_cmd, not_if=info_cmd, user=params.hive_user)
def test_start_secured(self, isfile_mock):
  """
  Verify the secured start sequence of the Oozie server: DB creation,
  kinit + sharelib upload to HDFS, and the daemon start script.
  """
  # Pretend all checked files exist so configure proceeds.
  isfile_mock.return_value = True
  self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR +
                     "/scripts/oozie_server.py",
                     classname="OozieServer",
                     command="start",
                     config_file="secured.json",
                     hdp_stack_version=self.STACK_VERSION,
                     target=RMFTestCase.TARGET_COMMON_SERVICES)
  self.assert_configure_secured()
  # Oozie DB is created only when no live pid exists; failures are tolerated.
  self.assertResourceCalled(
    'Execute',
    'cd /var/tmp/oozie && /usr/lib/oozie/bin/ooziedb.sh create -sqlfile oozie.sql -run',
    not_if=
    'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
    ignore_failures=True,
    user='******',
  )
  # Sharelib upload runs under a Kerberos ticket and is skipped when the
  # share dir is already populated in HDFS.
  self.assertResourceCalled(
    'Execute',
    '/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -put /usr/lib/oozie/share /user/oozie ; hadoop --config /etc/hadoop/conf dfs -chmod -R 755 /user/oozie/share',
    not_if=shell.as_user(
      "/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -ls /user/oozie/share | awk 'BEGIN {count=0;} /share/ {count++} END {if (count > 0) {exit 0} else {exit 1}}'",
      "oozie"),
    user='******',
    path=['/usr/bin:/usr/bin'],
  )
  # Daemon start is skipped when the pid file already names a live process.
  self.assertResourceCalled(
    'Execute',
    'cd /var/tmp/oozie && /usr/lib/oozie/bin/oozie-start.sh',
    not_if=
    'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
    user='******',
  )
  self.assertNoMoreResources()
def webhcat_service(action='start', upgrade_type=None):
  """
  Start or stop the WebHCat (Templeton) server daemon.

  :param action: 'start' or 'stop'
  :param upgrade_type: non-None while a stack upgrade is in progress; in
                       that case HADOOP_HOME points at the versioned install
  """
  import params

  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')
  environ = {'HADOOP_HOME': params.hadoop_home}

  if action == 'start':
    if upgrade_type is not None and params.version:
      environ['HADOOP_HOME'] = format("/usr/iop/{version}/hadoop")

    start_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
    # Skip the start when the pid file already names a live process.
    already_running = as_user(
      format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'),
      user=params.webhcat_user)
    Execute(start_cmd,
            user=params.webhcat_user,
            not_if=already_running,
            environment=environ)
  elif action == 'stop':
    stop_cmd = format('cd {hcat_pid_dir} ; {cmd} stop')
    Execute(stop_cmd,
            user=params.webhcat_user,
            environment=environ)
    # Drop the stale pid file after the daemon script has run.
    File(params.webhcat_pid_file,
         action="delete")
def check_fs_root(conf_dir, execution_path):
  """
  Ensure the Hive metastore FS root matches the configured fs_root.

  Lists the current FS root with "hive --service metatool -listFSRoot" and,
  when the reported root differs from params.fs_root, rewrites it with
  "-updateLocation". The check is skipped entirely when management is
  disabled or fs_root is not an hdfs:// URI.

  :param conf_dir: Hive configuration directory passed to the metatool
  :param execution_path: PATH to use while running the metatool commands
  """
  import params

  if not params.manage_hive_fsroot:
    Logger.info(
      "Skipping fs root check as cluster-env/manage_hive_fsroot is disabled"
    )
    return

  if not params.fs_root.startswith("hdfs://"):
    Logger.info(
      "Skipping fs root check as fs_root does not start with hdfs://")
    return

  metatool_cmd = format("hive --config {conf_dir} --service metatool")
  # BUG FIX: env was previously passed to format(), where it was silently
  # swallowed as an unused substitution kwarg; it belongs to as_user() so
  # the metatool runs with the right PATH (same pattern as the other
  # as_user(..., env=...) call sites in these scripts).
  cmd = as_user(format("{metatool_cmd} -listFSRoot"), params.hive_user, env={'PATH': execution_path}) \
      + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
  code, out = shell.call(cmd)

  # Update only when the listing succeeded, produced a root, and that root
  # differs from the configured fs_root.
  if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip(
  ):
    out = out.strip()
    cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
    Execute(cmd,
            user=params.hive_user,
            environment={'PATH': execution_path})
def create_hive_metastore_schema():
  """
  Create the Hive "sys" database schema via schematool, at most once.

  A marker file records a successful run so repeated invocations are no-ops.
  The schema creation itself is best-effort: failures are logged but do not
  fail the task.
  """
  import params

  # Marker file written after a successful run; acts as the idempotency guard.
  SYS_DB_CREATED_FILE = "/etc/hive/sys.db.created"

  if os.path.isfile(SYS_DB_CREATED_FILE):
    Logger.info("Sys DB is already created")
    return

  create_hive_schema_cmd = format(
    "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
    "{hive_schematool_bin}/schematool -initSchema "
    "-dbType hive "
    "-metaDbType {hive_metastore_db_type} "
    "-userName {hive_metastore_user_name} "
    "-passWord {hive_metastore_user_passwd!p} "
    "-verbose")

  # Used as a not_if guard: creation is skipped when -info already succeeds.
  check_hive_schema_created_cmd = as_user(
    format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
           "{hive_schematool_bin}/schematool -info "
           "-dbType hive "
           "-metaDbType {hive_metastore_db_type} "
           "-userName {hive_metastore_user_name} "
           "-passWord {hive_metastore_user_passwd!p} "
           "-verbose"), params.hive_user)

  # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
  # Fixing it with the hack below:
  quoted_hive_metastore_user_passwd = quote_bash_args(
    quote_bash_args(params.hive_metastore_user_passwd))
  if quoted_hive_metastore_user_passwd.startswith("'") and quoted_hive_metastore_user_passwd.endswith("'") \
      or quoted_hive_metastore_user_passwd.startswith('"') and quoted_hive_metastore_user_passwd.endswith('"'):
    quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
      1:-1]
  # Register masked forms of both commands so the password never reaches logs.
  Logger.sensitive_strings[repr(create_hive_schema_cmd)] = repr(
    create_hive_schema_cmd.replace(
      format("-passWord {quoted_hive_metastore_user_passwd}"),
      "-passWord " + utils.PASSWORDS_HIDE_STRING))
  Logger.sensitive_strings[repr(check_hive_schema_created_cmd)] = repr(
    check_hive_schema_created_cmd.replace(
      format("-passWord {quoted_hive_metastore_user_passwd}"),
      "-passWord " + utils.PASSWORDS_HIDE_STRING))

  try:
    # Kerberos deployments need a ticket before schematool can be run.
    if params.security_enabled:
      hive_kinit_cmd = format(
        "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
      )
      Execute(hive_kinit_cmd, user=params.hive_user)

    Execute(create_hive_schema_cmd,
            not_if=check_hive_schema_created_cmd,
            user=params.hive_user)
    Execute("touch " + SYS_DB_CREATED_FILE, user="******")
    Logger.info("Sys DB is set up")
  except:
    # Deliberately best-effort: log the failure and continue without the
    # marker file so a later run can retry.
    Logger.error("Could not create Sys DB.")
    Logger.error(traceback.format_exc())
def setup_metastore():
  """
  Configure the Hive Metastore: optional hivemetastore-site.xml, metrics
  properties, the start script, and (when enabled) the initial metastore
  database schema via schematool.
  """
  import params

  # hivemetastore-site.xml is only supported on newer stacks and only
  # written when the config group actually carries it.
  if params.hive_metastore_site_supported:
    hivemetastore_site_config = get_config("hivemetastore-site")
    if hivemetastore_site_config:
      XmlConfig("hivemetastore-site.xml",
                conf_dir=params.hive_server_conf_dir,
                configurations=params.config['configurations']
                ['hivemetastore-site'],
                configuration_attributes=params.
                config['configuration_attributes']['hivemetastore-site'],
                owner=params.hive_user,
                group=params.user_group,
                mode=0600)

  File(os.path.join(params.hive_server_conf_dir,
                    "hadoop-metrics2-hivemetastore.properties"),
       owner=params.hive_user,
       group=params.user_group,
       content=Template("hadoop-metrics2-hivemetastore.properties.j2"),
       mode=0600)

  File(params.start_metastore_path,
       mode=0755,
       content=StaticFile('startMetastore.sh'))

  if params.init_metastore_schema:
    create_schema_cmd = format(
      "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
      "{hive_schematool_bin}/schematool -initSchema "
      "-dbType {hive_metastore_db_type} "
      "-userName {hive_metastore_user_name} "
      "-passWord {hive_metastore_user_passwd!p} -verbose")

    # Used as a not_if guard: init is skipped when -info already succeeds.
    check_schema_created_cmd = as_user(
      format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
             "{hive_schematool_bin}/schematool -info "
             "-dbType {hive_metastore_db_type} "
             "-userName {hive_metastore_user_name} "
             "-passWord {hive_metastore_user_passwd!p} -verbose"),
      params.hive_user)

    # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
    # Fixing it with the hack below:
    quoted_hive_metastore_user_passwd = quote_bash_args(
      quote_bash_args(params.hive_metastore_user_passwd))
    if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
        or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
      quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
        1:-1]
    # Register a masked form of the command so the password never reaches logs.
    Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(
      check_schema_created_cmd.replace(
        format("-passWord {quoted_hive_metastore_user_passwd}"),
        "-passWord " + utils.PASSWORDS_HIDE_STRING))

    Execute(create_schema_cmd,
            not_if=check_schema_created_cmd,
            user=params.hive_user)
def get_user_call_output(command, user, quiet=False, is_checked_call=True, **call_kwargs):
  """
  This function eliminates only output of command inside the su, ignoring the su ouput itself.
  This is useful since some users have motd messages setup by default on su -l.

  :param command: command to run, as a string or an argument list
  :param user: user to run the command as (via su)
  :param quiet: False logs the result; None logs it only for external
                callers; True suppresses the log line
  :param is_checked_call: when True, a non-zero exit raises Fail instead of
                          just warning

  @return: code, stdout, stderr
  """
  command_string = shell.string_cmd_from_args_list(command) if isinstance(
    command, (list, tuple)) else command
  out_files = []

  try:
    # One temp file each for stdout and stderr of the wrapped command.
    out_files.append(tempfile.NamedTemporaryFile())
    out_files.append(tempfile.NamedTemporaryFile())

    # other user should be able to write to it
    for f in out_files:
      os.chmod(f.name, 0666)

    # Redirect the command's own streams into the temp files so su noise
    # is kept out of them.
    command_string += " 1>" + out_files[0].name
    command_string += " 2>" + out_files[1].name

    code, _ = shell.call(shell.as_user(command_string, user),
                         quiet=quiet,
                         **call_kwargs)
    files_output = []
    for f in out_files:
      files_output.append(f.read().decode("utf-8").strip('\n'))

    if code:
      # stderr first, then stdout, mirrored into the error message.
      all_output = files_output[1] + '\n' + files_output[0]
      err_msg = Logger.filter_text(
        ("Execution of '%s' returned %d. %s") %
        (command_string, code, all_output))
      if is_checked_call:
        raise Fail(err_msg)
      else:
        Logger.warning(err_msg)

    result = code, files_output[0], files_output[1]

    # quiet=None means "auto": suppress logging only for internal callers,
    # detected by inspecting the calling frame's file location.
    caller_filename = sys._getframe(1).f_code.co_filename
    is_internal_call = shell.NOT_LOGGED_FOLDER in caller_filename
    if quiet == False or (quiet == None and not is_internal_call):
      log_msg = "{0} returned {1}".format(get_user_call_output.__name__,
                                          result)
      Logger.info(log_msg)

    return result
  finally:
    for f in out_files:
      f.close()
def service(componentName, action='start', serviceName='yarn'):
  """
  Control a YARN or MapReduce daemon via its *-daemon.sh script.

  :param componentName: daemon to act on (e.g. 'resourcemanager',
                        'historyserver', 'timelineserver')
  :param action: 'start', 'stop' or 'refreshQueues'
  :param serviceName: 'yarn' or 'mapreduce'; selects the daemon script,
                      pid file location and run-as user
  """
  import params

  if serviceName == 'mapreduce' and componentName == 'historyserver':
    # mr-jobhistory-daemon.sh leaves its pid file behind, so we clean it
    # up ourselves on stop.
    delete_pid_file = True
    daemon = format("{mapred_bin}/mr-jobhistory-daemon.sh")
    pid_file = format("{mapred_pid_dir}/mapred-{mapred_user}-{componentName}.pid")
    usr = params.mapred_user
  else:
    # !!! yarn-daemon.sh deletes the PID for us; if we remove it the script
    # may not work correctly when stopping the service
    delete_pid_file = False
    daemon = format("{yarn_bin}/yarn-daemon.sh")
    pid_file = format("{yarn_pid_dir}/yarn-{yarn_user}-{componentName}.pid")
    usr = params.yarn_user

  cmd = format("export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {daemon} --config {hadoop_conf_dir}")

  if action == 'start':
    daemon_cmd = format("{ulimit_cmd} {cmd} start {componentName}")
    # Succeeds only when the pid file names a live process.
    check_process = as_user(format("ls {pid_file} && ps -p `cat {pid_file}`"), user=usr)

    # Remove the pid file if its corresponding process is not running.
    File(pid_file, action = "delete", not_if = check_process)

    # A stale leveldb lock would prevent the timeline server from starting.
    if componentName == 'timelineserver' and serviceName == 'yarn':
      File(params.ats_leveldb_lock_file,
           action = "delete",
           only_if = format("ls {params.ats_leveldb_lock_file}"),
           not_if = check_process,
           ignore_failures = True
      )

    # Attempt to start the process. Internally, this is skipped if the process is already running.
    Execute(daemon_cmd, user = usr, not_if = check_process)

    # Ensure that the process with the expected PID exists.
    Execute(check_process,
            not_if = check_process,
            tries=5,
            try_sleep=1,
    )
  elif action == 'stop':
    daemon_cmd = format("{cmd} stop {componentName}")
    Execute(daemon_cmd, user=usr)

    # !!! yarn-daemon doesn't need us to delete PIDs
    if delete_pid_file is True:
      File(pid_file, action="delete")
  elif action == 'refreshQueues':
    # Refresh the capacity-scheduler queues, kinit-ing first when required.
    rm_kinit_cmd = params.rm_kinit_cmd
    refresh_cmd = format("{rm_kinit_cmd} export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {yarn_container_bin}/yarn rmadmin -refreshQueues")

    Execute(refresh_cmd, user=usr)
def prepare_upgrade_save_namespace(hdfs_binary):
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires
  saving the namespace.

  :param hdfs_binary: name/path of the HDFS binary to use
  :raises Fail: if saving the namespace fails
  """
  import params

  save_namespace_cmd = format("{hdfs_binary} dfsadmin -saveNamespace")
  try:
    Logger.info("Checkpoint the current namespace.")
    # NOTE(review): as_user builds/wraps the command for the hdfs user; it
    # is not obvious from here that this also executes it -- confirm
    # against the as_user implementation.
    as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  except Exception:
    # Fixed: "except Exception, e" bound a name that was never used and is
    # Python-2-only syntax; the no-binding form is portable and equivalent.
    message = format(
      "Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}"
    )
    Logger.error(message)
    raise Fail(message)
def webhcat_service(action='start', rolling_restart=False):
  """
  Start or stop the WebHCat (Templeton) server daemon.

  On 'stop' the daemon script is invoked first; if the process survives a
  10 second grace period it is hard-killed, the task fails when it still
  will not die, and the pid file is removed.

  :param action: 'start' or 'stop'
  :param rolling_restart: True while a rolling restart is in progress; in
                          that case HADOOP_HOME points at the versioned install
  """
  import params

  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')
  environ = {'HADOOP_HOME': params.hadoop_home}

  if action == 'start':
    if rolling_restart and params.version:
      environ['HADOOP_HOME'] = format("/usr/hdp/{version}/hadoop")

    start_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
    # Skip the start when the pid file already names a live process.
    already_running = as_user(
      format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'),
      user=params.webhcat_user)
    Execute(start_cmd,
            user=params.webhcat_user,
            not_if=already_running,
            environment=environ)
  elif action == 'stop':
    stop_cmd = format('{cmd} stop')
    Execute(stop_cmd, user=params.webhcat_user, environment=environ)

    # Shell expression expanding to the daemon pid, read as the webhcat user.
    pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
    process_id_exists_command = format(
      "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1")

    # Hard kill only when the process survives the grace period.
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
    wait_time = 10
    Execute(daemon_hard_kill_cmd,
            not_if=format(
              "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )")
    )

    # check if stopped the process, else fail the task
    Execute(format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
    )

    File(params.webhcat_pid_file,
         action="delete",
    )
def prepare_upgrade_enter_safe_mode(hdfs_binary):
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires first entering Safemode.
  :param hdfs_binary: name/path of the HDFS binary to use
  :raises Fail: if safemode could not be entered/confirmed
  """
  import params

  safe_mode_enter_cmd = format("{hdfs_binary} dfsadmin -safemode enter")
  # The grep makes the command fail unless dfsadmin reports safemode is ON.
  safe_mode_enter_and_check_for_on = format(
    "{safe_mode_enter_cmd} | grep 'Safe mode is ON'")
  try:
    # Safe to call if already in Safe Mode
    Logger.info("Enter SafeMode if not already in it.")
    as_user(safe_mode_enter_and_check_for_on, params.hdfs_user,
            env={'PATH': params.hadoop_bin_dir})
  except Exception, e:
    # Surface an actionable message telling the operator the exact command to retry.
    message = format(
      "Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}"
    )
    Logger.error(message)
    raise Fail(message)
def webhcat_service(action="start", rolling_restart=False):
  """
  Start or stop the WebHCat (Templeton) server.

  :param action: "start" or "stop"
  :param rolling_restart: when True on start (and a target version is known),
                          point HADOOP_HOME at the target stack version
  """
  import params

  environ = {"HADOOP_HOME": params.hadoop_home}
  cmd = format("{webhcat_bin_dir}/webhcat_server.sh")

  if action == "start":
    if rolling_restart and params.version:
      environ["HADOOP_HOME"] = format("/usr/hdp/{version}/hadoop")

    daemon_cmd = format("cd {hcat_pid_dir} ; {cmd} start")
    # Skip start when the pid file exists and the pid it names is alive.
    no_op_test = as_user(
      format("ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1"),
      user=params.webhcat_user,
    )
    Execute(daemon_cmd, user=params.webhcat_user, not_if=no_op_test, environment=environ)
  elif action == "stop":
    # First try a graceful shutdown via the server script.
    daemon_cmd = format("{cmd} stop")
    Execute(daemon_cmd, user=params.webhcat_user, environment=environ)

    # Shell fragment that expands (as the webhcat user) to the daemon's pid.
    pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
    process_id_exists_command = format(
      "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
    )
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")

    # Hard-kill only if the process is still alive wait_time seconds after the graceful stop.
    wait_time = 10
    Execute(
      daemon_hard_kill_cmd,
      not_if=format(
        "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
      ),
    )

    # check if stopped the process, else fail the task
    Execute(format("! ({process_id_exists_command})"), tries=20, try_sleep=3)

    File(params.webhcat_pid_file, action="delete")
def check_fs_root():
  """
  Compare the FS root recorded in the Hive metastore against the expected
  {fs_root}; if a different hdfs:// root is found, rewrite it with
  'metatool -updateLocation'.
  """
  import params

  metatool_cmd = format("hive --config {hive_server_conf_dir} --service metatool")
  # BUGFIX: 'env' used to be passed to format(), where it was silently consumed
  # as a substitution variable and never applied. It belongs to as_user() (as
  # with the other as_user(..., env=...) calls in this file) so the metatool
  # command actually runs with the right PATH.
  cmd = as_user(format("{metatool_cmd} -listFSRoot"), params.hive_user, env={'PATH': params.execute_path}) \
        + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
  code, out = shell.call(cmd)

  # Only update when the probe succeeded, found a root, and it differs from fs_root.
  if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
    out = out.strip()
    cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
    Execute(cmd,
            user=params.hive_user,
            environment={'PATH': params.execute_path}
    )
def get_user_call_output(command, user, quiet=False, is_checked_call=True, **call_kwargs):
  """
  This function eliminates only output of command inside the su, ignoring the su ouput itself.
  This is useful since some users have motd messages setup by default on su -l.

  :param command: string command, or list/tuple of arguments to be joined
  :param user: user to run the command as (via su)
  :param quiet: passed through to shell.call
  :param is_checked_call: when True, raise Fail on non-zero exit; otherwise just warn
  @return: code, stdout, stderr
  """
  command_string = shell.string_cmd_from_args_list(command) if isinstance(
    command, (list, tuple)) else command
  out_files = []

  try:
    # Two temp files capture stdout and stderr of the inner command only;
    # whatever su itself prints (motd etc.) is not redirected into them.
    out_files.append(tempfile.NamedTemporaryFile())
    out_files.append(tempfile.NamedTemporaryFile())

    # other user should be able to write to it
    for f in out_files:
      os.chmod(f.name, 0666)

    command_string += " 1>" + out_files[0].name
    command_string += " 2>" + out_files[1].name

    code, _ = shell.call(shell.as_user(command_string, user),
                         quiet=quiet,
                         **call_kwargs)

    # files_output[0] is stdout, files_output[1] is stderr.
    files_output = []
    for f in out_files:
      files_output.append(f.read().strip('\n'))

    if code:
      # Report stderr first, then stdout, in the failure message.
      all_output = files_output[1] + '\n' + files_output[0]
      err_msg = Logger.filter_text(
        ("Execution of '%s' returned %d. %s") %
        (command_string, code, all_output))

      if is_checked_call:
        raise Fail(err_msg)
      else:
        Logger.warning(err_msg)

    return code, files_output[0], files_output[1]
  finally:
    # Closing a NamedTemporaryFile also deletes it.
    for f in out_files:
      f.close()
def check_fs_root():
  """
  Compare the FS root recorded in the Hive metastore against the expected
  {fs_root}; if a different hdfs:// root is found, rewrite it with
  'metatool -updateLocation'.
  """
  import params

  metatool_cmd = format(
    "hive --config {hive_server_conf_dir} --service metatool")
  # NOTE(review): 'env=' here is passed to format(), not to as_user(); other
  # call sites in this file pass env directly to as_user(). Likely this PATH
  # is never applied to the command -- confirm intent.
  cmd = as_user(format("{metatool_cmd} -listFSRoot",
                       env={'PATH': params.execute_path}),
                params.hive_user) \
        + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
  code, out = shell.call(cmd)

  # Only update when the probe succeeded, found a root, and it differs from fs_root.
  if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip(
  ):
    out = out.strip()
    cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
    Execute(cmd,
            user=params.hive_user,
            environment={'PATH': params.execute_path})
def check_fs_root():
  """
  Compare the FS root recorded in the Hive metastore against the expected
  {fs_root}; if a different hdfs:// root is found, rewrite it with
  'metatool -updateLocation'. No-op unless fs_root is an hdfs:// URI.
  """
  import params

  if not params.fs_root.startswith("hdfs://"):
    Logger.info("Skipping fs root check as fs_root does not start with hdfs://")
    return

  metatool_cmd = format("hive --config {hive_server_conf_dir} --service metatool")
  # NOTE(review): 'env=' here is passed to format(), not to as_user(); other
  # call sites in this file pass env directly to as_user(). Likely this PATH
  # is never applied to the command -- confirm intent.
  cmd = as_user(format("{metatool_cmd} -listFSRoot", env={"PATH": params.execute_path}), params.hive_user) + format(
    " 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1"
  )
  code, out = shell.call(cmd)

  # Only update when the probe succeeded, found a root, and it differs from fs_root.
  if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
    out = out.strip()
    cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
    Execute(cmd, user=params.hive_user, environment={"PATH": params.execute_path})
def get_user_call_output(command, user, quiet=False, is_checked_call=True, **call_kwargs):
  """
  This function eliminates only output of command inside the su, ignoring the su ouput itself.
  This is useful since some users have motd messages setup by default on su -l.

  :param command: string command, or list/tuple of arguments to be joined
  :param user: user to run the command as (via su)
  :param quiet: passed through to shell.call
  :param is_checked_call: when True, raise Fail on non-zero exit; otherwise just warn
  @return: code, stdout, stderr
  """
  command_string = shell.string_cmd_from_args_list(command) if isinstance(command, (list, tuple)) else command
  out_files = []

  try:
    # Two temp files capture stdout and stderr of the inner command only;
    # whatever su itself prints (motd etc.) is not redirected into them.
    out_files.append(tempfile.NamedTemporaryFile())
    out_files.append(tempfile.NamedTemporaryFile())

    # other user should be able to write to it
    for f in out_files:
      os.chmod(f.name, 0666)

    command_string += " 1>" + out_files[0].name
    command_string += " 2>" + out_files[1].name

    code, _ = shell.call(shell.as_user(command_string, user), quiet=quiet, **call_kwargs)

    # files_output[0] is stdout, files_output[1] is stderr.
    files_output = []
    for f in out_files:
      files_output.append(f.read().strip('\n'))

    if code:
      # Report stderr first, then stdout, in the failure message.
      all_output = files_output[1] + '\n' + files_output[0]
      err_msg = Logger.filter_text(("Execution of '%s' returned %d. %s") % (command_string, code, all_output))

      if is_checked_call:
        raise Fail(err_msg)
      else:
        Logger.warning(err_msg)

    return code, files_output[0], files_output[1]
  finally:
    # Closing a NamedTemporaryFile also deletes it.
    for f in out_files:
      f.close()
def service(name, action='start'):
  """
  Start or stop a Storm daemon (nimbus, supervisor, ui, drpc, logviewer,
  rest_api, ...). Pid files are written by this function (Storm daemons do
  not write their own); pids are discovered by grepping jps output.

  :param name: storm component name
  :param action: 'start' or 'stop'
  """
  import params
  import status_params

  pid_file = status_params.pid_files[name]
  # Succeeds only when the pid file exists and the pid it names is alive.
  no_op_test = as_user(format(
    "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"
  ), user=params.storm_user)

  # Slower-starting daemons get more retries when waiting for the pid.
  if name == "logviewer" or name == "drpc":
    tries_count = 12
  else:
    tries_count = 6

  # Pattern used against 'jps -l' output to locate this daemon's JVM.
  if name == 'ui':
    process_grep = "backtype.storm.ui.core$"
  elif name == "rest_api":
    process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$")
  else:
    process_grep = format("storm.daemon.{name}$")

  find_proc = format("{jps_binary} -l | grep {process_grep}")
  write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
  crt_pid_cmd = format("{find_proc} && {write_pid}")
  storm_env = format(
    "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

  if action == "start":
    if name == "rest_api":
      # rest_api is a plain jar, not managed by the 'storm' launcher.
      process_cmd = format(
        "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server"
      )
      cmd = format(
        "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1"
      )
    else:
      cmd = format(
        "{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

    # Daemonized start: do not wait for the launcher to finish.
    Execute(cmd,
            not_if=no_op_test,
            user=params.storm_user,
            wait_for_finish=False,
            path=params.storm_bin_dir)

    # Poll jps until the daemon appears, then record its pid.
    Execute(crt_pid_cmd,
            user=params.storm_user,
            logoutput=True,
            tries=tries_count,
            try_sleep=10,
            path=params.storm_bin_dir)

  elif action == "stop":
    process_dont_exist = format("! ({no_op_test})")
    if os.path.exists(pid_file):
      pid = get_user_call_output.get_user_call_output(
        format("! test -f {pid_file} || cat {pid_file}"),
        user=params.storm_user)[1]

      # if multiple processes are running (for example user can start logviewer from console)
      # there can be more than one id
      pid = pid.replace("\n", " ")

      # Graceful kill first, then (best-effort) kill -9 if still alive after a grace period.
      Execute(format("{sudo} kill {pid}"), not_if=process_dont_exist)

      Execute(
        format("{sudo} kill -9 {pid}"),
        not_if=format(
          "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"
        ),
        ignore_failures=True)

      File(pid_file, action="delete")
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  """
  Entry point for NameNode lifecycle actions.

  :param action: required; one of "configure", "start", "stop", "status", "decommission"
  :param hdfs_binary: name/path of the HDFS binary; required for start/stop
  :param do_format: when True (and formatting not disabled), format the NameNode on start
  :param upgrade_type: None, "rolling", or "nonrolling" -- drives upgrade-specific start options
  :param env: environment object, forwarded to ZKFC handling during rolling upgrade
  :raises Fail: on missing parameters or failed standby bootstrap
  """
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    #we need this directory to be present before any action(HA manual steps for
    #additional namenode)
    create_name_dirs(params.dfs_name_dir)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params
    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()
      pass

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
      # if the current host is the standby NameNode in an HA deployment
      # run the bootstrap command, to start the NameNode in standby mode
      # this requires that the active NameNode is already up and running,
      # so this execute should be re-tried upon failure, up to a timeout
      success = bootstrap_standby_namenode(params)
      if not success:
        raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling" and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
      # to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    # Extra options passed to the namenode start command during stack upgrades.
    options = ""
    if upgrade_type == "rolling":
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
    elif upgrade_type == "nonrolling":
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user = params.hdfs_user)

    # Shell command (or boolean) later used as a guard for HDFS directory creation:
    # in HA it succeeds only on the active NameNode; True means "always run".
    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
    else:
      is_active_namenode_cmd = True

    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |
    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        Logger.info("Wait for NameNode to become active.")
        if is_active_namenode(hdfs_binary): # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)
    create_ranger_audit_hdfs_directories(is_active_namenode_cmd)

  elif action == "stop":
    import params
    service(
      action="stop", name="namenode",
      user=params.hdfs_user
    )
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
def oozie_server_specific():
  """
  Oozie-server-only setup: lay out server directories, unpack the sharelib,
  install the ext JS library, the database JAR, optional Falcon/LZO extras,
  and (re)prepare the Oozie WAR when needed.
  """
  import params

  # Succeeds only when the pid file exists and the pid it names is alive;
  # used below to skip expensive steps while the server is running.
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)

  # Drop a stale pid file left by an unclean shutdown.
  File(params.pid_file,
       action="delete",
       not_if=no_op_test
  )

  oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
  Directory( oozie_server_directories,
             owner = params.oozie_user,
             group = params.user_group,
             mode = 0755,
             recursive = True,
             cd_access="a",
  )

  Directory(params.oozie_libext_dir,
            recursive=True,
  )

  # Sharelib is only re-extracted when the marker hashcode file or the share
  # dir is missing, and never while the server is running.
  hashcode_file = format("{oozie_home}/.hashcode")
  skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

  untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

  Execute( untar_sharelib,    # time-expensive
           not_if  = format("{no_op_test} || {skip_recreate_sharelib}"),
           sudo = True,
  )

  # Install the ext JS library and fix ownership of the webapp conf dir.
  configure_cmds = []
  configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir))
  configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))
  configure_cmds.append(('chown', '-RL', format('{oozie_user}:{user_group}'), params.oozie_webapps_conf_dir))

  Execute( configure_cmds,
           not_if  = no_op_test,
           sudo = True,
  )

  # download the database JAR
  download_database_library_if_needed()

  #falcon el extension
  if params.has_falcon_host:
    Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
      not_if  = no_op_test)

    Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
      not_if  = no_op_test)

  if params.lzo_enabled and len(params.all_lzo_packages) > 0:
    Package(params.all_lzo_packages)
    Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if  = no_op_test,
    )

  # The WAR is only re-prepared when the recorded prepare-war command differs
  # from the current one (or the sharelib was recreated); skipped while running.
  prepare_war_cmd_file = format("{oozie_home}/.prepare_war_cmd")
  prepare_war_cmd = format("cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure}").strip()
  skip_prepare_war_cmd = format("test -f {prepare_war_cmd_file} && [[ `cat {prepare_war_cmd_file}` == '{prepare_war_cmd}' ]]")

  Execute(prepare_war_cmd,    # time-expensive
          user = params.oozie_user,
          not_if  = format("{no_op_test} || {skip_recreate_sharelib} && {skip_prepare_war_cmd}")
  )
  File(hashcode_file,
       mode = 0644,
  )
  File(prepare_war_cmd_file,
       content = prepare_war_cmd,
       mode = 0644,
  )

  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0:
    # Create hive-site and tez-site configs for oozie
    Directory(params.hive_conf_dir,
        recursive = True,
        owner = params.oozie_user,
        group = params.user_group
    )
    if 'hive-site' in params.config['configurations']:
      XmlConfig("hive-site.xml",
        conf_dir=params.hive_conf_dir,
        configurations=params.config['configurations']['hive-site'],
        configuration_attributes=params.config['configuration_attributes']['hive-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0644
      )
    if 'tez-site' in params.config['configurations']:
      XmlConfig( "tez-site.xml",
        conf_dir = params.hive_conf_dir,
        configurations = params.config['configurations']['tez-site'],
        configuration_attributes=params.config['configuration_attributes']['tez-site'],
        owner = params.oozie_user,
        group = params.user_group,
        mode = 0664
      )
  Execute(('chown', '-R', format("{oozie_user}:{user_group}"), params.oozie_server_dir),
          sudo=True
  )
def oozie_server_specific(upgrade_type):
  """
  Oozie-server-only setup (newer stack variant): lay out server directories,
  unpack the sharelib, install the ext JS zip (path depends on upgrade type),
  the database JAR, optional Falcon/LZO extras, prepare the WAR, and write
  hive-site/tez-site (and optionally the Atlas Hive hook) for Oozie.

  :param upgrade_type: None or an upgrade type; forwarded to the ext-zip
                       source path resolution
  """
  import params

  # Succeeds only when the pid file exists and the pid it names is alive;
  # used below to skip expensive steps while the server is running.
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)

  # Drop a stale pid file left by an unclean shutdown.
  File(params.pid_file,
       action="delete",
       not_if=no_op_test
  )

  oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
  Directory( oozie_server_directories,
             owner = params.oozie_user,
             group = params.user_group,
             mode = 0755,
             create_parents = True,
             cd_access="a",
  )

  Directory(params.oozie_libext_dir,
            create_parents = True,
  )

  # Sharelib is only re-extracted when the marker hashcode file or the share
  # dir is missing, and never while the server is running.
  hashcode_file = format("{oozie_home}/.hashcode")
  skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

  untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

  Execute( untar_sharelib,    # time-expensive
           not_if  = format("{no_op_test} || {skip_recreate_sharelib}"),
           sudo = True,
  )

  configure_cmds = []
  # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
  source_ext_zip_paths = get_oozie_ext_zip_source_paths(upgrade_type, params)

  # Copy the first oozie ext-2.2.zip file that is found.
  # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
  if source_ext_zip_paths is not None:
    for source_ext_zip_path in source_ext_zip_paths:
      if os.path.isfile(source_ext_zip_path):
        configure_cmds.append(('cp', source_ext_zip_path, params.oozie_libext_dir))
        configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))

        Execute(configure_cmds,
                not_if=no_op_test,
                sudo=True,
                )
        break

  Directory(params.oozie_webapps_conf_dir,
            owner = params.oozie_user,
            group = params.user_group,
            recursive_ownership = True,
            recursion_follow_links = True,
  )

  # download the database JAR
  download_database_library_if_needed()

  #falcon el extension
  if params.has_falcon_host:
    Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
      not_if  = no_op_test)

    Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
      not_if  = no_op_test)

  if params.lzo_enabled and len(params.all_lzo_packages) > 0:
    # Install LZO packages with repo-retry settings from the agent config.
    Package(params.all_lzo_packages,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
    Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if  = no_op_test,
    )

  prepare_war(params)

  File(hashcode_file,
       mode = 0644,
  )

  if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS, params.stack_version_formatted):
    # Create hive-site and tez-site configs for oozie
    Directory(params.hive_conf_dir,
        create_parents = True,
        owner = params.oozie_user,
        group = params.user_group
    )
    if 'hive-site' in params.config['configurations']:
      # Rewrites jceks credential-provider paths into an Oozie-owned keystore.
      hive_site_config = update_credential_provider_path(params.config['configurations']['hive-site'],
                                                         'hive-site',
                                                         os.path.join(params.hive_conf_dir, 'hive-site.jceks'),
                                                         params.oozie_user,
                                                         params.user_group
                                                         )
      XmlConfig("hive-site.xml",
        conf_dir=params.hive_conf_dir,
        configurations=hive_site_config,
        configuration_attributes=params.config['configuration_attributes']['hive-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0644
      )
    if 'tez-site' in params.config['configurations']:
      XmlConfig( "tez-site.xml",
        conf_dir = params.hive_conf_dir,
        configurations = params.config['configurations']['tez-site'],
        configuration_attributes=params.config['configuration_attributes']['tez-site'],
        owner = params.oozie_user,
        group = params.user_group,
        mode = 0664
      )

    # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory
    # {stack_root}/{current_version}/atlas/hook/hive/
    # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie.
    # However, should still save the file on this host so that can upload it to the Oozie Sharelib in DFS.
    if has_atlas_in_cluster():
      atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
      Logger.info("Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath))
      setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group)

  Directory(params.oozie_server_dir,
            owner = params.oozie_user,
            group = params.user_group,
            recursive_ownership = True,
  )
  if params.security_enabled:
    File(os.path.join(params.conf_dir, 'zkmigrator_jaas.conf'),
         owner=params.oozie_user,
         group=params.user_group,
         content=Template("zkmigrator_jaas.conf.j2")
         )
def action_run(self):
  """
  Copy a local file into HDFS (via 'hadoop fs -copyFromLocal') unless the
  destination already exists, then optionally chown/chmod the destination.

  Reads its inputs from self.resource: path, dest_dir, dest_file,
  kinnit_if_needed, user, owner, group, mode, hdfs_user, hadoop_conf_dir,
  hadoop_bin_dir.
  """
  path = self.resource.path
  dest_dir = self.resource.dest_dir
  dest_file = self.resource.dest_file
  kinnit_if_needed = self.resource.kinnit_if_needed
  user = self.resource.user # user to perform commands as. If not provided, default to the owner
  owner = self.resource.owner
  group = self.resource.group
  mode = self.resource.mode
  hdfs_usr=self.resource.hdfs_user
  hadoop_conf_path = self.resource.hadoop_conf_dir
  bin_dir = self.resource.hadoop_bin_dir

  # Destination is either dest_dir/dest_file or dest_dir/<basename of path>.
  if dest_file:
    copy_cmd = format("fs -copyFromLocal {path} {dest_dir}/{dest_file}")
    dest_path = dest_dir + dest_file if dest_dir.endswith(os.sep) else dest_dir + os.sep + dest_file
  else:
    dest_file_name = os.path.split(path)[1]
    copy_cmd = format("fs -copyFromLocal {path} {dest_dir}")
    dest_path = dest_dir + os.sep + dest_file_name
  # Need to run unless as resource user
  if kinnit_if_needed:
    Execute(kinnit_if_needed,
            user=user if user else owner,
    )

  # Skip the copy when the destination already exists in HDFS.
  unless_cmd = as_user(format("PATH=$PATH:{bin_dir} hadoop fs -ls {dest_path}"), user if user else owner)

  ExecuteHadoop(copy_cmd,
                not_if=unless_cmd,
                user=user if user else owner,
                bin_dir=bin_dir,
                conf_dir=hadoop_conf_path
                )

  # Build the 'owner[:group]' spec for chown; None disables the chown.
  if not owner:
    chown = None
  else:
    if not group:
      chown = owner
    else:
      chown = format('{owner}:{group}')

  if chown:
    chown_cmd = format("fs -chown {chown} {dest_path}")

    # chown/chmod run as the HDFS superuser.
    ExecuteHadoop(chown_cmd,
                  user=hdfs_usr,
                  bin_dir=bin_dir,
                  conf_dir=hadoop_conf_path)
    pass

  if mode:
    # oct(mode)[1:] renders the Python int as an octal permission string.
    dir_mode = oct(mode)[1:]
    chmod_cmd = format('fs -chmod {dir_mode} {dest_path}')

    ExecuteHadoop(chmod_cmd,
                  user=hdfs_usr,
                  bin_dir=bin_dir,
                  conf_dir=hadoop_conf_path)
    pass
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  """
  Entry point for NameNode lifecycle actions (role-aware upgrade variant).

  :param action: required; one of "configure", "start", "stop", "status", "decommission"
  :param hdfs_binary: name/path of the HDFS binary; required for start/stop
  :param do_format: when True, format the NameNode on start
  :param upgrade_type: None, "rolling", or "nonrolling" -- drives upgrade-specific start options
  :param env: environment object, forwarded to ZKFC handling during rolling upgrade
  :raises Fail: on missing parameters, failed standby bootstrap, or missing
                desired_namenode_role during a nonrolling HA upgrade
  """
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail(
      '"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    #we need this directory to be present before any action(HA manual steps for
    #additional namenode)
    create_name_dirs(params.dfs_name_dir)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(
      action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params
    if do_format:
      format_namenode()
      pass

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
      # if the current host is the standby NameNode in an HA deployment
      # run the bootstrap command, to start the NameNode in standby mode
      # this requires that the active NameNode is already up and running,
      # so this execute should be re-tried upon failure, up to a timeout
      success = bootstrap_standby_namenode(params)
      if not success:
        raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling" and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
      # to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    # Extra options passed to the namenode start command during stack upgrades.
    options = ""
    if upgrade_type == "rolling":
      options = "-rollingUpgrade started"
    elif upgrade_type == "nonrolling":
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(
        format(
          "Previous file system image dir present is {is_previous_image_dir}"
        ))

      if params.dfs_ha_enabled:
        if params.desired_namenode_role is None:
          raise Fail(
            "Did not receive parameter \"desired_namenode_role\" to indicate the role that this NameNode should have."
          )

        if params.desired_namenode_role == "active":
          # The "-upgrade" command can only be used exactly once. If used more than once during a retry, it will cause problems.
          options = "" if is_previous_image_dir else "-upgrade"

        if params.desired_namenode_role == "standby":
          options = "-bootstrapStandby -force"
      else:
        # Both Primary and Secondary NameNode can use the same command.
        options = "" if is_previous_image_dir else "-upgrade"
    Logger.info(format("Option for start command: {options}"))

    service(action="start",
            name="namenode",
            user=params.hdfs_user,
            options=options,
            create_pid_dir=True,
            create_log_dir=True)

    if params.security_enabled:
      Execute(format(
        "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
      ),
              user=params.hdfs_user)

    # Succeeds only once dfsadmin reports safemode OFF.
    is_namenode_safe_mode_off = format(
      "{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'"
    )

    # Shell command (or False) later used as a guard for HDFS directory creation:
    # in HA it succeeds only on the active NameNode.
    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(format(
        "{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"
      ),
                                       params.hdfs_user,
                                       env={
                                         'PATH': params.hadoop_bin_dir
                                       })
    else:
      is_active_namenode_cmd = False

    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |
    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        # During normal operations, the NameNode is expected to be up.
        code, out = shell.call(
          is_active_namenode_cmd,
          logoutput=True)  # If active NN, code will be 0
        if code == 0:  # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        # BUGFIX: corrected typo in the log message ("leafe" -> "leave").
        Logger.info(
          "Wait to leave safemode since must transition from ON to OFF."
        )
        try:
          # Wait up to 30 mins
          Execute(is_namenode_safe_mode_off,
                  tries=180,
                  try_sleep=10,
                  user=params.hdfs_user,
                  logoutput=True)
        except Fail:
          Logger.error(
            "NameNode is still in safemode, please be careful with commands that need safemode OFF."
          )

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)

  elif action == "stop":
    import params
    service(action="stop", name="namenode", user=params.hdfs_user)
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
def hive_service(name, action='start', upgrade_type=None):
  """
  Start or stop the Hive Metastore or HiveServer2 daemon.

  :param name: 'metastore' or 'hiveserver2'
  :param action: 'start' or 'stop'
  :param upgrade_type: when UPGRADE_TYPE_ROLLING on hiveserver2 start, the
                       existing pid is ignored so a new instance can spin up
                       while the old one de-registers
  """
  import params

  # Select the launcher script and pid file for the requested daemon.
  if name == 'metastore':
    pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
    cmd = format(
      "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
    )
  elif name == 'hiveserver2':
    pid_file = format("{hive_pid_dir}/{hive_pid}")
    cmd = format(
      "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
    )

  # Shell fragment that expands (as the hive user) to the daemon's pid.
  pid_expression = "`" + as_user(format("cat {pid_file}"), user=params.hive_user) + "`"
  process_id_exists_command = format(
    "ls {pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
  )

  if action == 'start':
    if name == 'hiveserver2':
      check_fs_root()

    daemon_cmd = cmd

    # upgrading hiveserver2 (rolling_restart) means that there is an existing,
    # de-registering hiveserver2; the pid will still exist, but the new
    # hiveserver is spinning up on a new port, so the pid will be re-written
    if upgrade_type == UPGRADE_TYPE_ROLLING:
      process_id_exists_command = None

    if params.security_enabled:
      hive_kinit_cmd = format(
        "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
      )
      Execute(hive_kinit_cmd, user=params.hive_user)

    Execute(daemon_cmd,
            user=params.hive_user,
            environment={
              'JAVA_HOME': params.java64_home,
              'HIVE_CMD': params.hive_cmd
            },
            path=params.execute_path,
            not_if=process_id_exists_command)

    # For the supported external metastore databases, verify JDBC connectivity
    # before declaring the start successful.
    if params.hive_jdbc_driver == "com.mysql.jdbc.Driver" or \
       params.hive_jdbc_driver == "org.postgresql.Driver" or \
       params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver":
      db_connection_check_command = format(
        "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
      )

      Execute(db_connection_check_command,
              path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
              tries=5,
              try_sleep=10)
  elif action == 'stop':
    # Graceful kill first; then hard kill if still alive after {wait_time}s.
    daemon_kill_cmd = format("{sudo} kill {pid_expression}")
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")

    Execute(daemon_kill_cmd,
            not_if=format("! ({process_id_exists_command})"))

    wait_time = 5
    Execute(
      daemon_hard_kill_cmd,
      not_if=format(
        "( sleep {wait_time} && ! ({process_id_exists_command}) )"))

    # check if stopped the process, else fail the task
    Execute(
      format("! ({process_id_exists_command})"),
      tries=20,
      try_sleep=3,
    )

    File(pid_file, action="delete")
def hive(name=None): import params if name == 'hiveserver2': # HDP 2.1.* or lower if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2.0.0") < 0: params.HdfsResource(params.webhcat_apps_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=0755 ) # Create webhcat dirs. if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir: params.HdfsResource(params.hcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hcat_user, mode=params.hcat_hdfs_user_mode ) params.HdfsResource(params.webhcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=params.webhcat_hdfs_user_mode ) # ****** Begin Copy Tarballs ****** # ********************************* # HDP 2.2 or higher, copy mapreduce.tar.gz to HDFS if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0: copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode. # This can use a different source and dest location to account for both HDP 2.1 and 2.2 copy_to_hdfs("pig", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.pig_tar_source, custom_dest_file=params.pig_tar_dest_file, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("hive", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.hive_tar_source, custom_dest_file=params.hive_tar_dest_file, host_sys_prepped=params.host_sys_prepped) wildcard_tarballs = ["sqoop", "hadoop_streaming"] for tarball_name in wildcard_tarballs: source_file_pattern = eval("params." + tarball_name + "_tar_source") dest_dir = eval("params." 
+ tarball_name + "_tar_dest_dir") if source_file_pattern is None or dest_dir is None: continue source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern] for source_file in source_files: src_filename = os.path.basename(source_file) dest_file = os.path.join(dest_dir, src_filename) copy_to_hdfs(tarball_name, params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=source_file, custom_dest_file=dest_file, host_sys_prepped=params.host_sys_prepped) # ******* End Copy Tarballs ******* # ********************************* # Create Hive Metastore Warehouse Dir params.HdfsResource(params.hive_apps_whs_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=0777 ) # Create Hive User Dir params.HdfsResource(params.hive_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=params.hive_hdfs_user_mode ) if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"): params.HdfsResource(params.hive_exec_scratchdir, type="directory", action="create_on_execute", owner=params.hive_user, group=params.hdfs_user, mode=0777) # Hive expects this dir to be writeable by everyone as it is used as a temp dir params.HdfsResource(None, action="execute") Directory(params.hive_etc_dir_prefix, mode=0755 ) # We should change configurations for client as well as for server. # The reason is that stale-configs are service-level, not component. 
for conf_dir in params.hive_conf_dirs_list: fill_conf_dir(conf_dir) XmlConfig("hive-site.xml", conf_dir=params.hive_config_dir, configurations=params.hive_site_config, configuration_attributes=params.config['configuration_attributes']['hive-site'], owner=params.hive_user, group=params.user_group, mode=0644) setup_atlas_hive() if params.hive_specific_configs_supported and name == 'hiveserver2': XmlConfig("hiveserver2-site.xml", conf_dir=params.hive_server_conf_dir, configurations=params.config['configurations']['hiveserver2-site'], configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'], owner=params.hive_user, group=params.user_group, mode=0644) File(format("{hive_config_dir}/hive-env.sh"), owner=params.hive_user, group=params.user_group, content=InlineTemplate(params.hive_env_sh_template) ) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, recursive=True, owner='root', group='root' ) File(os.path.join(params.limits_conf_dir, 'hive.conf'), owner='root', group='root', mode=0644, content=Template("hive.conf.j2") ) if (name == 'metastore' or name == 'hiveserver2') and not os.path.exists(params.target): jdbc_connector() File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")), mode = 0644, ) if name == 'metastore': File(params.start_metastore_path, mode=0755, content=StaticFile('startMetastore.sh') ) if params.init_metastore_schema: create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_bin}/schematool -initSchema " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord {hive_metastore_user_passwd!p}") check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_bin}/schematool -info " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord 
{hive_metastore_user_passwd!p}"), params.hive_user) # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords. # Fixing it with the hack below: quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd)) if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \ or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"': quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1] Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace( format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING)) Execute(create_schema_cmd, not_if = check_schema_created_cmd, user = params.hive_user ) elif name == 'hiveserver2': File(params.start_hiveserver2_path, mode=0755, content=Template(format('{start_hiveserver2_script}')) ) if name != "client": crt_directory(params.hive_pid_dir) crt_directory(params.hive_log_dir) crt_directory(params.hive_var_lib)
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False): """ :param action: Either "start" or "stop" :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc" :param user: User to run the command as :param options: Additional options to pass to command as a string :param create_pid_dir: Create PID directory :param create_log_dir: Crate log file directory """ import params options = options if options else "" pid_dir = format("{hadoop_pid_dir_prefix}/{user}") pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid") hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir} log_dir = format("{hdfs_log_dir_prefix}/{user}") # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542 if name == "nfs3": pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid") custom_export = { 'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user, 'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir, 'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir } hadoop_env_exports.update(custom_export) process_id_exists_command = as_sudo( ["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file]) # on STOP directories shouldn't be created # since during stop still old dirs are used (which were created during previous start) if action != "stop": if name == "nfs3": Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.root_user, group=params.root_group) else: Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.hdfs_user, group=params.user_group) if create_pid_dir: Directory(pid_dir, owner=user, group=params.user_group, create_parents=True) if create_log_dir: if name == "nfs3": Directory(log_dir, mode=0775, owner=params.root_user, group=params.user_group) else: Directory(log_dir, owner=user, group=params.user_group, create_parents=True) if params.security_enabled and name == "datanode": ## The directory where pid 
files are stored in the secure data environment. hadoop_secure_dn_pid_dir = format( "{hadoop_pid_dir_prefix}/{hdfs_user}") hadoop_secure_dn_pid_file = format( "{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid") # At datanode_non_root stack version and further, we may start datanode as a non-root even in secure cluster if not (params.stack_version_formatted and check_stack_feature( StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted) ) or params.secure_dn_ports_are_in_use: user = "******" pid_file = format( "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid" ) if action == 'stop' and (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) and \ os.path.isfile(hadoop_secure_dn_pid_file): # We need special handling for this case to handle the situation # when we configure non-root secure DN and then restart it # to handle new configs. Otherwise we will not be able to stop # a running instance user = "******" try: check_process_status(hadoop_secure_dn_pid_file) custom_export = {'HADOOP_SECURE_DN_USER': params.hdfs_user} hadoop_env_exports.update(custom_export) except ComponentIsNotRunning: pass hdfs_bin = format("{hadoop_bin}/hdfs") if user == "root": cmd = [ hdfs_bin, "--config", params.hadoop_conf_dir, "--daemon", action, name ] if options: cmd += [ options, ] daemon_cmd = as_sudo(cmd) else: cmd = format( "{ulimit_cmd} {hdfs_bin} --config {hadoop_conf_dir} --daemon {action} {name}" ) if options: cmd += " " + options daemon_cmd = as_user(cmd, user) if action == "start": # remove pid file from dead process File(pid_file, action="delete", not_if=process_id_exists_command) try: Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports) except: show_logs(log_dir, user) raise elif action == "stop": try: Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports) except: show_logs(log_dir, user) raise wait_process_stopped(pid_file) 
File(pid_file, action="delete")
def hive_service(name, action="start", rolling_restart=False):
  """
  Start or stop a Hive daemon (Metastore or HiveServer2) via its wrapper script.

  :param name: "metastore" or "hiveserver2"
  :param action: "start" or "stop"
  :param rolling_restart: if True, skip the pid-exists guard on start — the old
                          de-registering daemon's pid file still exists and will
                          be overwritten by the new process
  """
  import params

  if name == "metastore":
    pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
    cmd = format(
      "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
    )
  elif name == "hiveserver2":
    pid_file = format("{hive_pid_dir}/{hive_pid}")
    cmd = format(
      "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
    )

  # Shell back-tick expression that expands to the daemon pid, read as the hive user.
  pid_expression = "`" + as_user(format("cat {pid_file}"), user=params.hive_user) + "`"
  # Succeeds only while the pid file exists AND the recorded pid is alive.
  process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1")

  if action == "start":
    if name == "hiveserver2":
      check_fs_root()

    daemon_cmd = cmd
    hadoop_home = params.hadoop_home
    hive_bin = "hive"

    # upgrading hiveserver2 (rolling_restart) means that there is an existing,
    # de-registering hiveserver2; the pid will still exist, but the new
    # hiveserver is spinning up on a new port, so the pid will be re-written
    if rolling_restart:
      process_id_exists_command = None

    if params.version:
      # Point at the versioned HDP install when a specific stack version is active.
      import os
      hadoop_home = format("/usr/hdp/{version}/hadoop")
      hive_bin = os.path.join(params.hive_bin, hive_bin)

    Execute(
      daemon_cmd,
      user=params.hive_user,
      environment={"HADOOP_HOME": hadoop_home, "JAVA_HOME": params.java64_home, "HIVE_BIN": hive_bin},
      path=params.execute_path,
      not_if=process_id_exists_command,
    )

    if (
      params.hive_jdbc_driver == "com.mysql.jdbc.Driver"
      or params.hive_jdbc_driver == "org.postgresql.Driver"
      or params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver"
    ):
      # Verify the metastore DB is reachable with the configured credentials
      # ({...!p} keeps the password out of the logs).
      db_connection_check_command = format(
        "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
      )
      Execute(
        db_connection_check_command,
        path="/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin",
        tries=5,
        try_sleep=10
      )
  elif action == "stop":
    daemon_kill_cmd = format("{sudo} kill {pid_expression}")
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")

    # Graceful kill first; skipped when the process is already gone.
    Execute(daemon_kill_cmd, not_if=format("! ({process_id_exists_command})"))

    wait_time = 5
    # SIGKILL only if the process is still alive after wait_time seconds;
    # the leading "! (...)" short-circuits the sleep when it already exited.
    Execute(
      daemon_hard_kill_cmd,
      not_if=format(
        "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
      ),
    )

    # check if stopped the process, else fail the task
    Execute(format("! ({process_id_exists_command})"), tries=20, try_sleep=3)

    File(pid_file, action="delete")
def oozie_service(action = 'start', rolling_restart=False):
  """
  Starts or stops the Oozie service.

  On start (non-rolling): verifies the JDBC driver jar and DB connectivity,
  creates the Oozie database schema, kinits if secured, ensures the HDFS
  sharelib exists, then launches the server.

  :param action: 'start' or 'stop'
  :param rolling_restart: if True, then most of the pre-startup checks are
    skipped since a variation of them was performed during the rolling upgrade
  :return:
  """
  import params

  environment={'OOZIE_CONFIG': params.conf_dir}

  if params.security_enabled:
    if params.oozie_principal is None:
      oozie_principal_with_host = 'missing_principal'
    else:
      oozie_principal_with_host = params.oozie_principal.replace("_HOST", params.hostname)
    kinit_if_needed = format("{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};")
  else:
    kinit_if_needed = ""

  # Succeeds only while the Oozie pid file exists and the recorded pid is alive.
  # NOTE(review): {pid_file} is not a local here — presumably resolved from
  # params by format(); confirm against the params module.
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)

  if action == 'start':
    start_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh")

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
      # Connectivity probe for external metastore DBs ({...!p} hides the password in logs).
      db_connection_check_command = format("{java_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}")
    else:
      db_connection_check_command = None

    if not rolling_restart:
      # PostgreSQL requires the driver jar to be staged manually; bail out with instructions.
      if not os.path.isfile(params.target) and params.jdbc_driver_name == "org.postgresql.Driver":
        print format("ERROR: jdbc file {target} is unavailable. Please, follow next steps:\n" \
          "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \
          "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \
          "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \
          "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n")
        exit(1)

      if db_connection_check_command:
        Execute( db_connection_check_command,
                 tries=5,
                 try_sleep=10,
                 user=params.oozie_user,
        )

      # Create the Oozie DB schema; best-effort (it may already exist) and
      # skipped entirely while the server is running.
      Execute( format("cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run"),
               user = params.oozie_user,
               not_if = no_op_test,
               ignore_failures = True
      )

      if params.security_enabled:
        Execute(kinit_if_needed,
                user = params.oozie_user,
        )

      # Determine whether the HDFS sharelib already exists, cheapest check first.
      if params.host_sys_prepped:
        print "Skipping creation of oozie sharelib as host is sys prepped"
        hdfs_share_dir_exists = True # skip time-expensive hadoop fs -ls check
      elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
        # check with webhdfs is much faster than executing hadoop fs -ls.
        util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled)
        list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False)
        hdfs_share_dir_exists = ('FileStatus' in list_status)
      else:
        # have to do time expensive hadoop fs -ls check.
        hdfs_share_dir_exists = shell.call(format("{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'"),
                                           user=params.oozie_user)[0]

      if not hdfs_share_dir_exists:
        Execute( params.put_shared_lib_to_hdfs_cmd,
                 user = params.oozie_user,
                 path = params.execute_path
        )
        params.HdfsResource(format("{oozie_hdfs_user_dir}/share"),
                            type="directory",
                            action="create_on_execute",
                            mode=0755,
                            recursive_chmod=True,
        )
        params.HdfsResource(None, action="execute")

    # start oozie
    Execute( start_cmd, environment=environment, user = params.oozie_user,
             not_if = no_op_test )
  elif action == 'stop':
    stop_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-stop.sh")

    # stop oozie
    Execute(stop_cmd, environment=environment, only_if = no_op_test,
            user = params.oozie_user)

    File(params.pid_file, action = "delete")
def service(name, action = 'start'):
  """
  Start or stop a Storm daemon and keep its pid file in sync.

  :param name: daemon to control (e.g. "nimbus", "ui", "drpc", "logviewer",
               "rest_api", ...)
  :param action: "start" or "stop"
  """
  import params
  import status_params

  pid_file = status_params.pid_files[name]
  # Succeeds only while the pid file exists and the recorded pid is alive.
  no_op_test = as_user(format(
    "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.storm_user)

  # Slower-starting daemons get twice as many attempts at capturing the pid.
  tries_count = 12 if name in ("logviewer", "drpc") else 6

  # Pattern that identifies this daemon's JVM in the `jps -l` listing.
  if name == 'ui':
    process_grep = "backtype.storm.ui.core$"
  else:
    process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$") if name == "rest_api" \
      else format("storm.daemon.{name}$")

  find_proc = format("{jps_binary} -l | grep {process_grep}")
  write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
  crt_pid_cmd = format("{find_proc} && {write_pid}")
  storm_env = format(
    "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

  if action == "start":
    if name == "rest_api":
      process_cmd = format(
        "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
      cmd = format(
        "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1")
    else:
      cmd = format("{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

    # Launch in the background, then poll `jps` until the pid can be written.
    Execute(cmd,
            not_if=no_op_test,
            user=params.storm_user,
            wait_for_finish=False,
            path=params.storm_bin_dir)
    Execute(crt_pid_cmd,
            user=params.storm_user,
            logoutput=True,
            tries=tries_count,
            try_sleep=10,
            path=params.storm_bin_dir)
  elif action == "stop":
    process_dont_exist = format("! ({no_op_test})")
    pid = '`' + as_user(format("cat {pid_file}"), user=params.storm_user) + '`'

    # Polite SIGTERM first; escalate to SIGKILL only if it lingers.
    Execute(format("{sudo} kill {pid}"),
            not_if=process_dont_exist)
    Execute(format("{sudo} kill -9 {pid}"),
            not_if=format(
              "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"),
            ignore_failures=True)

    File(pid_file, action="delete")
def startRebalancingProcess(threshold, rebalance_env):
  """
  Build the HDFS balancer command line, wrapped to run as the HDFS user.

  :param threshold: balancer disk-usage threshold (percent), interpolated
                    into the command
  :param rebalance_env: environment dict passed through to as_user
  :return: the shell command string ready for execution
  """
  balancer_cmd = format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
  return as_user(balancer_cmd, params.hdfs_user, env=rebalance_env)
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False): """ :param action: Either "start" or "stop" :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc" :param user: User to run the command as :param options: Additional options to pass to command as a string :param create_pid_dir: Create PID directory :param create_log_dir: Crate log file directory """ import params options = options if options else "" pid_dir = format("{hadoop_pid_dir_prefix}/{user}") pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid") hadoop_env_exports = { 'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir } log_dir = format("{hdfs_log_dir_prefix}/{user}") # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542 if name == "nfs3" : pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid") custom_export = { 'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user, 'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir, 'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir } hadoop_env_exports.update(custom_export) process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file]) # on STOP directories shouldn't be created # since during stop still old dirs are used (which were created during previous start) if action != "stop": if name == "nfs3": Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.root_user, group=params.root_group ) else: Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.hdfs_user, group=params.user_group ) if create_pid_dir: Directory(pid_dir, owner=user, recursive=True) if create_log_dir: if name == "nfs3": Directory(log_dir, mode=0775, owner=params.root_user, group=params.user_group) else: Directory(log_dir, owner=user, recursive=True) if params.security_enabled and name == "datanode": ## The directory where pid files are stored in the secure data environment. 
hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}") hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid") # At Champlain stack and further, we may start datanode as a non-root even in secure cluster if not (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) or params.secure_dn_ports_are_in_use: user = "******" pid_file = format( "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid") if action == 'stop' and (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) and \ os.path.isfile(hadoop_secure_dn_pid_file): # We need special handling for this case to handle the situation # when we configure non-root secure DN and then restart it # to handle new configs. Otherwise we will not be able to stop # a running instance user = "******" try: check_process_status(hadoop_secure_dn_pid_file) custom_export = { 'HADOOP_SECURE_DN_USER': params.hdfs_user } hadoop_env_exports.update(custom_export) except ComponentIsNotRunning: pass hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh") if user == "root": cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name] if options: cmd += [options, ] daemon_cmd = as_sudo(cmd) else: cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}") if options: cmd += " " + options daemon_cmd = as_user(cmd, user) if action == "start": # remove pid file from dead process File(pid_file, action="delete", not_if=process_id_exists_command) Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports) elif action == "stop": Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports) File(pid_file, action="delete")
def service_check(self, env):
  """
  HDFS smoke test: checks safemode is OFF, exercises HDFS create/delete via
  HdfsResource, probes JournalNode web UIs, and verifies the ZKFC process.
  """
  import params

  env.set_params(params)
  unique = functions.get_unique_id_and_date()
  # NOTE: `dir` shadows the builtin; kept as-is here (doc-only change).
  dir = '/tmp'
  tmp_file = format("{dir}/{unique}")

  # Exits 0 only when the NameNode reports safemode OFF.
  safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF")

  if params.security_enabled:
    Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
            user=params.hdfs_user
    )
  # Retry for up to ~60s (20 tries x 3s) waiting for safemode to turn OFF.
  ExecuteHadoop(safemode_command,
                user=params.hdfs_user,
                logoutput=True,
                conf_dir=params.hadoop_conf_dir,
                try_sleep=3,
                tries=20,
                bin_dir=params.hadoop_bin_dir
  )
  # Round-trip a small file through HDFS: ensure /tmp exists, remove any stale
  # copy of the unique file, then upload /etc/passwd under that name.
  params.HdfsResource(dir,
                      type="directory",
                      action="create_on_execute",
                      mode=0777
  )
  params.HdfsResource(tmp_file,
                      type="file",
                      action="delete_on_execute",
  )

  params.HdfsResource(tmp_file,
                      type="file",
                      source="/etc/passwd",
                      action="create_on_execute"
  )
  params.HdfsResource(None, action="execute")

  if params.has_journalnode_hosts:
    if params.security_enabled:
      for host in params.journalnode_hosts:
        if params.https_only:
          uri = format("https://{host}:{journalnode_port}")
        else:
          uri = format("http://{host}:{journalnode_port}")
        # Kerberos-authenticated probe of each JournalNode web UI.
        response, errmsg, time_millis = curl_krb_request(params.tmp_dir, params.smoke_user_keytab,
                                                         params.smokeuser_principal, uri, "jn_service_check",
                                                         params.kinit_path_local, False, None, params.smoke_user)
        if not response:
          # NOTE(review): Logger.error is passed format args positionally here;
          # confirm this Logger accepts ({0}/{1}, args) — it may expect a single
          # pre-formatted string.
          Logger.error("Cannot access WEB UI on: {0}. Error : {1}", uri, errmsg)
          # NOTE(review): service checks usually fail by raising; `return 1`
          # may be treated as success by the caller — confirm.
          return 1
    else:
      # Unsecured cluster: delegate the multi-host UI check to a helper script.
      journalnode_port = params.journalnode_port
      checkWebUIFileName = "checkWebUI.py"
      checkWebUIFilePath = format("{tmp_dir}/{checkWebUIFileName}")
      comma_sep_jn_hosts = ",".join(params.journalnode_hosts)
      checkWebUICmd = format("python {checkWebUIFilePath} -m {comma_sep_jn_hosts} -p {journalnode_port} -s {https_only}")
      File(checkWebUIFilePath,
           content=StaticFile(checkWebUIFileName),
           mode=0775)

      Execute(checkWebUICmd,
              logoutput=True,
              try_sleep=3,
              tries=5,
              user=params.smoke_user
      )

  if params.is_namenode_master:
    if params.has_zkfc_hosts:
      pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
      pid_file = format("{pid_dir}/hadoop-{hdfs_user}-zkfc.pid")
      # Verify the ZKFC pid file exists and the recorded pid is alive.
      check_zkfc_process_cmd = as_user(format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.hdfs_user)
      Execute(check_zkfc_process_cmd,
              logoutput=True,
              try_sleep=3,
              tries=5
      )