def service_check(self, env):
    import params
    env.set_params(params)

    path_to_tez_jar = format(params.tez_examples_jar)
    wordcount_command = format(
        "jar {path_to_tez_jar} orderedwordcount /tmp/tezsmokeinput/sample-tez-test /tmp/tezsmokeoutput/"
    )
    test_command = format("fs -test -e /tmp/tezsmokeoutput/_SUCCESS")

    File(format("{tmp_dir}/sample-tez-test"),
         content="foo\nbar\nfoo\nbar\nfoo",
         mode=0755)

    params.HdfsResource("/tmp/tezsmokeoutput",
                        action="delete_on_execute",
                        type="directory")
    params.HdfsResource("/tmp/tezsmokeinput",
                        action="create_on_execute",
                        type="directory",
                        owner=params.smokeuser)
    params.HdfsResource("/tmp/tezsmokeinput/sample-tez-test",
                        action="create_on_execute",
                        type="file",
                        owner=params.smokeuser,
                        source=format("{tmp_dir}/sample-tez-test"))

    if params.stack_version_formatted and compare_versions(
            params.stack_version_formatted, '2.2.0.0') >= 0:
        copy_to_hdfs("tez", params.user_group, params.hdfs_user,
                     host_sys_prepped=params.host_sys_prepped)

    params.HdfsResource(None, action="execute")

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    ExecuteHadoop(wordcount_command,
                  tries=3,
                  try_sleep=5,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)

    ExecuteHadoop(test_command,
                  tries=10,
                  try_sleep=6,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
def format_namenode(force=None):
    import params

    old_mark_dir = params.namenode_formatted_old_mark_dirs
    mark_dir = params.namenode_formatted_mark_dirs
    dfs_name_dir = params.dfs_name_dir
    hdfs_user = params.hdfs_user
    hadoop_conf_dir = params.hadoop_conf_dir

    if not params.dfs_ha_enabled:
        if force:
            ExecuteHadoop('namenode -format',
                          bin_dir=params.hadoop_bin_dir,
                          conf_dir=hadoop_conf_dir,
                          logoutput=True)
        else:
            if not is_namenode_formatted(params):
                Execute(format(
                    "hdfs --config {hadoop_conf_dir} namenode -format -nonInteractive"),
                        user=params.hdfs_user,
                        path=[params.hadoop_bin_dir],
                        logoutput=True)
                for m_dir in mark_dir:
                    Directory(m_dir, create_parents=True)
    else:
        if params.hostname in params.dfs_ha_namenode_active:
            dfs_ha_initial_cluster_id = params.dfs_ha_initial_cluster_id
            cluster_id_clause = format(
                "-clusterId '{dfs_ha_initial_cluster_id}'"
            ) if dfs_ha_initial_cluster_id else ''

            # check and run the format command in the HA deployment scenario
            # only format the "active" namenode in an HA deployment
            if force:
                ExecuteHadoop(format("namenode -format {cluster_id_clause}"),
                              bin_dir=params.hadoop_bin_dir,
                              conf_dir=hadoop_conf_dir,
                              logoutput=True)
            else:
                nn_name_dirs = params.dfs_name_dir.split(',')
                if not is_namenode_formatted(params):
                    try:
                        Execute(format(
                            "hdfs --config {hadoop_conf_dir} namenode -format -nonInteractive {cluster_id_clause}"),
                                user=params.hdfs_user,
                                path=[params.hadoop_bin_dir],
                                logoutput=True)
                    except Fail:
                        # We need to clean-up mark directories, so we can re-run format next time.
                        for nn_name_dir in nn_name_dirs:
                            Execute(format("rm -rf {nn_name_dir}/*"),
                                    user=params.hdfs_user)
                        raise
                    for m_dir in mark_dir:
                        Directory(m_dir, create_parents=True)
def format_namenode(force=None):
    import params

    old_mark_dir = params.namenode_formatted_old_mark_dirs
    mark_dir = params.namenode_formatted_mark_dirs
    dfs_name_dir = params.dfs_name_dir
    hdfs_user = params.hdfs_user
    hadoop_conf_dir = params.hadoop_conf_dir

    if not params.dfs_ha_enabled:
        if force:
            ExecuteHadoop(
                'namenode -format -clusterId CID-5bd58e9f-e754-4d01-984b-1298486631b1 ',
                bin_dir=params.hadoop_bin_dir,
                conf_dir=hadoop_conf_dir,
                logoutput=True)
        else:
            if not is_namenode_formatted(params):
                Execute(format(
                    "/opt/hadoop/bin/hdfs --config {hadoop_conf_dir} namenode -format -clusterId CID-5bd58e9f-e754-4d01-984b-1298486631b1 -nonInteractive"),
                        user=params.hdfs_user,
                        path=[params.hadoop_bin_dir],
                        logoutput=True)
                for m_dir in mark_dir:
                    Directory(m_dir, create_parents=True)
    else:
        if params.dfs_ha_namenode_active is not None and \
                params.hostname == params.dfs_ha_namenode_active:
            # check and run the format command in the HA deployment scenario
            # only format the "active" namenode in an HA deployment
            if force:
                ExecuteHadoop(
                    'namenode -format -clusterId CID-5bd58e9f-e754-4d01-984b-1298486631b1',
                    bin_dir=params.hadoop_bin_dir,
                    conf_dir=hadoop_conf_dir,
                    logoutput=True)
            else:
                nn_name_dirs = params.dfs_name_dir.split(',')
                if not is_namenode_formatted(params):
                    try:
                        Execute(format(
                            "/opt/hadoop/bin/hdfs --config {hadoop_conf_dir} namenode -format -clusterId CID-5bd58e9f-e754-4d01-984b-1298486631b1 -nonInteractive"),
                                user=params.hdfs_user,
                                path=[params.hadoop_bin_dir],
                                logoutput=True)
                    except Fail:
                        # We need to clean-up mark directories, so we can re-run format next time.
                        for nn_name_dir in nn_name_dirs:
                            Execute(format("rm -rf {nn_name_dir}/*"),
                                    user=params.hdfs_user)
                        raise
                    for m_dir in mark_dir:
                        Directory(m_dir, create_parents=True)
def format_namenode(force=None):
    import params

    old_mark_dir = params.namenode_formatted_old_mark_dirs
    mark_dir = params.namenode_formatted_mark_dirs
    dfs_name_dir = params.dfs_name_dir
    hdfs_user = params.hdfs_user
    hadoop_conf_dir = params.hadoop_conf_dir

    if not params.dfs_ha_enabled:
        if force:
            ExecuteHadoop('namenode -format',
                          kinit_override=True,
                          bin_dir=params.hadoop_bin_dir,
                          conf_dir=hadoop_conf_dir)
        else:
            if not is_namenode_formatted(params):
                Execute(format(
                    "yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
                        user=params.hdfs_user,
                        path=[params.hadoop_bin_dir])
                for m_dir in mark_dir:
                    Directory(m_dir, recursive=True)
    else:
        if params.dfs_ha_namenode_active is not None and \
                params.hostname == params.dfs_ha_namenode_active:
            # check and run the format command in the HA deployment scenario
            # only format the "active" namenode in an HA deployment
            if force:
                ExecuteHadoop('namenode -format',
                              kinit_override=True,
                              bin_dir=params.hadoop_bin_dir,
                              conf_dir=hadoop_conf_dir)
            else:
                nn_name_dirs = params.dfs_name_dir.split(',')
                if not is_namenode_formatted(params):
                    try:
                        Execute(format(
                            "yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
                                user=params.hdfs_user,
                                path=[params.hadoop_bin_dir])
                    except Fail:
                        # We need to clean-up mark directories, so we can re-run format next time.
                        for nn_name_dir in nn_name_dirs:
                            Execute(format("rm -rf {nn_name_dir}/*"),
                                    user=params.hdfs_user)
                        raise
                    for m_dir in mark_dir:
                        Directory(m_dir, recursive=True)
def test_run_secured(self, execute_mock):
    '''
    Test security_enabled=True behaviour
    '''
    with Environment("/") as env:
        ExecuteHadoop("command",
                      action="run",
                      kinit_path_local="path",
                      conf_dir="conf_dir",
                      user="******",
                      tries=1,
                      keytab="keytab",
                      security_enabled=True,
                      kinit_override=False,
                      try_sleep=0,
                      logoutput=True)

        self.assertEqual(execute_mock.call_count, 2)
        self.assertEqual(str(execute_mock.call_args_list[0][0][0]),
                         "Execute['path -kt keytab user']")
        self.assertEqual(execute_mock.call_args_list[0][0][0].command,
                         'path -kt keytab user')
        self.assertEqual(execute_mock.call_args_list[0][0][0].arguments,
                         {'path': ['/bin'], 'user': '******'})
        self.assertEqual(execute_mock.call_args_list[1][0][0].command,
                         'hadoop --config conf_dir command')
        self.assertEqual(execute_mock.call_args_list[1][0][0].arguments,
                         {'logoutput': True, 'tries': 1, 'user': '******', 'try_sleep': 0})
def hcat_service_check():
    import params

    unique = get_unique_id_and_date()
    output_file = format("{hive_apps_whs_dir}/hcatsmoke{unique}")
    test_cmd = format("fs -test -e {output_file}")

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    else:
        kinit_cmd = ""

    File(format("{tmp_dir}/hcatSmoke.sh"),
         content=StaticFile("hcatSmoke.sh"),
         mode=0755)

    prepare_cmd = format(
        "{kinit_cmd}env JAVA_HOME={java64_home} {tmp_dir}/hcatSmoke.sh hcatsmoke{unique} prepare {purge_tables}"
    )

    exec_path = params.execute_path
    if params.version and params.stack_root:
        upgrade_hive_bin = format("{stack_root}/{version}/hive/bin")
        exec_path = os.environ['PATH'] + os.pathsep + params.hadoop_bin_dir + os.pathsep + upgrade_hive_bin

    Execute(prepare_cmd,
            tries=3,
            user=params.smokeuser,
            try_sleep=5,
            path=['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', exec_path],
            logoutput=True)

    if params.security_enabled:
        Execute(format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
                user=params.hdfs_user)

    ExecuteHadoop(test_cmd,
                  user=params.hdfs_user,
                  logoutput=True,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.execute_path)

    cleanup_cmd = format(
        "{kinit_cmd} {tmp_dir}/hcatSmoke.sh hcatsmoke{unique} cleanup {purge_tables}")

    Execute(cleanup_cmd,
            tries=3,
            user=params.smokeuser,
            try_sleep=5,
            path=['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', exec_path],
            logoutput=True)
def test_run_defined_args(self, execute_mock):
    '''
    Test if defined arguments are passed to Execute
    '''
    with Environment("/") as env:
        ExecuteHadoop("command",
                      action="run",
                      kinit_path_local="path",
                      conf_dir="conf_dir",
                      user="******",
                      tries=2,
                      keytab="keytab",
                      security_enabled=False,
                      kinit_override=False,
                      try_sleep=2,
                      logoutput=True,
                      principal="principal")

        self.assertEqual(execute_mock.call_count, 1)
        self.assertEqual(execute_mock.call_args[0][0].command,
                         'hadoop --config conf_dir command')
        self.assertEqual(execute_mock.call_args[0][0].arguments,
                         {'logoutput': True, 'tries': 2, 'user': '******', 'try_sleep': 2,
                          'path': [], 'environment': {}})
def test_run_command_list(self, execute_mock):
    '''
    Test for "command" passed as List
    '''
    with Environment("/") as env:
        ExecuteHadoop(["command1", "command2"],
                      action="run",
                      kinit_path_local="path",
                      conf_dir="conf_dir",
                      user="******",
                      keytab="keytab")

        self.assertEqual(execute_mock.call_count, 2)
        self.assertEqual(execute_mock.call_args_list[0][0][0].command,
                         'hadoop --config conf_dir command1')
        self.assertEqual(execute_mock.call_args_list[1][0][0].command,
                         'hadoop --config conf_dir command2')
        self.assertEqual(execute_mock.call_args_list[0][0][0].arguments,
                         {'logoutput': None, 'tries': 1, 'user': '******', 'environment': {},
                          'try_sleep': 0, 'path': []})
        self.assertEqual(execute_mock.call_args_list[1][0][0].arguments,
                         {'logoutput': None, 'tries': 1, 'user': '******', 'try_sleep': 0,
                          'path': [], 'environment': {}})
def test_run_unknown_conf(self, execute_mock):
    '''
    Test when UnknownConfiguration passed
    '''
    with Environment() as env:
        ExecuteHadoop("command",
                      kinit_path_local=UnknownConfiguration(name="kinit_path_local"),
                      conf_dir="conf_dir",
                      user="******",
                      keytab=UnknownConfiguration(name="keytab"),
                      security_enabled=False,
                      principal=UnknownConfiguration(name="principal"))

        self.assertEqual(execute_mock.call_count, 1)
        self.assertEqual(execute_mock.call_args[0][0].command,
                         'hadoop --config conf_dir command')
        self.assertEqual(execute_mock.call_args[0][0].arguments,
                         {'logoutput': None, 'tries': 1, 'user': '******', 'try_sleep': 0,
                          'path': [], 'environment': {}})
def decommission():
    import params

    hdfs_user = params.hdfs_user
    conf_dir = params.hadoop_conf_dir
    user_group = params.user_group
    nn_kinit_cmd = params.nn_kinit_cmd

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=hdfs_user,
         group=user_group)

    if not params.update_exclude_file_only:
        Execute(nn_kinit_cmd, user=hdfs_user)

        if params.dfs_ha_enabled:
            # due to a bug in hdfs, refreshNodes will not run on both namenodes so we
            # need to execute each command scoped to a particular namenode
            nn_refresh_cmd = format('dfsadmin -fs hdfs://{namenode_rpc} -refreshNodes')
        else:
            nn_refresh_cmd = format('dfsadmin -fs {namenode_address} -refreshNodes')

        ExecuteHadoop(nn_refresh_cmd,
                      user=hdfs_user,
                      conf_dir=conf_dir,
                      kinit_override=True,
                      bin_dir=params.hadoop_bin_dir)
def format_namenode(force=None):
    import params

    old_mark_dir = params.namenode_formatted_old_mark_dirs
    mark_dir = params.namenode_formatted_mark_dirs
    dfs_name_dir = params.dfs_name_dir
    hdfs_user = params.hdfs_user
    hadoop_conf_dir = params.hadoop_conf_dir

    if not params.dfs_ha_enabled:
        if force:
            ExecuteHadoop('namenode -format',
                          kinit_override=True,
                          bin_dir=params.hadoop_bin_dir,
                          conf_dir=hadoop_conf_dir)
        else:
            if not is_namenode_formatted(params):
                Execute(format(
                    "yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
                        user=params.hdfs_user,
                        path=[params.hadoop_bin_dir])
                for m_dir in mark_dir:
                    Directory(m_dir, create_parents=True)
    else:
        if params.dfs_ha_namenode_active is not None and \
                params.hostname == params.dfs_ha_namenode_active:
            # check and run the format command in the HA deployment scenario
            # only format the "active" namenode in an HA deployment
            if force:
                ExecuteHadoop('namenode -format',
                              kinit_override=True,
                              bin_dir=params.hadoop_bin_dir,
                              conf_dir=hadoop_conf_dir)
            else:
                if not is_namenode_formatted(params):
                    Execute(format(
                        "yes Y | hdfs --config {hadoop_conf_dir} namenode -format"),
                            user=params.hdfs_user,
                            path=[params.hadoop_bin_dir])
                    for m_dir in mark_dir:
                        Directory(m_dir, create_parents=True)
def service_check(self, env):
    import params
    env.set_params(params)

    mahout_command = format(
        "mahout seqdirectory --input /user/{smokeuser}/mahoutsmokeinput/sample-mahout-test.txt "
        "--output /user/{smokeuser}/mahoutsmokeoutput/ --charset utf-8")
    test_command = format("fs -test -e /user/{smokeuser}/mahoutsmokeoutput/_SUCCESS")

    File(format("{tmp_dir}/sample-mahout-test.txt"),
         content="Test text which will be converted to sequence file.",
         mode=0755)

    params.HdfsResource(format("/user/{smokeuser}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.smokeuser,
                        mode=params.smoke_hdfs_user_mode)
    params.HdfsResource(format("/user/{smokeuser}/mahoutsmokeoutput"),
                        action="delete_on_execute",
                        type="directory")
    params.HdfsResource(format("/user/{smokeuser}/mahoutsmokeinput"),
                        action="create_on_execute",
                        type="directory",
                        owner=params.smokeuser)
    params.HdfsResource(format("/user/{smokeuser}/mahoutsmokeinput/sample-mahout-test.txt"),
                        action="create_on_execute",
                        type="file",
                        owner=params.smokeuser,
                        source=format("{tmp_dir}/sample-mahout-test.txt"))
    params.HdfsResource(None, action="execute")

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    Execute(mahout_command,
            tries=3,
            try_sleep=5,
            environment={'MAHOUT_HOME': params.mahout_home, 'JAVA_HOME': params.java64_home},
            path=format('/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser)

    ExecuteHadoop(test_command,
                  tries=10,
                  try_sleep=6,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
def test_run_command_tuple(self, execute_mock):
    '''
    Test for "command" passed as Tuple
    '''
    with Environment("/") as env:
        ExecuteHadoop(("command1", "command2", "command3"),
                      action="run",
                      conf_dir="conf_dir",
                      user="******")

        self.assertEqual(execute_mock.call_count, 1)
        self.assertEqual(execute_mock.call_args[0][0].command,
                         'hadoop --config conf_dir command1 command2 command3')
def test_run_default_args(self, execute_mock):
    '''
    Test if default arguments are passed to Execute
    '''
    with Environment() as env:
        ExecuteHadoop("command",
                      conf_dir="conf_dir",
                      user="******",
                      logoutput=True)

        self.assertEqual(execute_mock.call_count, 1)
        self.assertEqual(execute_mock.call_args[0][0].command,
                         'hadoop --config conf_dir command')
        self.assertEqual(execute_mock.call_args[0][0].arguments,
                         {'logoutput': True, 'tries': 1, 'user': '******', 'try_sleep': 0})
def refresh_nodes():
    import params

    if params.dfs_ha_enabled:
        # due to a bug in hdfs, refreshNodes will not run on both namenodes so we
        # need to execute each command scoped to a particular namenode
        nn_refresh_cmd = format('dfsadmin -fs hdfs://{namenode_rpc} -refreshNodes')
    else:
        nn_refresh_cmd = format('dfsadmin -fs {namenode_address} -refreshNodes')

    Execute(params.nn_kinit_cmd, user=params.hdfs_user)

    ExecuteHadoop(nn_refresh_cmd,
                  user=params.hdfs_user,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
def is_balancer_running():
    import params

    check_balancer_command = "fs -test -e /system/balancer.id"
    does_hdfs_file_exist = False
    try:
        _print("Checking if the balancer is running ...")
        ExecuteHadoop(check_balancer_command,
                      user=params.hdfs_user,
                      logoutput=True,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
        does_hdfs_file_exist = True
        _print("Balancer is running. ")
    except Fail:
        pass

    return does_hdfs_file_exist
def test_run_secured_kinit_override(self, execute_mock):
    '''
    Test security_enabled=True and kinit_override=True behaviour
    '''
    with Environment("/") as env:
        ExecuteHadoop("command",
                      action="run",
                      kinit_path_local="path",
                      conf_dir="conf_dir",
                      user="******",
                      tries=1,
                      keytab="keytab",
                      security_enabled=True,
                      kinit_override=True,
                      try_sleep=0,
                      logoutput=True)

        self.assertEqual(execute_mock.call_count, 1)
        self.assertEqual(execute_mock.call_args_list[0][0][0].command,
                         'hadoop --config conf_dir command')
def test_run_secured_principal(self, execute_mock):
    '''
    Test with "principal" argument
    '''
    with Environment("/") as env:
        ExecuteHadoop("command",
                      action="run",
                      kinit_path_local="path",
                      conf_dir="conf_dir",
                      user="******",
                      tries=1,
                      keytab="keytab",
                      security_enabled=True,
                      kinit_override=False,
                      try_sleep=0,
                      logoutput=True,
                      principal="principal")

        self.assertEqual(execute_mock.call_count, 2)
        self.assertEqual(execute_mock.call_args_list[0][0][0].command,
                         'path -kt keytab principal')
        self.assertEqual(execute_mock.call_args_list[1][0][0].command,
                         'hadoop --config conf_dir command')
def service_check(self, env):
    import params
    env.set_params(params)

    input_file = format('/user/{smokeuser}/passwd')
    output_dir = format('/user/{smokeuser}/pigsmoke.out')

    # cleanup output
    params.HdfsResource(output_dir,
                        type="directory",
                        action="delete_on_execute",
                        owner=params.smokeuser)
    # re-create input. Be able to delete it if it already exists
    params.HdfsResource(input_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(None, action="execute")

    File(format("{tmp_dir}/pigSmoke.sh"),
         content=StaticFile("pigSmoke.sh"),
         mode=0755)

    # check for Pig-on-M/R
    Execute(format("pig {tmp_dir}/pigSmoke.sh"),
            tries=3,
            try_sleep=5,
            path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser,
            logoutput=True)

    test_cmd = format("fs -test -e {output_dir}")
    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)

    if params.iop_stack_version != "" and compare_versions(
            params.iop_stack_version, '4.0') >= 0:
        # cleanup results from previous test
        # cleanup output
        params.HdfsResource(output_dir,
                            type="directory",
                            action="delete_on_execute",
                            owner=params.smokeuser)
        # re-create input. Be able to delete it firstly if it already exists
        params.HdfsResource(input_file,
                            type="file",
                            source="/etc/passwd",
                            action="create_on_execute",
                            owner=params.smokeuser)
        params.HdfsResource(None, action="execute")

        if params.security_enabled:
            kinit_cmd = format(
                "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
            Execute(kinit_cmd, user=params.smokeuser)

        Execute(format("pig {tmp_dir}/pigSmoke.sh"),
                tries=3,
                try_sleep=5,
                path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
                user=params.smokeuser,
                logoutput=True)

        ExecuteHadoop(test_cmd,
                      user=params.smokeuser,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
def service_check(self, env):
    import params
    env.set_params(params)

    path_to_tez_jar = format(params.tez_examples_jar)
    wordcount_command = format(
        "jar {path_to_tez_jar} orderedwordcount /tmp/tezsmokeinput/sample-tez-test /tmp/tezsmokeoutput/"
    )
    test_command = format("fs -test -e /tmp/tezsmokeoutput/_SUCCESS")

    File(format("{tmp_dir}/sample-tez-test"),
         content="foo\nbar\nfoo\nbar\nfoo",
         mode=0755)

    params.HdfsResource("/tmp/tezsmokeoutput",
                        action="delete_on_execute",
                        type="directory")
    params.HdfsResource("/tmp/tezsmokeinput",
                        action="create_on_execute",
                        type="directory",
                        owner=params.smokeuser)
    params.HdfsResource("/tmp/tezsmokeinput/sample-tez-test",
                        action="create_on_execute",
                        type="file",
                        owner=params.smokeuser,
                        source=format("{tmp_dir}/sample-tez-test"))

    Execute('wget http://yum.example.com/hadoop/hdfs/tez.tar.gz -O /tmp/tez.tar.gz')
    copy_to_hdfs("tez", params.user_group, params.hdfs_user,
                 custom_source_file='/tmp/tez.tar.gz',
                 custom_dest_file='/apps/tez/tez.tar.gz')
    params.HdfsResource(None, action="execute")
    Execute('/bin/rm -f /tmp/tez.tar.gz')

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    ExecuteHadoop(wordcount_command,
                  tries=3,
                  try_sleep=5,
                  user=params.smokeuser,
                  environment={'JAVA_HOME': params.java64_home},
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)

    ExecuteHadoop(test_command,
                  tries=10,
                  try_sleep=6,
                  user=params.smokeuser,
                  environment={'JAVA_HOME': params.java64_home},
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
def action_run(self):
    path = self.resource.path
    dest_dir = self.resource.dest_dir
    dest_file = self.resource.dest_file
    kinnit_if_needed = self.resource.kinnit_if_needed
    owner = self.resource.owner
    group = self.resource.group
    mode = self.resource.mode
    hdfs_usr = self.resource.hdfs_user
    hadoop_conf_path = self.resource.hadoop_conf_dir
    bin_dir = self.resource.hadoop_bin_dir

    if dest_file:
        copy_cmd = format("fs -copyFromLocal {path} {dest_dir}/{dest_file}")
        dest_path = dest_dir + dest_file if dest_dir.endswith(os.sep) else dest_dir + os.sep + dest_file
    else:
        dest_file_name = os.path.split(path)[1]
        copy_cmd = format("fs -copyFromLocal {path} {dest_dir}")
        dest_path = dest_dir + os.sep + dest_file_name

    # Need to run unless as resource user
    if kinnit_if_needed:
        Execute(kinnit_if_needed, user=owner)

    unless_cmd = as_user(format("PATH=$PATH:{bin_dir} hadoop fs -ls {dest_path}"), owner)

    ExecuteHadoop(copy_cmd,
                  not_if=unless_cmd,
                  user=owner,
                  bin_dir=bin_dir,
                  conf_dir=hadoop_conf_path)

    if not owner:
        chown = None
    else:
        if not group:
            chown = owner
        else:
            chown = format('{owner}:{group}')

    if chown:
        chown_cmd = format("fs -chown {chown} {dest_path}")
        ExecuteHadoop(chown_cmd,
                      user=hdfs_usr,
                      bin_dir=bin_dir,
                      conf_dir=hadoop_conf_path)

    if mode:
        dir_mode = oct(mode)[1:]
        chmod_cmd = format('fs -chmod {dir_mode} {dest_path}')
        ExecuteHadoop(chmod_cmd,
                      user=hdfs_usr,
                      bin_dir=bin_dir,
                      conf_dir=hadoop_conf_path)
def service_check(self, env):
    import params
    env.set_params(params)

    input_file = format('/user/{smokeuser}/passwd')
    output_dir = format('/user/{smokeuser}/pigsmoke.out')

    params.HdfsResource(format("/user/{smokeuser}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.smokeuser,
                        mode=params.smoke_hdfs_user_mode)
    params.HdfsResource(output_dir,
                        type="directory",
                        action="delete_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(input_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(None, action="execute")

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    File(format("{tmp_dir}/pigSmoke.sh"),
         content=StaticFile("pigSmoke.sh"),
         mode=0755)

    # check for Pig-on-M/R
    Execute(format("pig {tmp_dir}/pigSmoke.sh"),
            tries=3,
            try_sleep=5,
            path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser,
            logoutput=True)

    test_cmd = format("fs -test -e {output_dir}")
    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)

    if params.has_tez and params.stack_version_formatted and check_stack_feature(
            StackFeature.PIG_ON_TEZ, params.stack_version_formatted):
        # cleanup results from previous test
        params.HdfsResource(output_dir,
                            type="directory",
                            action="delete_on_execute",
                            owner=params.smokeuser)
        params.HdfsResource(input_file,
                            type="file",
                            source="/etc/passwd",
                            action="create_on_execute",
                            owner=params.smokeuser)

        # Check for Pig-on-Tez
        resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user,
                                        skip=params.sysprep_skip_copy_tarballs_hdfs)
        if resource_created:
            params.HdfsResource(None, action="execute")

        Execute(format("pig -x tez {tmp_dir}/pigSmoke.sh"),
                tries=3,
                try_sleep=5,
                path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
                user=params.smokeuser,
                logoutput=True)

        ExecuteHadoop(test_cmd,
                      user=params.smokeuser,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
def copy_tarballs_to_hdfs(source, dest, hdp_select_component_name, component_user, file_owner, group_owner):
    """
    :param source: Source file of the tarball in the local file system (must be one of tez, hive, mr, pig)
    :param dest: Destination folder for the tarball in HDFS
    :param hdp_select_component_name: Component name to get the status to determine the version
    :param component_user: User that will execute the Hadoop commands
    :param file_owner: Owner of the files copied to HDFS (typically hdfs account)
    :param group_owner: Group owner of the files copied to HDFS (typically hadoop group)
    :return: Returns 0 on success, 1 if no files were copied, and in some cases may raise an exception.

    In order to call this function, params.py must have all of the following,
    hdp_stack_version, kinit_path_local, security_enabled, hdfs_user, hdfs_principal_name,
    hdfs_user_keytab, hadoop_bin_dir, hadoop_conf_dir, and HdfsDirectory as a partial function.
    """
    import params

    component_tar_source_file, component_tar_destination_folder = source, dest

    if not os.path.exists(component_tar_source_file):
        Logger.warning("Could not find file: %s" % str(component_tar_source_file))
        return 1

    # Ubuntu returns: "stdin: is not a tty", as subprocess output.
    tmpfile = tempfile.NamedTemporaryFile()
    out = None
    with open(tmpfile.name, 'r+') as file:
        get_hdp_version_cmd = '/usr/bin/hdp-select status %s > %s' % (
            hdp_select_component_name, tmpfile.name)
        code, stdoutdata = shell.call(get_hdp_version_cmd)
        out = file.read()

    if code != 0 or out is None:
        Logger.warning(
            "Could not verify HDP version by calling '%s'. Return Code: %s, Output: %s." %
            (get_hdp_version_cmd, str(code), str(out)))
        return 1

    matches = re.findall(r"([\d\.]+\-\d+)", out)
    hdp_version = matches[0] if matches and len(matches) > 0 else None

    if not hdp_version:
        Logger.error("Could not parse HDP version from output of hdp-select: %s" % str(out))
        return 1

    file_name = os.path.basename(component_tar_source_file)
    destination_file = os.path.join(component_tar_destination_folder, file_name)
    destination_file = destination_file.replace("{{ hdp_stack_version }}", hdp_version)

    kinit_if_needed = ""
    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")

    if kinit_if_needed:
        Execute(kinit_if_needed, user=component_user, path='/bin')

    # Check if destination folder already exists
    does_hdfs_dir_exist = False
    does_hdfs_file_exist_cmd = "fs -ls %s" % os.path.dirname(destination_file)
    try:
        ExecuteHadoop(does_hdfs_file_exist_cmd,
                      user=component_user,
                      logoutput=True,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
        does_hdfs_dir_exist = True
    except Fail:
        pass

    does_hdfs_file_exist_cmd = "fs -ls %s" % destination_file
    does_hdfs_file_exist = False
    try:
        ExecuteHadoop(does_hdfs_file_exist_cmd,
                      user=component_user,
                      logoutput=True,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
        does_hdfs_file_exist = True
    except Fail:
        pass

    if not does_hdfs_file_exist and not does_hdfs_dir_exist:
        source_and_dest_pairs = [(component_tar_source_file, destination_file), ]
        return _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed)

    return 1
# However given that oozie_setup_sh does not support an arbitrary hdfs path prefix,
# we are simulating the same command below
put_shared_lib_to_hdfs_cmd = format(
    "hadoop --config {hadoop_conf_dir} dfs -copyFromLocal {oozie_shared_lib}/lib/** {oozie_hdfs_user_dir}/share/lib/lib_20150212065327"
)

oozie_cmd = format(
    "{put_shared_lib_to_hdfs_cmd} ; hadoop --config {hadoop_conf_dir} dfs -chmod -R 755 {oozie_hdfs_user_dir}/share"
)

# Check if destination folder already exists
does_hdfs_file_exist_cmd = "fs -ls %s" % format("{oozie_hdfs_user_dir}/share")
try:
    ExecuteHadoop(does_hdfs_file_exist_cmd,
                  user=oozie_user,
                  logoutput=True,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
except Fail:
    # If dir does not exist create it and put files there
    HdfsDirectory(format("{oozie_hdfs_user_dir}/share/lib/lib_20150212065327"),
                  action="create",
                  owner=oozie_user,
                  mode=0555,
                  conf_dir=params.hadoop_conf_dir,
                  hdfs_user=params.hdfs_user)
    Execute(oozie_cmd,
            user=params.oozie_user,
            not_if=None)
def copy_tarballs_to_hdfs(tarball_prefix, stack_select_component_name, component_user,
                          file_owner, group_owner, ignore_sysprep=False):
    """
    :param tarball_prefix: Prefix of the tarball must be one of tez, hive, mr, pig
    :param stack_select_component_name: Component name to get the status to determine the version
    :param component_user: User that will execute the Hadoop commands, usually smokeuser
    :param file_owner: Owner of the files copied to HDFS (typically hdfs user)
    :param group_owner: Group owner of the files copied to HDFS (typically hadoop group)
    :param ignore_sysprep: Ignore sysprep directives
    :return: Returns 0 on success, 1 if no files were copied, and in some cases may raise an exception.

    In order to call this function, params.py must have all of the following,
    stack_version_formatted, kinit_path_local, security_enabled, hdfs_user, hdfs_principal_name,
    hdfs_user_keytab, hadoop_bin_dir, hadoop_conf_dir, and HdfsDirectory as a partial function.
    """
    import params

    if not ignore_sysprep and hasattr(params, "host_sys_prepped") and params.host_sys_prepped:
        Logger.info("Host is sys-prepped. Tarball %s will not be copied for %s." %
                    (tarball_prefix, stack_select_component_name))
        return 0

    if not hasattr(params, "stack_version_formatted") or params.stack_version_formatted is None:
        Logger.warning("Could not find stack_version_formatted")
        return 1

    component_tar_source_file, component_tar_destination_folder = _get_tar_source_and_dest_folder(
        tarball_prefix)

    if not component_tar_source_file or not component_tar_destination_folder:
        Logger.warning("Could not retrieve properties for tarball with prefix: %s" %
                       str(tarball_prefix))
        return 1

    if not os.path.exists(component_tar_source_file):
        Logger.warning("Could not find file: %s" % str(component_tar_source_file))
        return 1

    # Ubuntu returns: "stdin: is not a tty", as subprocess32 output.
    tmpfile = tempfile.NamedTemporaryFile()
    out = None
    (stack_selector_name, stack_selector_path, stack_selector_package) = stack_tools.get_stack_tool(
        stack_tools.STACK_SELECTOR_NAME)
    with open(tmpfile.name, 'r+') as file:
        get_stack_version_cmd = '%s status %s > %s' % (
            stack_selector_path, stack_select_component_name, tmpfile.name)
        code, stdoutdata = shell.call(get_stack_version_cmd)
        out = file.read()

    if code != 0 or out is None:
        Logger.warning(
            "Could not verify stack version by calling '%s'. Return Code: %s, Output: %s." %
            (get_stack_version_cmd, str(code), str(out)))
        return 1

    matches = re.findall(r"([\d\.]+(?:-\d+)?)", out)
    stack_version = matches[0] if matches and len(matches) > 0 else None

    if not stack_version:
        Logger.error("Could not parse stack version from output of %s: %s" %
                     (stack_selector_name, str(out)))
        return 1

    file_name = os.path.basename(component_tar_source_file)
    destination_file = os.path.join(component_tar_destination_folder, file_name)
    destination_file = destination_file.replace("{{ stack_version_formatted }}", stack_version)

    does_hdfs_file_exist_cmd = "fs -ls %s" % destination_file

    kinit_if_needed = ""
    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")

    if kinit_if_needed:
        Execute(kinit_if_needed, user=component_user, path='/bin')

    does_hdfs_file_exist = False
    try:
        ExecuteHadoop(does_hdfs_file_exist_cmd,
                      user=component_user,
                      logoutput=True,
                      conf_dir=params.hadoop_conf_dir,
                      bin_dir=params.hadoop_bin_dir)
        does_hdfs_file_exist = True
    except Fail:
        pass

    if not does_hdfs_file_exist:
        source_and_dest_pairs = [(component_tar_source_file, destination_file), ]
        return _copy_files(source_and_dest_pairs, component_user, file_owner, group_owner,
                           kinit_if_needed)

    return 1
def _copy_files(source_and_dest_pairs, component_user, file_owner, group_owner, kinit_if_needed):
    """
    :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local
                                  file system, and y is the destination file path in HDFS
    :param component_user: User that will execute the Hadoop commands, usually smokeuser
    :param file_owner: Owner to set for the file copied to HDFS (typically hdfs account)
    :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
    :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
    :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.

    Must kinit before calling this function.
    """
    import params

    return_value = 1
    if source_and_dest_pairs and len(source_and_dest_pairs) > 0:
        return_value = 0
        for (source, destination) in source_and_dest_pairs:
            try:
                destination_dir = os.path.dirname(destination)

                params.HdfsDirectory(destination_dir,
                                     action="create",
                                     owner=file_owner,
                                     hdfs_user=params.hdfs_user,  # this will be the user to run the commands as
                                     mode=0555)

                # Because CopyFromLocal does not guarantee synchronization, it's possible for two processes to first attempt to
                # copy the file to a temporary location, then process 2 fails because the temporary file was already created by
                # process 1, so process 2 tries to clean up by deleting the temporary file, and then process 1
                # cannot finish the copy to the final destination, and both fail!
                # For this reason, the file name on the destination must be unique, and we then rename it to the intended value.
                # The rename operation is synchronized by the Namenode.
                orig_dest_file_name = os.path.split(destination)[1]
                unique_string = str(uuid.uuid4())[:8]
                new_dest_file_name = orig_dest_file_name + "." + unique_string
                new_destination = os.path.join(destination_dir, new_dest_file_name)

                CopyFromLocal(source,
                              mode=0444,
                              owner=file_owner,
                              group=group_owner,
                              user=params.hdfs_user,  # this will be the user to run the commands as
                              dest_dir=destination_dir,
                              dest_file=new_dest_file_name,
                              kinnit_if_needed=kinit_if_needed,
                              hdfs_user=params.hdfs_user,
                              hadoop_bin_dir=params.hadoop_bin_dir,
                              hadoop_conf_dir=params.hadoop_conf_dir)

                mv_command = format("fs -mv {new_destination} {destination}")
                ExecuteHadoop(mv_command,
                              user=params.hdfs_user,
                              bin_dir=params.hadoop_bin_dir,
                              conf_dir=params.hadoop_conf_dir)
            except Exception, e:
                Logger.error("Failed to copy file. Source: %s, Destination: %s. Error: %s" %
                             (source, destination, e.message))
                return_value = 1
    return return_value
def service_check(self, env):
    import params
    env.set_params(params)

    unique = functions.get_unique_id_and_date()
    dir = params.hdfs_tmp_dir
    tmp_file = format("{dir}/{unique}")

    safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF")

    if params.security_enabled:
        Execute(format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
                user=params.hdfs_user)

    ExecuteHadoop(safemode_command,
                  user=params.hdfs_user,
                  logoutput=True,
                  conf_dir=params.hadoop_conf_dir,
                  try_sleep=3,
                  tries=20,
                  bin_dir=params.hadoop_bin_dir)

    params.HdfsResource(dir,
                        type="directory",
                        action="create_on_execute",
                        mode=0777)
    params.HdfsResource(tmp_file,
                        type="file",
                        action="delete_on_execute")
    params.HdfsResource(tmp_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute")
    params.HdfsResource(None, action="execute")

    if params.has_journalnode_hosts:
        if params.security_enabled:
            for host in params.journalnode_hosts:
                if params.https_only:
                    uri = format("https://{host}:{journalnode_port}")
                else:
                    uri = format("http://{host}:{journalnode_port}")
                response, errmsg, time_millis = curl_krb_request(
                    params.tmp_dir, params.smoke_user_keytab, params.smokeuser_principal,
                    uri, "jn_service_check", params.kinit_path_local, False, None,
                    params.smoke_user)
                if not response:
                    Logger.error("Cannot access WEB UI on: {0}. Error : {1}".format(uri, errmsg))
                    return 1
        else:
            journalnode_port = params.journalnode_port
            checkWebUIFileName = "checkWebUI.py"
            checkWebUIFilePath = format("{tmp_dir}/{checkWebUIFileName}")
            comma_sep_jn_hosts = ",".join(params.journalnode_hosts)
            checkWebUICmd = format(
                "ambari-python-wrap {checkWebUIFilePath} -m {comma_sep_jn_hosts} -p {journalnode_port} -s {https_only}"
            )
            File(checkWebUIFilePath,
                 content=StaticFile(checkWebUIFileName),
                 mode=0775)
            Execute(checkWebUICmd,
                    logoutput=True,
                    try_sleep=3,
                    tries=5,
                    user=params.smoke_user)

    if params.is_namenode_master:
        if params.has_zkfc_hosts:
            pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
            pid_file = format("{pid_dir}/hadoop-{hdfs_user}-zkfc.pid")
            check_zkfc_process_cmd = as_user(format(
                "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"),
                                             user=params.hdfs_user)
            Execute(check_zkfc_process_cmd,
                    logoutput=True,
                    try_sleep=3,
                    tries=5)
def service_check(self, env):
    import params
    env.set_params(params)

    jar_path = format("{hadoop_mapred2_jar_location}/{hadoopMapredExamplesJarName}")
    input_file = format("/user/{smokeuser}/mapredsmokeinput")
    output_file = format("/user/{smokeuser}/mapredsmokeoutput")

    test_cmd = format("fs -test -e {output_file}")
    run_wordcount_job = format("jar {jar_path} wordcount {input_file} {output_file}")

    params.HdfsResource(format("/user/{smokeuser}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.smokeuser,
                        mode=params.smoke_hdfs_user_mode)
    params.HdfsResource(output_file,
                        action="delete_on_execute",
                        type="directory",
                        dfs_type=params.dfs_type)

    test_file = params.mapred2_service_check_test_file
    if not os.path.isfile(test_file):
        try:
            Execute(format("dd if=/dev/urandom of={test_file} count=1 bs=1024"))
        except:
            try:
                Execute(format("rm {test_file}"))  # clean up
            except:
                pass
            test_file = "/etc/passwd"

    params.HdfsResource(input_file,
                        action="create_on_execute",
                        type="file",
                        source=test_file,
                        dfs_type=params.dfs_type)
    params.HdfsResource(None, action="execute")

    # initialize the ticket
    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    ExecuteHadoop(run_wordcount_job,
                  tries=1,
                  try_sleep=5,
                  user=params.smokeuser,
                  bin_dir=params.execute_path,
                  conf_dir=params.hadoop_conf_dir,
                  logoutput=True)

    # the ticket may have expired, so re-initialize
    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  bin_dir=params.execute_path,
                  conf_dir=params.hadoop_conf_dir)
def service_check(self, env):
    import params
    env.set_params(params)

    input_file = format('/user/{smokeuser}/passwd')
    output_dir = format('/user/{smokeuser}/pigsmoke.out')

    params.HdfsResource(format("/user/{smokeuser}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.smokeuser,
                        mode=params.smoke_hdfs_user_mode)
    params.HdfsResource(output_dir,
                        type="directory",
                        action="delete_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(input_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(None, action="execute")

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        Execute(kinit_cmd, user=params.smokeuser)

    File(format("{tmp_dir}/pigSmoke.sh"),
         content=StaticFile("pigSmoke.sh"),
         mode=0755)

    # check for Pig-on-M/R
    Execute(format("source /etc/pig/pig-env.sh; pig {tmp_dir}/pigSmoke.sh"),
            tries=3,
            try_sleep=5,
            path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser,
            environment={'JAVA_HOME': params.java64_home},
            logoutput=True)

    test_cmd = format("fs -test -e {output_dir}")
    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)

    # cleanup results from previous test
    params.HdfsResource(output_dir,
                        type="directory",
                        action="delete_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(input_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute",
                        owner=params.smokeuser)
    params.HdfsResource(None, action="execute")

    Execute(format("source /etc/pig/pig-env.sh; pig -x tez {tmp_dir}/pigSmoke.sh"),
            tries=3,
            try_sleep=5,
            path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser,
            environment={'JAVA_HOME': params.java64_home},
            logoutput=True)

    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)