def status(self, env):
    """Run ``service slapd status`` through ``Execute``.

    ``env`` is accepted for interface compatibility and is not used here.
    """
    slapd_status_cmd = 'service slapd status'
    Execute(slapd_status_cmd)
def restart(self, env):
    """Re-apply the configuration, then restart the elasticsearch service.

    The ``elastic-env.sh`` script under ``params.conf_dir`` is sourced in the
    same shell command that performs the restart.
    """
    import params

    env.set_params(params)
    self.configure(env)

    env_script = params.conf_dir + "/elastic-env.sh"
    Execute("source " + env_script + ";service elasticsearch restart")
def oozie(is_server=False, upgrade_type=None):
    """Write the Oozie configuration files and supporting directories.

    :param is_server: when True, the Oozie user directory is first created in
      HDFS and ``oozie_server_specific(upgrade_type)`` runs at the end
    :param upgrade_type: passed through unchanged to ``oozie_server_specific``
    """
    import params

    if is_server:
        params.HdfsResource(params.oozie_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.oozie_user,
                            mode=params.oozie_hdfs_user_mode)
        params.HdfsResource(None, action="execute")

        # NOTE(review): the original indentation was lost; this call is
        # assumed to be server-only -- confirm against the upstream script.
        generate_logfeeder_input_config(
            'oozie',
            Template("input.config-oozie.json.j2", extra_imports=[default]))

    Directory(params.conf_dir,
              create_parents=True,
              owner=params.oozie_user,
              group=params.user_group)

    params.oozie_site = update_credential_provider_path(
        params.oozie_site,
        'oozie-site',
        os.path.join(params.conf_dir, 'oozie-site.jceks'),
        params.oozie_user,
        params.user_group,
        use_local_jceks=True)

    XmlConfig("oozie-site.xml",
              conf_dir=params.conf_dir,
              configurations=params.oozie_site,
              configuration_attributes=params.config['configurationAttributes']['oozie-site'],
              owner=params.oozie_user,
              group=params.user_group,
              mode=0o664)

    File(format("{conf_dir}/oozie-env.sh"),
         owner=params.oozie_user,
         content=InlineTemplate(params.oozie_env_sh_template),
         group=params.user_group)

    # The limits directory may not exist on some OSes, so create it before
    # writing files into it.
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'oozie.conf'),
         owner='root',
         group='root',
         mode=0o644,
         content=Template("oozie.conf.j2"))

    # Write log4j content when it is configured; otherwise only fix the
    # ownership/mode of a file that is already on disk.
    log4j_properties_path = format("{params.conf_dir}/oozie-log4j.properties")
    if params.log4j_props is not None:
        File(log4j_properties_path,
             mode=0o644,
             group=params.user_group,
             owner=params.oozie_user,
             content=InlineTemplate(params.log4j_props))
    elif os.path.exists(log4j_properties_path):
        File(log4j_properties_path,
             mode=0o644,
             group=params.user_group,
             owner=params.oozie_user)

    adminusers_path = format("{params.conf_dir}/adminusers.txt")
    if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_ADMIN_USER, params.stack_version_formatted):
        File(adminusers_path,
             mode=0o644,
             group=params.user_group,
             owner=params.oozie_user,
             content=Template('adminusers.txt.j2', oozie_admin_users=params.oozie_admin_users))
    else:
        File(adminusers_path,
             owner=params.oozie_user,
             group=params.user_group)

    # JDBC drivers for which the DB connection check jar is downloaded.
    drivers_needing_check_jar = (
        "com.mysql.jdbc.Driver",
        "com.microsoft.sqlserver.jdbc.SQLServerDriver",
        "org.postgresql.Driver",
        "oracle.jdbc.driver.OracleDriver",
    )
    if params.jdbc_driver_name in drivers_needing_check_jar:
        File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
             content=DownloadSource(format("{jdk_location}/{check_db_connection_jar_name}")))

    oozie_ownership()

    if params.lzo_enabled:
        install_lzo_if_needed()
        Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'))

    if is_server:
        oozie_server_specific(upgrade_type)
def rebalancehdfs(self, env):
    """Trigger an HDFS balancer run without waiting for it to finish.

    The balancer threshold is read from the JSON document in
    ``params.name_node_params``. When security is enabled, a Kerberos
    credentials cache is prepared for the HDFS user before the command is
    launched and the cache file is deleted afterwards. Nothing is launched
    when ``hdfs_rebalance.is_balancer_running()`` reports a running balancer.
    """
    import params
    env.set_params(params)

    name_node_parameters = json.loads(params.name_node_params)
    threshold = name_node_parameters['threshold']
    _print("Starting balancer with threshold = %s\n" % threshold)

    rebalance_env = {'PATH': params.hadoop_bin_dir}

    if params.security_enabled:
        # Create the kerberos credentials cache (ccache) file and set it in the environment to use
        # when executing HDFS rebalance command. Use the md5 hash of the combination of the principal and keytab file
        # to generate a (relatively) unique cache filename so that we can use it as needed.
        # TODO: params.tmp_dir=/var/lib/ambari-agent/tmp. However hdfs user doesn't have access to this path.
        # TODO: Hence using /tmp
        ccache_file_name = "hdfs_rebalance_cc_" + _md5(format("{hdfs_principal_name}|{hdfs_user_keytab}")).hexdigest()
        ccache_file_path = os.path.join(tempfile.gettempdir(), ccache_file_name)
        rebalance_env['KRB5CCNAME'] = ccache_file_path

        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        klist_cmd = format("{klist_path_local} -s {ccache_file_path}")
        kinit_cmd = format("{kinit_path_local} -c {ccache_file_path} -kt {hdfs_user_keytab} {hdfs_principal_name}")
        if shell.call(klist_cmd, user=params.hdfs_user)[0] != 0:
            Execute(kinit_cmd, user=params.hdfs_user)

    rebalance_command = format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
    command = as_user(rebalance_command, params.hdfs_user, env=rebalance_env)

    if threshold == 'DEBUG':  # FIXME TODO remove this on PROD
        command = ['ambari-python-wrap', 'hdfs-command.py']

    _print("Executing command %s\n" % command)

    # The output-parsing helpers that used to live here were never invoked:
    # the process is launched with wait_for_finish=False, so its output is
    # not read by this method.
    if not hdfs_rebalance.is_balancer_running():
        # As the rebalance may take a long time (hours, days) the process is only triggered.
        # Tracking the progress based on the command output is no longer supported due to this.
        Execute(command, wait_for_finish=False)
        _print("The rebalance process has been triggered")
    else:
        _print("There is another balancer running. This means you or another Ambari user may have triggered the "
               "operation earlier. The process may take a long time to finish (hours, even days). If the problem persists "
               "please consult with the HDFS administrators if they have triggred or killed the operation.")

    if params.security_enabled:
        # Delete the kerberos credentials cache (ccache) file
        File(ccache_file_path, action="delete")
def start(self, env, upgrade_type=None):
    """Run ``touch`` on the file named by ``params.splice_pid_file``.

    ``env`` and ``upgrade_type`` are not used by this implementation.
    """
    import params

    touch_pid_file = ("touch", params.splice_pid_file)
    Execute(touch_pid_file)
def _prepare_tez_tarball():
    """
    Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball.
    It's very important to use the version of mapreduce which matches tez here.
    Additionally, this will also copy native LZO to the tez tarball if LZO is enabled and the
    GPL license has been accepted.

    The temporary extraction directories are removed on every exit path, including failures.

    :return: the full path of the newly created tez tarball to use
    :raises Fail: when the Hadoop native lib directory, the Tez lib directory, or (with LZO
      enabled) the native LZO directory cannot be found
    """
    import tempfile

    Logger.info("Preparing the Tez tarball...")

    # get the mapreduce tarball which matches the version of tez
    # tez installs the mapreduce tar, so it should always be present
    _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce")
    _, tez_source_file, _, _ = get_tarball_paths("tez")

    temp_dir = Script.get_tmp_dir()

    # Every staging directory created below is recorded here so the finally
    # clause can remove it even when a later step raises.
    staging_dirs = []
    try:
        # create the temp staging directories ensuring that non-root agents using tarfile can work with them
        mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir)
        staging_dirs.append(mapreduce_temp_dir)
        tez_temp_dir = tempfile.mkdtemp(prefix="tez-tarball-", dir=temp_dir)
        staging_dirs.append(tez_temp_dir)

        sudo.chmod(mapreduce_temp_dir, 0o777)
        sudo.chmod(tez_temp_dir, 0o777)

        Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir))
        tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir)

        Logger.info("Extracting {0} to {1}".format(tez_source_file, tez_temp_dir))
        tar_archive.untar_archive(tez_source_file, tez_temp_dir)

        hadoop_lib_native_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib", "native")
        tez_lib_dir = os.path.join(tez_temp_dir, "lib")

        if not os.path.exists(hadoop_lib_native_dir):
            raise Fail("Unable to seed the Tez tarball with native libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_dir))

        if not os.path.exists(tez_lib_dir):
            raise Fail("Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist".format(tez_lib_dir))

        # copy native libraries from hadoop to tez
        Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo=True)

        # if enabled, LZO GPL libraries must be copied as well
        if lzo_utils.should_install_lzo():
            stack_root = Script.get_stack_root()
            service_version = component_version.get_component_repository_version(service_name="TEZ")

            # some installations might not have Tez, but MapReduce2 should be a fallback to get the LZO libraries from
            if service_version is None:
                Logger.warning("Tez does not appear to be installed, using the MapReduce version to get the LZO libraries")
                service_version = component_version.get_component_repository_version(service_name="MAPREDUCE2")

            hadoop_lib_native_lzo_dir = os.path.join(stack_root, service_version, "hadoop", "lib", "native")

            if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
                Logger.warning("Unable to located native LZO libraries at {0}, falling back to hadoop home".format(hadoop_lib_native_lzo_dir))
                hadoop_lib_native_lzo_dir = os.path.join(stack_root, "current", "hadoop-client", "lib", "native")

            if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
                raise Fail("Unable to seed the Tez tarball with native libraries since LZO is enabled but the native LZO libraries could not be found at {0}".format(hadoop_lib_native_lzo_dir))

            Execute(("cp", "-a", hadoop_lib_native_lzo_dir, tez_lib_dir), sudo=True)

        # ensure that the tez/lib directory is readable by non-root (which it typically is not)
        Directory(tez_lib_dir,
                  mode=0o755,
                  cd_access='a',
                  recursive_ownership=True)

        # create the staging directory so that non-root agents can write to it
        tez_native_tarball_staging_dir = os.path.join(temp_dir, "tez-native-tarball-staging")
        if not os.path.exists(tez_native_tarball_staging_dir):
            Directory(tez_native_tarball_staging_dir,
                      mode=0o777,
                      cd_access='a',
                      create_parents=True,
                      recursive_ownership=True)

        tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir, "tez-native.tar.gz")
        Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib))

        # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
        with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball:
            new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)

        # ensure that the tarball can be read and uploaded
        sudo.chmod(tez_tarball_with_native_lib, 0o744)

        return tez_tarball_with_native_lib
    finally:
        # cleanup (same order as creation: mapreduce first, then tez)
        for staging_dir in staging_dirs:
            sudo.rmtree(staging_dir)
def stop(self, env, upgrade_type=None):
    """Stop the elasticsearch service via ``service elasticsearch stop``.

    ``upgrade_type`` is accepted but not used by this implementation.
    """
    import params

    env.set_params(params)
    stop_cmd = format("service elasticsearch stop")
    # Single-argument print(...) emits the same text whether or not
    # print_function is in effect.
    print('Stop the Slave')
    Execute(stop_cmd)
def restart(self, env):
    """Restart the ``discounts-management-service`` systemd unit."""
    import params

    Logger.info('Restart DMS')
    env.set_params(params)

    restart_unit = ('systemctl', 'restart', 'discounts-management-service')
    Execute(restart_unit)
def chown_zeppelin_pid_dir(self, env):
    """Recursively chown ``params.zeppelin_pid_dir`` to the Zeppelin user and group.

    The ``user:group`` owner spec is rendered from the ``zeppelin_user`` and
    ``zeppelin_group`` parameters, and the command is run with ``sudo=True``.
    """
    import params

    env.set_params(params)

    owner_and_group = format("{zeppelin_user}:{zeppelin_group}")
    Execute(("chown", "-R", owner_and_group, params.zeppelin_pid_dir),
            sudo=True)
def start(self, env, upgrade_type=None):
    """Start the ``discounts-management-service`` systemd unit.

    ``upgrade_type`` is accepted but not used by this implementation.
    """
    import params

    Logger.info('Start DMS')
    env.set_params(params)

    start_unit = ('systemctl', 'start', 'discounts-management-service')
    Execute(start_unit)
def status(self, env):
    """Query the ``discounts-management-service`` systemd unit status."""
    import params

    Logger.info('Status check DMS')
    env.set_params(params)

    status_unit = ('systemctl', 'status', 'discounts-management-service')
    Execute(status_unit)
def service_check(self, env):
    """Run the Microsoft R Server smoke test and validate its output.

    Steps, in order:
      1. Best-effort creation of ``params.revo_share_hdfs_folder`` in HDFS
         (failures are only logged as a warning).
      2. kinit as the smoke user when security is enabled.
      3. Run the bundled R script through ``Revo64`` and tee its output to a file.
      4. Parse per-weekday counts out of that file and compare them with the
         expected values.

    :raises Fail: when seven weekday rows are not found or a count differs
      from the expected value
    """
    import params
    env.set_params(params)

    try:
        params.HdfsResource(params.revo_share_hdfs_folder,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hdfs_user,
                            mode=0o777)
        params.HdfsResource(None, action="execute")
    except Exception as exception:
        Logger.warning(
            "Could not check the existence of /user/RevoShare on HDFS, exception: {0}"
            .format(str(exception)))

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
        )
        Execute(kinit_cmd, user=params.smokeuser)

    output_file = format('{tmp_dir}/microsoft_r_server_serviceCheck.out')

    File(format("{tmp_dir}/microsoft_r_server_serviceCheck.r"),
         content=StaticFile("microsoft_r_server_serviceCheck.r"),
         mode=0o755)

    Execute(
        format("Revo64 --no-save < {tmp_dir}/microsoft_r_server_serviceCheck.r | tee {output_file}"),
        tries=1,
        try_sleep=1,
        path=format('/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
        user=params.smokeuser,
        logoutput=True)

    # Verify correct output
    from resource_management.core import sudo
    output_content = sudo.read_file(format('{output_file}'))

    import re
    values_list = re.findall(
        r"\s(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\s+(\d*)",
        output_content)

    if len(values_list) != 7:
        unverifiable_msg = "Unable to verify output of service check run"
        Logger.info(unverifiable_msg)
        raise Fail(unverifiable_msg)

    # Expected count per weekday, kept as strings because the regex captures text.
    dayCountDictionary = {
        'Monday': '97975',
        'Tuesday': '77725',
        'Wednesday': '78875',
        'Thursday': '81304',
        'Friday': '82987',
        'Saturday': '86159',
        'Sunday': '94975'
    }

    for day, count in values_list:
        expected_count = dayCountDictionary[day]
        if count == expected_count:
            continue
        mismatch_msg = (
            "Service check produced incorrect output for {0}. Was expecting {1} but encountered {2}"
            .format(day, expected_count, count))
        Logger.info(mismatch_msg)
        raise Fail(mismatch_msg)
def setup_logsearch():
    """Create the Log Search directories, credentials and configuration files.

    Also performs a kinit and writes the JAAS file when security is enabled,
    makes the Solr config sets world-readable, calls ``check_znode()`` and,
    for a secured cluster that does not use an external Solr, registers the
    Solr roles for the Log Search and Log Feeder principals.
    """
    import params

    # Owner/group pair shared by nearly every resource below.
    ownership_kwargs = {
        'owner': params.logsearch_user,
        'group': params.user_group,
    }

    Directory([params.logsearch_log_dir, params.logsearch_pid_dir],
              mode=0o755,
              cd_access='a',
              create_parents=True,
              **ownership_kwargs)

    Directory([params.logsearch_dir,
               params.logsearch_server_conf,
               params.logsearch_config_set_dir],
              mode=0o755,
              cd_access='a',
              create_parents=True,
              recursive_ownership=True,
              **ownership_kwargs)

    Directory(params.logsearch_server_keys_folder,
              cd_access='a',
              mode=0o755,
              **ownership_kwargs)

    File(params.logsearch_log,
         mode=0o644,
         content='',
         **ownership_kwargs)

    if params.credential_store_enabled:
        # Passwords live in the credential store: point the properties at the
        # JCEKS file and remove any plain-text password files.
        params.logsearch_env_config = update_credential_provider_path(
            params.logsearch_env_config,
            'logsearch-env',
            params.logsearch_env_jceks_file,
            params.logsearch_user,
            params.user_group)
        params.logsearch_properties[HADOOP_CREDENTIAL_PROVIDER_PROPERTY_NAME] = \
            'jceks://file' + params.logsearch_env_jceks_file
        File(format("{logsearch_server_keys_folder}/ks_pass.txt"),
             action="delete")
        File(format("{logsearch_server_keys_folder}/ts_pass.txt"),
             action="delete")
    else:
        File(format("{logsearch_server_keys_folder}/ks_pass.txt"),
             content=params.logsearch_keystore_password,
             mode=0o600,
             **ownership_kwargs)
        File(format("{logsearch_server_keys_folder}/ts_pass.txt"),
             content=params.logsearch_truststore_password,
             mode=0o600,
             **ownership_kwargs)

    PropertiesFile(format("{logsearch_server_conf}/logsearch.properties"),
                   properties=params.logsearch_properties)

    File(format("{logsearch_server_conf}/HadoopServiceConfig.json"),
         content=Template("HadoopServiceConfig.json.j2"),
         **ownership_kwargs)

    File(format("{logsearch_server_conf}/log4j.xml"),
         content=InlineTemplate(params.logsearch_app_log4j_content),
         **ownership_kwargs)

    File(format("{logsearch_server_conf}/logsearch-env.sh"),
         content=InlineTemplate(params.logsearch_env_content),
         mode=0o755,
         **ownership_kwargs)

    File(format("{logsearch_server_conf}/logsearch-admin.json"),
         content=InlineTemplate(params.logsearch_admin_content),
         **ownership_kwargs)

    File(format("{logsearch_config_set_dir}/hadoop_logs/conf/solrconfig.xml"),
         content=InlineTemplate(params.logsearch_service_logs_solrconfig_content),
         **ownership_kwargs)

    File(format("{logsearch_config_set_dir}/audit_logs/conf/solrconfig.xml"),
         content=InlineTemplate(params.logsearch_audit_logs_solrconfig_content),
         **ownership_kwargs)

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {logsearch_kerberos_keytab} {logsearch_kerberos_principal};"
        )
        Execute(kinit_cmd, user=params.logsearch_user)

        File(format("{logsearch_jaas_file}"),
             content=Template("logsearch_jaas.conf.j2"),
             owner=params.logsearch_user)

    Execute(("chmod", "-R", "ugo+r",
             format("{logsearch_server_conf}/solr_configsets")),
            sudo=True)
    check_znode()

    if params.security_enabled and not params.logsearch_use_external_solr:
        logsearch_roles = [
            params.infra_solr_role_logsearch,
            params.infra_solr_role_ranger_admin,
            params.infra_solr_role_dev,
        ]
        solr_cloud_util.add_solr_roles(
            params.config,
            roles=logsearch_roles,
            new_service_principals=[params.logsearch_kerberos_principal])

        logfeeder_roles = [
            params.infra_solr_role_logfeeder,
            params.infra_solr_role_dev,
        ]
        solr_cloud_util.add_solr_roles(
            params.config,
            roles=logfeeder_roles,
            new_service_principals=[params.logfeeder_kerberos_principal])
def hive_service(name, action='start', upgrade_type=None):
    """Start or stop a Hive daemon identified by ``name``.

    :param name: ``'metastore'`` or ``'hiveserver2'``; selects the pid file
      and start command
    :param action: ``'start'`` or ``'stop'``
    :param upgrade_type: when equal to ``UPGRADE_TYPE_ROLLING`` the start
      command is issued even if the old pid is still alive
    """
    import params

    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format("{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}")
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format("{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}")

        # NOTE(review): original indentation was lost; the kinit is assumed to
        # belong to the hiveserver2 branch (it uses the hive_server2 keytab).
        if params.security_enabled and params.current_version and check_stack_feature(StackFeature.HIVE_SERVER2_KERBERIZED_ENV, params.current_version):
            hive_kinit_cmd = format("{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; ")
            Execute(hive_kinit_cmd, user=params.hive_user)

    # Second element of the call result is the pid file content.
    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=params.hive_user,
        is_checked_call=False)[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root(params.hive_server_conf_dir, params.execute_path)

        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            # NOTE(review): assumed to be nested under the rolling-upgrade
            # check -- confirm against the upstream script.
            if params.version and params.stack_root:
                import os
                hadoop_home = format("{stack_root}/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        Execute(cmd,
                user=params.hive_user,
                environment={
                    'HADOOP_HOME': hadoop_home,
                    'JAVA_HOME': params.java64_home,
                    'HIVE_BIN': hive_bin,
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        if params.hive_jdbc_driver in ("com.mysql.jdbc.Driver",
                                       "org.postgresql.Driver",
                                       "oracle.jdbc.driver.OracleDriver"):
            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target_hive} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}")

            try:
                Execute(db_connection_check_command,
                        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                        tries=5,
                        try_sleep=10)
            except:
                show_logs(params.hive_log_dir, params.hive_user)
                raise

    elif action == 'stop':
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")

        # Polite kill first; skipped when the process is already gone.
        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        # Hard kill only if the process is still there after a short wait.
        wait_time = 5
        Execute(daemon_hard_kill_cmd,
                not_if=format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"))

        try:
            # check if stopped the process, else fail the task
            Execute(format("! ({process_id_exists_command})"),
                    tries=20,
                    try_sleep=3)
        except:
            show_logs(params.hive_log_dir, params.hive_user)
            raise

        File(pid_file, action="delete")
def initiate_safe_zkfc_failover():
    """
    If this is the active namenode, initiate a safe failover and wait for it to become the standby.

    If an error occurs, force a failover to happen by killing zkfc on this host. In this case, during the Restart,
    will also have to start ZKFC manually.
    """
    import params

    # Must kinit before running the HDFS command
    if params.security_enabled:
        Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
                user=params.hdfs_user)

    active_namenodes, standby_namenodes, unknown_namenodes = get_namenode_states(
        params.hdfs_site, params.security_enabled, params.hdfs_user)

    # Each state entry is indexable; element 0 is the NameNode id.
    active_namenode_id = active_namenodes[0][0] if active_namenodes else None
    standby_namenode_id = standby_namenodes[0][0] if standby_namenodes else None

    if active_namenode_id:
        Logger.info(format("Active NameNode id: {active_namenode_id}"))
    if standby_namenode_id:
        Logger.info(format("Standby NameNode id: {standby_namenode_id}"))
    for unknown_namenode in unknown_namenodes or ():
        Logger.info("NameNode HA state for {0} is unknown".format(unknown_namenode[0]))

    this_is_active_with_standby_peer = (
        params.namenode_id == active_namenode_id
        and params.other_namenode_id == standby_namenode_id)

    if not this_is_active_with_standby_peer:
        msg = "Rolling Upgrade - Skipping ZKFC failover on NameNode host {0}.".format(params.hostname)
        Logger.info(msg)
        return

    # Failover if this NameNode is active and other NameNode is up and in standby
    # (i.e. ready to become active on failover)
    Logger.info(format("NameNode {namenode_id} is active and NameNode {other_namenode_id} is in standby"))

    failover_command = format("hdfs haadmin -failover {namenode_id} {other_namenode_id}")
    check_standby_cmd = format("hdfs haadmin -getServiceState {namenode_id} | grep standby")

    msg = "Rolling Upgrade - Initiating a ZKFC failover on active NameNode host {0}.".format(params.hostname)
    Logger.info(msg)

    code, out = shell.call(failover_command, user=params.hdfs_user, logoutput=True)
    Logger.info(format("Rolling Upgrade - failover command returned {code}"))

    if code == 0:
        wait_for_standby = True
    else:
        # Try to kill ZKFC manually
        was_zkfc_killed = kill_zkfc(params.hdfs_user)
        code, out = shell.call(check_standby_cmd, user=params.hdfs_user, logoutput=True)
        Logger.info(format("Rolling Upgrade - check for standby returned {code}"))

        if code == 255 and out:
            Logger.info("Rolling Upgrade - NameNode is already down.")
            wait_for_standby = False
        else:
            # Only mandate that this be the standby namenode if ZKFC was indeed
            # killed to initiate a failover.
            wait_for_standby = bool(was_zkfc_killed)

    if wait_for_standby:
        Logger.info("Waiting for this NameNode to become the standby one.")
        Execute(check_standby_cmd,
                user=params.hdfs_user,
                tries=50,
                try_sleep=6,
                logoutput=True)
def install(self, env):
    """Download, unpack and prepare the Azkaban executor server, then configure it.

    Besides unpacking the server tarball into ``AZKABAN_EXEC_HOME``, this
    compiles the ``execute-as-user`` helper, installs it under ``native-lib``
    owned by root with mode 6050 (setuid + setgid, group read/execute only),
    and writes the job-type properties that enable it. Finishes by calling
    ``self.configure(env)``.
    """
    from params import java_home

    native_lib_dir = AZKABAN_EXEC_HOME + '/native-lib'
    execute_as_user_binary = native_lib_dir + '/execute-as-user'
    commonprivate_properties = (
        AZKABAN_EXEC_HOME + '/plugins/jobtypes/commonprivate.properties')

    # Fetch the server tarball and the helper's C source.
    Execute('{0} | xargs wget -O /tmp/azkaban-exec.tgz'.format(
        AZKABAN_EXECUTOR_URL))
    Execute('{0} | xargs wget -O /tmp/execute-as-user.c'.format(
        AZKABAN_EXEC_AS_USER_C_URL))

    # Unpack, drop the archive, and move the tree to its final location.
    Execute(
        'export JAVA_HOME={0} && tar -zxvf /tmp/azkaban-exec.tgz -C {1}'.
        format(java_home, AZKABAN_INSTALL_DIR))
    Execute('rm -f /tmp/azkaban-exec.tgz')
    Execute('mv /usr/local/azkaban-exec-server-0.1.0-SNAPSHOT {0}'.format(
        AZKABAN_EXEC_HOME))

    # Build and install the execute-as-user helper.
    Execute('mkdir {0}'.format(native_lib_dir))
    Execute('gcc /tmp/execute-as-user.c -o /tmp/execute-as-user')
    Execute('cp /tmp/execute-as-user {0}'.format(native_lib_dir))
    Execute('chown root {0}'.format(execute_as_user_binary))
    # 6050 is a permission mode, so this must be chmod. The previous
    # "chown 6050" re-owned the binary to uid 6050, undoing the chown to root
    # and leaving the setuid/setgid bits unset.
    Execute('chmod 6050 {0}'.format(execute_as_user_binary))

    # Enable execute-as-user for the job types.
    Execute('echo execute.as.user=true > {0} '.format(
        commonprivate_properties))
    Execute('echo azkaban.native.lib={0} >> {1} '.format(
        native_lib_dir, commonprivate_properties))
    Execute('echo azkaban.group.name=hadoop >> {0} '.format(
        commonprivate_properties))

    self.configure(env)
def _prepare_mapreduce_tarball():
    """
    Prepares the mapreduce tarball by including the native LZO libraries if necessary. If LZO is
    not enabled or has not been opted-in, then this will do nothing and return the original
    tarball to upload to HDFS.

    The temporary extraction directory is removed on every exit path, including failures.

    :return: the full path of the newly created mapreduce tarball to use or the original path
      if no changes were made
    :raises Fail: when the source Hadoop native lib directory does not exist
    """
    # get the mapreduce tarball to crack open and add LZO libraries to
    _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce")

    if not lzo_utils.should_install_lzo():
        return mapreduce_source_file

    Logger.info("Preparing the mapreduce tarball with native LZO libraries...")

    temp_dir = Script.get_tmp_dir()

    # create the temp staging directories ensuring that non-root agents using tarfile can work with them
    mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir)
    try:
        sudo.chmod(mapreduce_temp_dir, 0o777)

        # calculate the source directory for LZO
        hadoop_lib_native_source_dir = os.path.join(os.path.dirname(mapreduce_source_file), "lib", "native")
        if not sudo.path_exists(hadoop_lib_native_source_dir):
            raise Fail("Unable to seed the mapreduce tarball with native LZO libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_source_dir))

        Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir))
        tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir)

        mapreduce_lib_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib")

        # copy native libraries from source hadoop to target
        Execute(("cp", "-af", hadoop_lib_native_source_dir, mapreduce_lib_dir), sudo=True)

        # ensure that the hadoop/lib/native directory is readable by non-root (which it typically is not)
        Directory(mapreduce_lib_dir,
                  mode=0o755,
                  cd_access='a',
                  recursive_ownership=True)

        # create the staging directory so that non-root agents can write to it
        mapreduce_native_tarball_staging_dir = os.path.join(temp_dir, "mapreduce-native-tarball-staging")
        if not os.path.exists(mapreduce_native_tarball_staging_dir):
            Directory(mapreduce_native_tarball_staging_dir,
                      mode=0o777,
                      cd_access='a',
                      create_parents=True,
                      recursive_ownership=True)

        mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz")
        Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib))

        # tar up mapreduce, making sure to specify nothing for the arcname so that it does not include an absolute path
        with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball:
            new_tarball.add(mapreduce_temp_dir, arcname=os.path.sep)

        # ensure that the tarball can be read and uploaded
        sudo.chmod(mapreduce_tarball_with_native_lib, 0o744)

        return mapreduce_tarball_with_native_lib
    finally:
        # cleanup, also when a step above raised
        sudo.rmtree(mapreduce_temp_dir)
def stop(self, env):
    """Run ``bin/shutdown-exec.sh`` from ``AZKABAN_EXEC_HOME``.

    ``java_home``'s ``bin`` directory is appended to ``PATH`` for the command.
    ``env`` is not used by this implementation.
    """
    from params import java_home

    shutdown_cmd = (
        'cd {0} && export PATH=$PATH:{1}/bin && bin/shutdown-exec.sh'
    ).format(AZKABAN_EXEC_HOME, java_home)
    Execute(shutdown_cmd)
def spark_service(name, upgrade_type=None, action=None):
    """Start or stop a Spark daemon.

    :param name: ``'jobhistoryserver'`` or ``'sparkthriftserver'``
    :param upgrade_type: when not None, ``params.version`` is used as the
      effective stack version instead of ``params.stack_version_formatted``
    :param action: ``'start'`` or ``'stop'``; any other value does nothing
    """
    import params

    if action == 'start':
        if upgrade_type is not None:
            effective_version = params.version
        else:
            effective_version = params.stack_version_formatted
        if effective_version:
            effective_version = format_stack_version(effective_version)

        if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
            # create & copy spark-hdp-yarn-archive.tar.gz to hdfs
            if not params.sysprep_skip_copy_tarballs_hdfs:
                source_dir = params.spark_home + "/jars"
                tmp_archive_file = get_tarball_paths("spark")[1]
                make_tarfile(tmp_archive_file, source_dir)
                copy_to_hdfs("spark",
                             params.user_group,
                             params.hdfs_user,
                             skip=params.sysprep_skip_copy_tarballs_hdfs,
                             replace_existing_files=True)

            # create spark history directory
            params.HdfsResource(params.spark_history_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.spark_user,
                                group=params.user_group,
                                mode=0o777,
                                recursive_chmod=True)
            params.HdfsResource(None, action="execute")

        if params.security_enabled:
            spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
            Execute(spark_kinit_cmd, user=params.spark_user)

        # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
        # need to copy the tarball, otherwise, copy it.
        if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
            resource_created = copy_to_hdfs("tez",
                                            params.user_group,
                                            params.hdfs_user,
                                            skip=params.sysprep_skip_copy_tarballs_hdfs)
            if resource_created:
                params.HdfsResource(None, action="execute")

        if name == 'jobhistoryserver':
            # Skip the start when the pid file exists and its process is alive.
            historyserver_no_op_test = format(
                'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
            try:
                Execute(format('{spark_history_server_start}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=historyserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise

        elif name == 'sparkthriftserver':
            if params.security_enabled:
                hive_principal = params.hive_kerberos_principal
                hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
                Execute(hive_kinit_cmd, user=params.hive_user)

            # Skip the start when the pid file exists and its process is alive.
            thriftserver_no_op_test = format(
                'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
            try:
                Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                        user=params.hive_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=thriftserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.hive_user)
                raise

    elif action == 'stop':
        if name == 'jobhistoryserver':
            try:
                Execute(format('{spark_history_server_stop}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise
            File(params.spark_history_server_pid_file, action="delete")

        elif name == 'sparkthriftserver':
            try:
                Execute(format('{spark_thrift_server_stop}'),
                        user=params.hive_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.hive_user)
                raise
            File(params.spark_thrift_server_pid_file, action="delete")
def status(self, env):
    """Report the metron-rest service status.

    :raises ComponentIsNotRunning: when ``service metron-rest status`` fails
      with ``ExecutionFailed``
    """
    rest_status_cmd = format('service metron-rest status')
    try:
        Execute(rest_status_cmd)
    except ExecutionFailed:
        raise ComponentIsNotRunning()
def status(self, env):
    """Query the elasticsearch service via ``service elasticsearch status``."""
    import params

    env.set_params(params)
    status_cmd = format("service elasticsearch status")
    # Single-argument print(...) emits the same text whether or not
    # print_function is in effect.
    print('Status of the Slave')
    Execute(status_cmd)
def stop(self, env):
    """Stop the ``graphite-web`` systemd unit."""
    import params

    env.set_params(params)

    stop_graphite_cmd = "systemctl stop graphite-web"
    Execute(stop_graphite_cmd)
def convert_conf_directories_to_symlinks(package, version, dirs, skip_existing_links=True, link_to="current"):
    """
    Assumes HDP 2.3+, moves around directories and creates the conf symlink for the given package.
    If the package does not exist, then no work is performed.

    - Creates a /etc/<component>/conf.backup directory
    - Copies all configs from /etc/<component>/conf to conf.backup
    - Removes /etc/<component>/conf
    - Creates /etc/<component>/<version>/0 via conf-select
    - /usr/hdp/current/<component>-client/conf -> /etc/<component>/<version>/0 via conf-select
    - Links /etc/<component>/conf to <something> depending on function parameter
    -- /etc/<component>/conf -> /usr/hdp/current/[component]-client/conf (usually)
    -- /etc/<component>/conf -> /etc/<component>/conf.backup (only when supporting < HDP 2.3)

    NOTE(review): ``format`` is called here without arguments, so its
    ``{backup_dir}`` / ``{old_conf}`` placeholders are presumably resolved from
    this function's local variables -- keep those local names unchanged.

    :param package: the package to create symlinks for (zookeeper, falcon, etc)
    :param version: the version number to use with conf-select (2.3.0.0-1234)
    :param dirs: the directories associated with the package (from PACKAGE_DIRS);
      each entry is a dict read for 'conf_dir', 'current_dir' and optionally 'prefix'
    :param skip_existing_links: True to not do any work if already a symlink
    :param link_to: link to "current" or "backup"
    """
    missing_conf_dirs = [dir_def['conf_dir'] for dir_def in dirs
                         if not os.path.exists(dir_def['conf_dir'])]
    if missing_conf_dirs:
        Logger.info("Skipping {0} as it does not exist.".format(",".join(missing_conf_dirs)))
        return

    # existing links should be skipped since we assume there's no work to do
    if skip_existing_links:
        found_existing_link = False
        for dir_def in dirs:
            # check if conf is a link already
            old_conf = dir_def['conf_dir']
            if os.path.islink(old_conf):
                Logger.info("{0} is already linked to {1}".format(old_conf, os.path.realpath(old_conf)))
                found_existing_link = True
        if found_existing_link:
            return

    # make backup dir and copy everything in case configure() was called after install()
    # Remember the backup location of every conf dir so that the linking step
    # below points each conf dir at its own backup, not at the last one computed.
    conf_backup_dirs = {}
    for dir_def in dirs:
        old_conf = dir_def['conf_dir']
        old_parent = os.path.abspath(os.path.join(old_conf, os.pardir))
        backup_dir = os.path.join(old_parent, "conf.backup")
        conf_backup_dirs[old_conf] = backup_dir
        Logger.info("Backing up {0} to {1} if destination doesn't exist already.".format(old_conf, backup_dir))
        Execute(("cp", "-R", "-p", old_conf, backup_dir),
                not_if=format("test -e {backup_dir}"),
                sudo=True)

    # we're already in the HDP stack
    # Create the versioned /etc/[component]/[version]/0 folder.
    # The component must be installed on the host.
    versioned_confs = create("HDP", package, version, dry_run=True)

    Logger.info("Package {0} will have new conf directories: {1}".format(package, ", ".join(versioned_confs)))

    need_dirs = [d for d in versioned_confs if not os.path.exists(d)]

    if need_dirs:
        create("HDP", package, version)

        # find the matching definition and back it up (not the most efficient way) ONLY if there is more than one directory
        if len(dirs) > 1:
            for need_dir in need_dirs:
                for dir_def in dirs:
                    if 'prefix' in dir_def and need_dir.startswith(dir_def['prefix']):
                        old_conf = dir_def['conf_dir']
                        versioned_conf = need_dir
                        Execute(as_sudo(["cp", "-R", "-p", os.path.join(old_conf, "*"), versioned_conf], auto_escape=False),
                                only_if=format("ls -d {old_conf}/*"))
        elif 1 == len(dirs) and 1 == len(need_dirs):
            old_conf = dirs[0]['conf_dir']
            versioned_conf = need_dirs[0]
            Execute(as_sudo(["cp", "-R", "-p", os.path.join(old_conf, "*"), versioned_conf], auto_escape=False),
                    only_if=format("ls -d {old_conf}/*"))

    # /usr/hdp/current/[component] is already set to the correct version, e.g., /usr/hdp/[version]/[component]
    select("HDP", package, version, ignore_errors=True)

    # Symlink /etc/[component]/conf to /etc/[component]/conf.backup
    try:
        # No more references to /etc/[component]/conf
        for dir_def in dirs:
            # E.g., /etc/[component]/conf
            new_symlink = dir_def['conf_dir']

            # Remove new_symlink to pave the way, but only if it's a directory
            if not os.path.islink(new_symlink):
                Directory(new_symlink, action="delete")

            if link_to in ["current", "backup"]:
                # link /etc/[component]/conf -> /usr/hdp/current/[component]-client/conf
                if link_to == "backup":
                    # Use the backup that belongs to this conf dir.
                    Link(new_symlink, to=conf_backup_dirs[new_symlink])
                else:
                    Link(new_symlink, to=dir_def['current_dir'])
            else:
                Logger.error("Unsupported 'link_to' argument. Could not link package {0}".format(package))
    except Exception as e:
        # Best effort: a failed relink is reported but does not abort the caller.
        Logger.warning("Could not change symlink for package {0} to point to {1} directory. Error: {2}".format(package, link_to, e))
def start(self, env):
    """
    Install and configure graphite-web, then start it through systemctl.

    :param env: execution environment; receives the ``params`` module and is
      passed on to ``self.configure``
    """
    import params

    env.set_params(params)
    install_graphite_web()
    self.configure(env)
    graphite_start = "systemctl start graphite-web"
    Execute(graphite_start)
def stop(self, env, upgrade_type=None):
    """
    Remove the pid file named by ``params.splice_pid_file`` with ``rm``.

    :param env: execution environment (not used by this method)
    :param upgrade_type: accepted for interface compatibility (not used here)
    """
    import params

    remove_pid_file = ("rm", params.splice_pid_file)
    Execute(remove_pid_file)
def install(self, env):
    """
    Install the required packages with a single ``yum install -y`` call.

    :param env: execution environment (not used by this method)
    """
    yum_packages = (
        "blas",
        "lapack",
        "libffi-devel",
        "openblas",
        "openblas-devel",
        "openblas-openmp",
        "openblas-threads",
        "superset_2_6_1_0_129",
    )
    Execute("yum install -y " + " ".join(yum_packages))
def stop(self, env):
    """
    Source elastic-env.sh from ``params.conf_dir`` and stop elasticsearch.

    :param env: execution environment; receives the ``params`` module
    """
    import params

    env.set_params(params)
    env_script = params.conf_dir + "/elastic-env.sh"
    Execute("source " + env_script + "; service elasticsearch stop")
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False):
    """
    Start or stop a Hadoop daemon through ``hadoop-daemon.sh``.

    NOTE(review): ``format`` is called here without arguments, so the
    ``{placeholders}`` in its templates are presumably resolved from this
    function's local variables and from ``params`` -- do not rename a local
    that appears inside a template string without confirming.

    :param action: Either "start" or "stop"
    :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc"
    :param user: User to run the command as
    :param options: Additional options to pass to command as a string
    :param create_pid_dir: Create PID directory
    :param create_log_dir: Create log file directory
    """
    import params

    options = options if options else ""
    pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
    pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
    hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir}
    log_dir = format("{hdfs_log_dir_prefix}/{user}")

    # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
    # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
    if name == "nfs3":
        pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid")
        custom_export = {
            'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user,
            'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir,
            'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir,
        }
        hadoop_env_exports.update(custom_export)

    # Succeeds only when the pid file exists and names a live process.
    process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])

    # on STOP directories shouldn't be created
    # since during stop still old dirs are used (which were created during previous start)
    if action != "stop":
        if name == "nfs3":
            Directory(params.hadoop_pid_dir_prefix,
                      mode=0o755,
                      owner=params.root_user,
                      group=params.root_group)
        else:
            Directory(params.hadoop_pid_dir_prefix,
                      mode=0o755,
                      owner=params.hdfs_user,
                      group=params.user_group)
        if create_pid_dir:
            Directory(pid_dir,
                      owner=user,
                      recursive=True)
        if create_log_dir:
            if name == "nfs3":
                Directory(log_dir,
                          mode=0o775,
                          owner=params.root_user,
                          group=params.user_group)
            else:
                Directory(log_dir,
                          owner=user,
                          recursive=True)

    if params.security_enabled and name == "datanode":
        ## The directory where pid files are stored in the secure data environment.
        hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
        hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")

        # At Champlain stack and further, we may start datanode as a non-root even in secure cluster
        if not (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) or params.secure_dn_ports_are_in_use:
            # Must be the literal "root" so the sudo branch below is taken.
            user = "root"
            pid_file = format("{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid")

        if action == 'stop' and (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) and \
                os.path.isfile(hadoop_secure_dn_pid_file):
            # We need special handling for this case to handle the situation
            # when we configure non-root secure DN and then restart it
            # to handle new configs. Otherwise we will not be able to stop
            # a running instance
            user = "root"

            try:
                check_process_status(hadoop_secure_dn_pid_file)

                custom_export = {'HADOOP_SECURE_DN_USER': params.hdfs_user}
                hadoop_env_exports.update(custom_export)
            except ComponentIsNotRunning:
                # No secure DataNode process is alive; leave the exports untouched.
                pass

    hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh")

    if user == "root":
        # Root runs the daemon script via sudo with an argument list.
        cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name]
        if options:
            cmd += [options, ]
        daemon_cmd = as_sudo(cmd)
    else:
        # Any other user runs a shell string prefixed with the ulimit command.
        cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}")
        if options:
            cmd += " " + options
        daemon_cmd = as_user(cmd, user)

    if action == "start":
        # remove pid file from dead process
        File(pid_file, action="delete", not_if=process_id_exists_command)
        Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
    elif action == "stop":
        Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports)
        File(pid_file, action="delete")
def oozie_server_specific(upgrade_type):
    """
    Declare, in order, the server-side resources for Oozie on this host.

    The steps visible here are: drop a stale pid file, create the server
    directories, link the webapps conf dir, unpack the sharelib tarball, copy
    the ext zip into libext, fetch the database library, optionally copy the
    Falcon EL extension, prepare the WAR, write hive-site/tez-site (and the
    Atlas Hive hook) when the stack feature is available, and finally fix
    ownership of the server dir and write the zkmigrator JAAS file on secure
    clusters.

    NOTE(review): ``format`` is called without arguments, so its placeholders
    are presumably resolved from local variables (``hashcode_file``,
    ``no_op_test``, ``skip_recreate_sharelib``) and from ``params`` -- confirm
    before renaming any of those locals.

    :param upgrade_type: passed through to ``get_oozie_ext_zip_source_paths``
    """
    import params

    # Shell test that succeeds when the pid file exists and its process is alive;
    # most steps below are skipped (not_if) while it succeeds.
    no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)

    # Delete the pid file unless the test above succeeds.
    File(params.pid_file,
         action="delete",
         not_if=no_op_test
    )

    oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
    Directory( oozie_server_directories,
               owner = params.oozie_user,
               group = params.user_group,
               mode = 0755,
               create_parents = True,
               cd_access="a",
    )

    Directory(params.oozie_libext_dir,
              create_parents = True,
    )

    hashcode_file = format("{oozie_home}/.hashcode")
    # Sharelib extraction is skipped when the hashcode file and share/ dir both exist.
    skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

    untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

    Execute(('ln','-sf', format('{oozie_webapps_conf_dir}'), format('{oozie_server_dir}')), sudo=True)

    Execute( untar_sharelib, # time-expensive
             not_if = format("{no_op_test} || {skip_recreate_sharelib}"),
             sudo = True,
    )

    configure_cmds = []
    # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
    source_ext_zip_paths = get_oozie_ext_zip_source_paths(upgrade_type, params)

    # Copy the first oozie ext-2.2.zip file that is found.
    # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
    if source_ext_zip_paths is not None:
        for source_ext_zip_path in source_ext_zip_paths:
            if os.path.isfile(source_ext_zip_path):
                configure_cmds.append(('cp', source_ext_zip_path, params.oozie_libext_dir))
                configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))

                Execute(configure_cmds,
                        not_if=no_op_test,
                        sudo=True,
                )
                # Only the first existing source path is used.
                break

    Directory(params.oozie_webapps_conf_dir,
              owner = params.oozie_user,
              group = params.user_group,
              recursive_ownership = True,
              recursion_follow_links = True,
    )

    # download the database JAR
    download_database_library_if_needed()

    # Falcon EL extension: copy the jar(s) into libext and hand them to the oozie user
    if params.has_falcon_host:
        Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
                not_if = no_op_test)

        Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
                not_if = no_op_test)

    prepare_war(params)

    # The hashcode file is the marker tested by skip_recreate_sharelib above.
    File(hashcode_file,
         mode = 0644,
    )

    if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS, params.stack_version_formatted):
        # Create hive-site and tez-site configs for oozie
        Directory(params.hive_conf_dir,
                  create_parents = True,
                  owner = params.oozie_user,
                  group = params.user_group
        )
        if 'hive-site' in params.config['configurations']:
            hive_site_config = update_credential_provider_path(params.config['configurations']['hive-site'],
                                                               'hive-site',
                                                               os.path.join(params.hive_conf_dir, 'hive-site.jceks'),
                                                               params.oozie_user,
                                                               params.user_group
            )
            XmlConfig("hive-site.xml",
                      conf_dir=params.hive_conf_dir,
                      configurations=hive_site_config,
                      configuration_attributes=params.config['configurationAttributes']['hive-site'],
                      owner=params.oozie_user,
                      group=params.user_group,
                      mode=0644
            )
        if 'tez-site' in params.config['configurations']:
            XmlConfig( "tez-site.xml",
                       conf_dir = params.hive_conf_dir,
                       configurations = params.config['configurations']['tez-site'],
                       configuration_attributes=params.config['configurationAttributes']['tez-site'],
                       owner = params.oozie_user,
                       group = params.user_group,
                       mode = 0664
            )

        # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory
        # {stack_root}/{current_version}/atlas/hook/hive/
        # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie.
        # However, should still save the file on this host so that can upload it to the Oozie Sharelib in DFS.
        if has_atlas_in_cluster():
            atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
            Logger.info("Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath))
            setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group)

    Directory(params.oozie_server_dir,
              owner = params.oozie_user,
              group = params.user_group,
              recursive_ownership = True,
    )
    if params.security_enabled:
        # Secure clusters additionally get the zkmigrator JAAS file rendered from its template.
        File(os.path.join(params.conf_dir, 'zkmigrator_jaas.conf'),
             owner=params.oozie_user,
             group=params.user_group,
             content=Template("zkmigrator_jaas.conf.j2")
        )
def start(self, env):
    """
    Configure the component, then start slapd through the service command.

    :param env: execution environment, passed on to ``self.configure``
    """
    self.configure(env)
    slapd_start = 'service slapd start'
    Execute(slapd_start)