def status(self, env): import status_params env.set_params(status_params) pid_file = format("{hive_pid_dir}/{hive_pid}") # Recursively check all existing gmetad pid files check_process_status(pid_file)
def start(self, env):
  import os, sys, time, errno
  import jkg_toree_params as params
  env.set_params(params)
  self.configure(env)
  delay_checks = 8

  # Need HDFS started for the next step
  helpers.create_hdfs_dirs(params.user, params.group, params.dirs)
  Execute(params.start_command, user=params.user, logoutput=True)
  check_process_status(params.jkg_pid_file)
  time.sleep(delay_checks)
  with open(params.jkg_pid_file, 'r') as fp:
    try:
      os.kill(int(fp.read().strip()), 0)
    except OSError as ose:
      if ose.errno != errno.EPERM:
        raise Fail(
          "Error starting Jupyter Kernel Gateway. Check {0} for the possible cause."
          .format(params.log_dir + "/jupyter_kernel_gateway.log"))
      else:
        # A non-root install might have to resort to the status check, but
        # with the side effect that any error might only be reflected during
        # the status check after a minute rather than immediately
        check_process_status(params.jkg_pid_file)
def status(self, env): import status_params env.set_params(status_params) pid_file = format("{spark_history_server_pid_file}") # Recursively check all existing gmetad pid files check_process_status(pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  # The 'llap' status check is intentionally not done here as part of the 'HSI'
  # status check, since the 'llap' status check is a heavyweight operation.
  check_process_status(status_params.hive_interactive_pid)
def status(self, env): import params env.set_params(params) import os if not os.path.exists(self.pid_file): Execute( "echo `ps aux|grep '/etc/confluent-kafka-mqtt/kafka-mqtt-production.properties' | grep -v grep | awk '{print $2}'` > " + self.pid_file) check_process_status(self.pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  try:
    pid_file = glob.glob(status_params.dpprofiler_pid_file)[0]
  except IndexError:
    pid_file = ''
  check_process_status(pid_file)
def status(self, env): import params env.set_params(params) import os if not os.path.exists(params.pid_file): Execute( "echo `ps aux|grep 'edp.rider.RiderStarter' | grep -v grep | awk '{print $2}'` > " + self.pid_file) check_process_status(params.pid_file)
def status(self, env): import params env.set_params(params) import os if not os.path.exists(self.pid_file): Execute( "echo `ps aux|grep '/etc/kafka/server.properties' | grep -v grep | awk '{print $2}'` > " + self.pid_file) check_process_status(self.pid_file)
def status(self, env): import params env.set_params(params) import os if not os.path.exists(self.pid_file): Execute( "echo `ps aux|grep 'wherehows-backend' | grep -v grep | awk '{print $2}'` > " + self.pid_file) check_process_status(self.pid_file)
def status(self, env): print "check start" os.system( r"ps -ef | grep org.logstash.Logstash | grep -v grep | awk '{print $2}' > /var/run/logstash.pid" ) import status_params env.set_params(status_params) check_process_status(status_params.logstash_pid_file)
def execute(configurations={}, parameters={}, host_name=None):
  try:
    pid_file = glob.glob(zeppelin_pid_dir + '/zeppelin-*.pid')[0]
    check_process_status(pid_file)
  except ComponentIsNotRunning as ex:
    return (RESULT_CODE_CRITICAL, [str(ex)])
  except:
    return (RESULT_CODE_CRITICAL, ["Zeppelin is not running"])
  return (RESULT_CODE_OK, ["Successful connection to Zeppelin"])
def status(self, env):
  import status_params
  env.set_params(status_params)
  try:
    pid_file = glob.glob(status_params.zeppelin_pid_dir + '/zeppelin-' +
                         status_params.zeppelin_user + '*.pid')[0]
  except IndexError:
    pid_file = ''
  check_process_status(pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  try:
    check_process_status(status_params.nfsgateway_pid_file)
  except ComponentIsNotRunning:
    check_process_status(status_params.unprivileged_nfsgateway_pid_file)
def execute(configurations={}, parameters={}, host_name=None):
  try:
    pid_file = nifi_pid_dir + '/nifi.pid'
    check_process_status(pid_file)
  except ComponentIsNotRunning as ex:
    return (RESULT_CODE_CRITICAL, [str(ex)])
  except:
    return (RESULT_CODE_CRITICAL, ["Nifi is not running"])
  return (RESULT_CODE_OK, ["Successful connection to Nifi"])
def wait_for_znode():
  import params
  import status_params
  try:
    check_process_status(status_params.hive_pid)
  except ComponentIsNotRunning:
    raise Exception(format("HiveServer2 is no longer running, check the logs at {hive_log_dir}"))

  cmd = format("{zk_bin}/zkCli.sh -server {zk_quorum} ls /{hive_server2_zookeeper_namespace} | grep 'serverUri='")
  code, out = shell.call(cmd)
  if code == 1:
    raise Fail(format("ZooKeeper node /{hive_server2_zookeeper_namespace} is not ready yet"))
def status(self, env):
  import status_params
  env.set_params(status_params)

  if status_params.stack_supports_pid:
    check_process_status(status_params.ranger_kms_pid_file)
    return

  cmd = 'ps -ef | grep proc_rangerkms | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    Logger.debug('KMS process not running')
    raise ComponentIsNotRunning()
def is_monitor_process_live(pid_file):
  """
  Gets whether the Metrics Monitor represented by the specified file is running.
  :param pid_file: the PID file of the monitor to check
  :return: True if the monitor is running, False otherwise
  """
  live = False
  try:
    check_process_status(pid_file)
    live = True
  except ComponentIsNotRunning:
    pass
  return live
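A minimal, hypothetical usage sketch of the helper above; the PID file path is an assumption, and the point is simply that the helper converts the ComponentIsNotRunning exception into a boolean:

# Hypothetical usage of is_monitor_process_live(); the path below is illustrative only.
monitor_pid = "/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid"
if is_monitor_process_live(monitor_pid):
  print "Metrics Monitor is running"
else:
  print "Metrics Monitor is not running"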
def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: configurations (dictionary): a mapping of configuration key to value parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ try: beacon_pid_dir = configurations[BEACON_PID_DIR_KEY] beacon_pid_file = os.path.join(beacon_pid_dir, "beacon.pid") check_process_status(beacon_pid_file) return (RESULT_CODE_OK, ["Beacon Server process is running"]) except: return (RESULT_CODE_CRITICAL, ['Beacon Server process is not running'])
def check_service_status(name):
  from resource_management.libraries.functions.check_process_status import check_process_status
  if name == 'collector':
    pid_file = format("{ams_collector_pid_dir}/ambari-metrics-collector.pid")
    check_process_status(pid_file)
    pid_file = format("{hbase_pid_dir}/hbase-{hbase_user}-master.pid")
    check_process_status(pid_file)
    if os.path.exists(format("{hbase_pid_dir}/distributed_mode")):
      pid_file = format("{hbase_pid_dir}/hbase-{hbase_user}-regionserver.pid")
      check_process_status(pid_file)
  elif name == 'monitor':
    pid_file = format("{ams_monitor_pid_dir}/ambari-metrics-monitor.pid")
    check_process_status(pid_file)
  elif name == 'grafana':
    pid_file = format("{ams_grafana_pid_dir}/grafana-server.pid")
    check_process_status(pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)

  if status_params.stack_supports_pid:
    check_process_status(status_params.ranger_admin_pid_file)
    return

  cmd = 'ps -ef | grep proc_rangeradmin | grep -v grep'
  code, output = shell.call(cmd, timeout=20)
  if code != 0:
    if self.is_ru_rangeradmin_in_progress(status_params.upgrade_marker_file):
      Logger.info('Ranger admin process not running - skipping as stack upgrade is in progress')
    else:
      Logger.debug('Ranger admin process not running')
      raise ComponentIsNotRunning()
def snamenode(action=None, format=False):
  if action == "configure":
    import params
    for fs_checkpoint_dir in params.fs_checkpoint_dirs:
      Directory(fs_checkpoint_dir,
                create_parents=True,
                cd_access="a",
                mode=0755,
                owner=params.hdfs_user,
                group=params.user_group)
    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)
  elif action == "start" or action == "stop":
    import params
    service(action=action,
            name="secondarynamenode",
            user=params.hdfs_user,
            create_pid_dir=True,
            create_log_dir=True)
  elif action == "status":
    import status_params
    check_process_status(status_params.snamenode_pid_file)
def status(self, env):
  import params
  if OSCheck.is_suse_family():
    try:
      Execute('checkproc `which krb5kdc`')
      Execute('checkproc `which kadmind`')
    except Fail as ex:
      raise ComponentIsNotRunning()
  elif OSCheck.is_ubuntu_family():
    check_process_status(params.kdamin_pid_path)
    check_process_status(params.krb5kdc_pid_path)
  else:
    check_process_status(params.kdamin_pid_path)
    check_process_status(params.krb5kdc_pid_path)
def observer_namenode(action=None, format=False):
  if action == "configure":
    import params
    for fs_checkpoint_dir in params.fs_checkpoint_dirs:
      Directory(fs_checkpoint_dir,
                create_parents=True,
                cd_access="a",
                mode=0755,
                owner=params.hdfs_user,
                group=params.user_group)
    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)
    if params.hdfs_include_file:
      File(params.include_file_path,
           content=Template("include_hosts_list.j2"),
           owner=params.hdfs_user,
           group=params.user_group)
    generate_logfeeder_input_config(
      'hdfs', Template("input.config-hdfs.json.j2", extra_imports=[default]))
  elif action == "start" or action == "stop":
    import params
    service(action=action,
            name="observernamenode",
            user=params.hdfs_user,
            create_pid_dir=True,
            create_log_dir=True)
  elif action == "status":
    import status_params
    check_process_status(status_params.snamenode_pid_file)
def check_service_status(env, name):
  import status_params
  env.set_params(status_params)

  if name == 'collector':
    for pid_file in get_collector_pid_files():
      check_process_status(pid_file)
  elif name == 'monitor':
    check_process_status(status_params.monitor_pid_file)
  elif name == 'grafana':
    check_process_status(status_params.grafana_pid_file)
def check_service_status(env, name):
  import status_params
  env.set_params(status_params)
  from resource_management.libraries.functions.check_process_status import check_process_status

  if name == 'collector':
    for pid_file in get_collector_pid_files():
      check_process_status(pid_file)
  elif name == 'monitor':
    check_process_status(status_params.monitor_pid_file)
  elif name == 'grafana':
    check_process_status(status_params.grafana_pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.zkfc_pid_file)
def status(self, env):
  import status_params
  check_process_status(status_params.nifi_ca_pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.spark_thrift_server_pid_file)
def status(self, env): print "checking status..." check_process_status(self.PID_CONFIG_FILE)
def status(self, env):
  import status_params
  check_process_status(status_params.registry_pid_file)
def check_oozie_server_status():
  import status_params
  from resource_management.libraries.functions.check_process_status import check_process_status
  check_process_status(status_params.pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.spark_history_server_pid_file)
def status(self, env):
  import status_params as params
  env.set_params(params)
  pid_file = format("{pid_dir}/TachyonMaster.pid")
  check_process_status(pid_file)
def status(self, env):
  check_process_status('/var/run/datatorrent/dtgateway.pid')
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.resourcemanager_pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.namenode_pid_file)
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    # we need this directory to be present before any action (HA manual steps for
    # additional namenode)
    create_name_dirs(params.dfs_name_dir)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params
    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
      # if the current host is the standby NameNode in an HA deployment,
      # run the bootstrap command to start the NameNode in standby mode.
      # This requires that the active NameNode is already up and running,
      # so this execute should be re-tried upon failure, up to a timeout
      success = bootstrap_standby_namenode(params)
      if not success:
        raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling" and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that
      # failed, it would have tried to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    options = ""
    if upgrade_type == "rolling":
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
    elif upgrade_type == "nonrolling":
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user=params.hdfs_user)

    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(
        format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"),
        params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
    else:
      is_active_namenode_cmd = True

    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |
    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        Logger.info("Wait for NameNode to become active.")
        if is_active_namenode(hdfs_binary):  # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)
    create_ranger_audit_hdfs_directories(is_active_namenode_cmd)
  elif action == "stop":
    import params
    service(
      action="stop",
      name="namenode",
      user=params.hdfs_user
    )
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.pid_file)
def status(self, env):
  import status_params
  env.set_params(status_params)
  check_process_status(status_params.nfsgateway_pid_file)
def status(self, env):
  import params
  env.set_params(params)
  pid_file = params.mysql_pid_file
  check_process_status(pid_file)
def status(self, env):
  from hawqstatus import get_pid_file
  check_process_status(get_pid_file())
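All of the snippets above delegate to check_process_status from resource_management.libraries.functions, which raises ComponentIsNotRunning when the process behind the PID file cannot be confirmed alive. The following is only a rough, hedged sketch of that behavior (an approximation for reading the snippets, not the library's actual implementation, which may also handle permissions via sudo):

# Approximate sketch of the contract the snippets above rely on; not the real
# resource_management implementation.
import os

class ComponentIsNotRunning(Exception):
  pass

def check_process_status_sketch(pid_file):
  # Missing or empty PID file means the component is treated as not running.
  if not pid_file or not os.path.isfile(pid_file):
    raise ComponentIsNotRunning()
  with open(pid_file, 'r') as fp:
    try:
      pid = int(fp.read().strip())
      os.kill(pid, 0)  # signal 0 only checks that the process exists
    except (ValueError, OSError):
      raise ComponentIsNotRunning()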