def killpg_gracefully(proc, timeout=GRACEFUL_PG_KILL_TIMEOUT_SECONDS):
    """
    Tries to kill pgroup (process group) of process with SIGTERM.
    If the process is still alive after waiting for timeout, SIGKILL is sent
    to the pgroup.

    Nothing is done when ``proc.poll()`` already reports an exit code.

    :param proc: process handle exposing ``pid``, ``poll()`` and ``wait()``
        (presumably a subprocess.Popen -- confirm against callers)
    :param timeout: seconds to wait after SIGTERM before escalating to
        SIGKILL; the process is polled in 0.1 second steps
    """
    from resource_management.core import sudo
    from resource_management.core.logger import Logger

    if proc.poll() is not None:
        # The process has already exited, there is nothing left to signal.
        return

    try:
        pgid = os.getpgid(proc.pid)
        # A negative pid addresses the whole process group.
        sudo.kill(-pgid, signal.SIGTERM)

        # int() keeps a fractional timeout from breaking range().
        for _ in range(int(10 * timeout)):
            if proc.poll() is not None:
                break
            time.sleep(0.1)
        else:
            # The loop ran to completion without the process exiting.
            Logger.info(
                "Cannot gracefully kill process group {0}. Resorting to SIGKILL."
                .format(pgid))
            sudo.kill(-pgid, signal.SIGKILL)
            proc.wait()
    except OSError:
        # catch race condition if proc already dead
        pass
def check_process_status(pid_file):
    """
    Function checks whether process is running.
    Process is considered running, if pid file exists, and process with
    a pid, mentioned in pid file is running
    If process is not running, will throw ComponentIsNotRunning exception

    @param pid_file: path to service pid file
    @raise ComponentIsNotRunning: if the pid file is missing, cannot be read
        as an integer pid, or the pid does not answer to signal 0
    """
    from resource_management.core import sudo

    if not pid_file or not os.path.isfile(pid_file):
        Logger.info("Pid file {0} is empty or does not exist".format(str(pid_file)))
        raise ComponentIsNotRunning()

    try:
        pid = int(sudo.read_file(pid_file))
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt/SystemExit
        # are not converted into a "component is not running" report.
        Logger.info("Pid file {0} does not exist or does not contain a process id number".format(pid_file))
        raise ComponentIsNotRunning()

    try:
        # Kill will not actually kill the process
        # From the doc:
        # If sig is 0, then no signal is sent, but error checking is still
        # performed; this can be used to check for the existence of a
        # process ID or process group ID.
        sudo.kill(pid, 0)
    except OSError:
        Logger.info("Process with pid {0} is not running. Stale pid file"
                    " at {1}".format(pid, pid_file))
        raise ComponentIsNotRunning()
def check_process_status(pid_file):
    """
    Function checks whether process is running.
    Process is considered running, if pid file exists, and process with
    a pid, mentioned in pid file is running
    If process is not running, will throw ComponentIsNotRunning exception

    @param pid_file: path to service pid file
    @raise ComponentIsNotRunning: if the pid file is missing, cannot be read
        as an integer pid, or the pid does not answer to signal 0
    """
    from resource_management.core import sudo

    if not pid_file or not os.path.isfile(pid_file):
        Logger.info("Pid file {0} is empty or does not exist".format(
            str(pid_file)))
        raise ComponentIsNotRunning()

    try:
        pid = int(sudo.read_file(pid_file))
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt/SystemExit
        # are not converted into a "component is not running" report.
        Logger.info(
            "Pid file {0} does not exist or does not contain a process id number"
            .format(pid_file))
        raise ComponentIsNotRunning()

    try:
        # Kill will not actually kill the process
        # From the doc:
        # If sig is 0, then no signal is sent, but error checking is still
        # performed; this can be used to check for the existence of a
        # process ID or process group ID.
        sudo.kill(pid, 0)
    except OSError:
        Logger.info("Process with pid {0} is not running. Stale pid file"
                    " at {1}".format(pid, pid_file))
        raise ComponentIsNotRunning()
def status(self, env):
    """
    Check that every redis instance configured for this host is running.

    ``params.redis_node_detail`` is read as a comma-separated list of
    ``<ip>:<instance count>`` entries. For each entry whose ip equals the
    address this host's name resolves to, instances are expected on ports
    7000, 7001, ... and each must have a pid file at
    /data/redis/redis/cluster/<port>/redis.pid naming a live process.

    :param env: execution environment; receives the params module through
        ``env.set_params``
    :raises Exception: if /data/redis/redis/cluster does not exist
    :raises ComponentIsNotRunning: if a pid file is missing or unreadable,
        or the recorded pid does not answer to signal 0
    """
    print('status')
    from resource_management.core import sudo
    import params
    env.set_params(params)

    cluster_dir = '/data/redis/redis/cluster'
    if not os.path.exists(cluster_dir):
        raise Exception('---> /data/redis/redis/cluster目录不存在!!! <---')

    # Check whether the pid file of each redis instance exists; raise if not.
    local_ip = socket.gethostbyname(socket.gethostname())
    for node in params.redis_node_detail.split(','):
        fields = node.split(':')
        if fields[0] != local_ip:
            continue

        instance_count = int(fields[1])
        for offset in range(instance_count):
            pid_path = cluster_dir + "/" + str(7000 + offset) + "/redis.pid"
            if not os.path.exists(pid_path):
                raise ComponentIsNotRunning()

            try:
                pid = int(sudo.read_file(pid_path))
            except Exception:
                # An unreadable or garbage pid file cannot prove that the
                # instance is alive, so report it as not running.
                raise ComponentIsNotRunning()

            try:
                # Signal 0 sends nothing; it only checks that the pid exists.
                sudo.kill(pid, 0)
            except Exception:
                raise ComponentIsNotRunning()
def stop(self, env):
    """
    Stop the process recorded in ``status_params.nifi_ca_pid_file``.

    Does nothing when the pid file is absent. Otherwise SIGTERM is sent once
    per second, up to 25 times, re-checking ``self.status(env)`` after each
    attempt; if the status check still passes, SIGKILL is sent and the status
    is checked once more after 5 seconds. The pid file is removed as soon as
    a status check raises ComponentIsNotRunning.

    :param env: execution environment, forwarded to ``self.status``
    """
    import status_params

    pid_file = status_params.nifi_ca_pid_file
    if not path_isfile(pid_file):
        return

    try:
        # Raises ComponentIsNotRunning right away if the pid file is stale.
        self.status(env)
        target_pid = int(read_file(pid_file))

        # Graceful phase: keep asking politely until the status check fails.
        for _attempt in range(25):
            kill(target_pid, SIGTERM)
            time.sleep(1)
            self.status(env)

        # Forced phase: the process ignored every SIGTERM.
        kill(target_pid, SIGKILL)
        time.sleep(5)
        self.status(env)
    except ComponentIsNotRunning:
        unlink(pid_file)
def kill_process_with_children(base_pid):
    """
    Process tree killer

    Sends SIGTERM and then SIGKILL to the pids returned by
    ``get_all_children(base_pid)``, skipping package-manager processes and
    the current process. Pids that survive a signal, and any OSError raised
    while signalling, are reported through ``_logger``.

    :type base_pid int
    """
    from resource_management.core import sudo

    # Package managers are never killed (an interrupted install could leave
    # the package database in a broken state -- presumably; confirm).
    exception_list = ("apt-get", "apt", "yum", "zypper", "zypp")
    # Ordered pairs, not a dict: SIGTERM has to be delivered before SIGKILL
    # and plain dict iteration order is not guaranteed on older interpreters.
    signals_to_post = (
        ("SIGTERM", signal.SIGTERM),
        ("SIGKILL", signal.SIGKILL),
    )

    own_pid = os.getpid()
    # Each item is indexed as [0] pid, [1] process name, [2] command detail.
    full_child_pids = get_all_children(base_pid)
    all_child_pids = [
        item[0] for item in full_child_pids
        if item[1].lower() not in exception_list and item[0] != own_pid
    ]
    error_log = []

    for sig_name, sig in signals_to_post:
        # we need to kill processes from the bottom of the tree
        pids_to_kill = sorted(get_existing_pids(all_child_pids), reverse=True)
        for pid in pids_to_kill:
            try:
                sudo.kill(pid, sig)
            except OSError as e:
                error_log.append((sig_name, pid, repr(e)))

        if not pids_to_kill:
            continue

        wait_for_process_list_kill(pids_to_kill)
        still_existing_pids = get_existing_pids(pids_to_kill)
        if still_existing_pids:
            _logger.warning(
                "These PIDs {0} did not respond to {1} signal. Detailed commands list:\n {2}"
                .format(
                    ", ".join([str(i) for i in still_existing_pids]),
                    sig_name,
                    "\n".join([
                        i[2] for i in full_child_pids
                        if i[0] in still_existing_pids
                    ])))

    # we're unable to kill all requested PIDs
    if get_existing_pids(all_child_pids) and error_log:
        _logger.warning("Process termination error log:\n")
        for error_item in error_log:
            # error_item is (signal name, pid, exception repr)
            _logger.warning(
                "Signal: {0}, PID: {1}, Exception message: {2}".format(
                    *error_item))
def _dolphin_pid_file_process_alive(pid_file_path):
    """Return True if pid_file_path holds the pid of a process answering signal 0."""
    from resource_management.core import sudo

    try:
        pid = int(sudo.read_file(pid_file_path))
    except Exception:
        # Unreadable file or non-numeric content: nothing to probe.
        return False

    if pid <= 0:
        # Zero and negative pids address process groups, not a single process.
        return False

    try:
        # Kill will not actually kill the process
        # From the doc:
        # If sig is 0, then no signal is sent, but error checking is still
        # performed; this can be used to check for the existence of a
        # process ID or process group ID.
        sudo.kill(pid, 0)
    except OSError:
        return False
    return True


def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    The result code is "OK" when the pid file belonging to the alert named by
    parameters['alertName'] exists and names a live process, and "CRITICAL"
    otherwise (including when the alert name is not one of the known
    DOLPHIN_* alerts).

    Keyword arguments:
    configurations : a mapping of configuration key to value
    parameters : a mapping of script parameter key to value
    host_name : the name of this host where the alert is running;
                socket.getfqdn() is used when it is None

    :type configurations dict
    :type parameters dict
    :type host_name str
    """
    # NOTE: the dict defaults are kept for signature compatibility; they are
    # only read here, never mutated.
    alert_name = parameters['alertName']

    dolphin_pidfile_dir = "/opt/soft/run/dolphinscheduler"
    pid_file_names = {
        'DOLPHIN_MASTER': "master-server.pid",
        'DOLPHIN_WORKER': "worker-server.pid",
        'DOLPHIN_ALERT': "alert-server.pid",
        'DOLPHIN_LOGGER': "logger-server.pid",
        'DOLPHIN_API': "api-server.pid",
    }

    is_running = False
    pid_file_name = pid_file_names.get(alert_name)
    if pid_file_name is not None:
        pid_file_path = dolphin_pidfile_dir + "/" + pid_file_name
        # Only probe the process when there is a pid file to read; without
        # one there is no pid to signal.
        if os.path.isfile(pid_file_path):
            is_running = _dolphin_pid_file_process_alive(pid_file_path)

    if host_name is None:
        host_name = socket.getfqdn()

    result_code = "OK" if is_running else "CRITICAL"

    label = "The comment {0} of DOLPHIN_SCHEDULER on {1} is {2}".format(
        alert_name, host_name, result_code)

    return (result_code, [label])