예제 #1
0
def killpg_gracefully(proc, timeout=GRACEFUL_PG_KILL_TIMEOUT_SECONDS):
    """
    Try to stop the process group of ``proc`` with SIGTERM, escalating to SIGKILL.

    After SIGTERM is sent to the process group, ``proc`` is polled every 0.1
    seconds for up to ``timeout`` seconds. If it still has not exited, SIGKILL
    is sent to the process group and ``proc`` is waited on. Nothing is done
    when ``proc`` has already exited.

    :param proc: process object exposing ``pid``, ``poll()`` and ``wait()``
    :param timeout: seconds to wait after SIGTERM before resorting to SIGKILL;
      fractional values are accepted
    """
    from resource_management.core import sudo
    from resource_management.core.logger import Logger

    # Already finished: there is nothing to signal.
    if proc.poll() is not None:
        return

    try:
        pgid = os.getpgid(proc.pid)
        # A negative pid addresses the whole process group.
        sudo.kill(-pgid, signal.SIGTERM)

        # int() keeps a fractional timeout usable as a poll count.
        for _ in range(int(10 * timeout)):
            if proc.poll() is not None:
                break
            time.sleep(0.1)
        else:
            # The loop ran to completion, i.e. the process never exited.
            Logger.info(
                "Cannot gracefully kill process group {0}. Resorting to SIGKILL."
                .format(pgid))
            sudo.kill(-pgid, signal.SIGKILL)
            proc.wait()
    except OSError:
        # Race condition: the process was already dead when we looked it up
        # or signalled it. That is the outcome we wanted, so ignore it.
        pass
def check_process_status(pid_file):
  """
  Verify that the process recorded in a pid file is running.

  The process counts as running when the pid file exists, contains an
  integer pid, and a signal-0 probe of that pid succeeds. In every other
  case an informational message is logged and ComponentIsNotRunning is
  raised. Nothing is returned on success.

  @param pid_file: path to service pid file
  """
  from resource_management.core import sudo

  if not pid_file or not os.path.isfile(pid_file):
    Logger.info("Pid file {0} is empty or does not exist".format(str(pid_file)))
    raise ComponentIsNotRunning()

  try:
    pid = int(sudo.read_file(pid_file))
  except Exception:
    # Not a bare except: KeyboardInterrupt/SystemExit must still propagate.
    Logger.info("Pid file {0} does not exist or does not contain a process id number".format(pid_file))
    raise ComponentIsNotRunning()

  try:
    # Kill will not actually kill the process
    # From the doc:
    # If sig is 0, then no signal is sent, but error checking is still
    # performed; this can be used to check for the existence of a
    # process ID or process group ID.
    sudo.kill(pid, 0)
  except OSError:
    Logger.info("Process with pid {0} is not running. Stale pid file"
              " at {1}".format(pid, pid_file))
    raise ComponentIsNotRunning()
예제 #3
0
def check_process_status(pid_file):
    """
    Check whether the process described by ``pid_file`` is running.

    A process is considered running only if the pid file exists, its content
    parses as an integer pid, and probing that pid with signal 0 succeeds.
    Otherwise the reason is logged at info level and ComponentIsNotRunning is
    raised. The function returns nothing when the process is alive.

    @param pid_file: path to service pid file
    """
    from resource_management.core import sudo

    if not pid_file or not os.path.isfile(pid_file):
        Logger.info("Pid file {0} is empty or does not exist".format(
            str(pid_file)))
        raise ComponentIsNotRunning()

    try:
        pid = int(sudo.read_file(pid_file))
    except Exception:
        # Deliberately not a bare except, so that KeyboardInterrupt and
        # SystemExit are not misreported as "component is not running".
        Logger.info(
            "Pid file {0} does not exist or does not contain a process id number"
            .format(pid_file))
        raise ComponentIsNotRunning()

    try:
        # Signal 0 sends nothing; it only performs the existence and
        # permission checks, which makes it a cheap liveness probe.
        sudo.kill(pid, 0)
    except OSError:
        Logger.info("Process with pid {0} is not running. Stale pid file"
                    " at {1}".format(pid, pid_file))
        raise ComponentIsNotRunning()
예제 #4
0
    def status(self, env):
        """
        Check that every Redis instance expected on this host is running.

        ``params.redis_node_detail`` is read as a comma-separated list of
        ``ip:count`` entries. For each entry whose ip equals this host's
        resolved address, ``count`` instances are expected on consecutive
        ports starting at 7000, each with a pid file at
        ``/data/redis/redis/cluster/<port>/redis.pid``.

        Raises ComponentIsNotRunning when a pid file is missing or unreadable,
        or when the recorded pid does not answer a signal-0 probe. Raises a
        plain Exception when the cluster directory itself does not exist.
        """
        print('status')
        from resource_management.core import sudo
        import params
        env.set_params(params)

        cluster_dir = '/data/redis/redis/cluster'
        if not os.path.exists(cluster_dir):
            raise Exception('---> /data/redis/redis/cluster目录不存在!!! <---')

        # Verify the pid file of every local Redis instance; a missing or
        # stale one means the component is not running.
        local_ip = socket.gethostbyname(socket.gethostname())
        for node in params.redis_node_detail.split(','):
            fields = node.split(':')
            if fields[0] != local_ip:
                continue

            instance_count = int(fields[1])
            for offset in range(instance_count):
                pid_file = cluster_dir + "/" + str(7000 + offset) + "/redis.pid"
                if not os.path.exists(pid_file):
                    raise ComponentIsNotRunning()

                try:
                    pid = int(sudo.read_file(pid_file))
                except Exception:
                    # An unreadable or garbled pid file cannot prove the
                    # instance is alive, so report it as not running.
                    raise ComponentIsNotRunning()

                try:
                    # Signal 0 only checks that the pid exists.
                    sudo.kill(pid, 0)
                except Exception:
                    raise ComponentIsNotRunning()
예제 #5
0
    def stop(self, env):
        """
        Terminate the process named in ``status_params.nifi_ca_pid_file``.

        Does nothing when the pid file is absent. Otherwise SIGTERM is sent
        once per second, up to 25 times, followed by a single SIGKILL and a
        five second pause. ``self.status(env)`` is called before the first
        signal and after every pause; as soon as it raises
        ComponentIsNotRunning the pid file is removed and the method returns.
        """
        import status_params

        pid_file = status_params.nifi_ca_pid_file
        if not path_isfile(pid_file):
            return

        try:
            # Raises right away when the process is already gone.
            self.status(env)
            ca_pid = int(read_file(pid_file))

            attempts_left = 25
            while attempts_left > 0:
                kill(ca_pid, SIGTERM)
                time.sleep(1)
                self.status(env)
                attempts_left -= 1

            # Polite requests were ignored; force termination.
            kill(ca_pid, SIGKILL)
            time.sleep(5)
            self.status(env)
        except ComponentIsNotRunning:
            unlink(pid_file)
예제 #6
0
파일: shell.py 프로젝트: marsfun/ambari
def kill_process_with_children(base_pid):
    """
    Process tree killer

    Sends SIGTERM, and then SIGKILL, to the children of ``base_pid`` as
    reported by ``get_all_children``. Children whose name is a package
    manager listed in ``exception_list`` and the current process itself are
    never signalled. After each signal round the function waits via
    ``wait_for_process_list_kill`` and logs a warning for survivors. If some
    children still exist at the end, the collected signalling errors are
    logged as well.

    :type base_pid int
    """
    from resource_management.core import sudo

    exception_list = ["apt-get", "apt", "yum", "zypper", "zypp"]
    # An ordered sequence, not a dict: the polite SIGTERM must always be
    # attempted before SIGKILL, and dict iteration order is not guaranteed
    # on older interpreters.
    signals_to_post = (("SIGTERM", signal.SIGTERM), ("SIGKILL", signal.SIGKILL))
    # NOTE(review): items look like (pid, process name, command line) --
    # confirm against get_all_children.
    full_child_pids = get_all_children(base_pid)
    own_pid = os.getpid()
    all_child_pids = [
        item[0] for item in full_child_pids
        if item[1].lower() not in exception_list and item[0] != own_pid
    ]
    error_log = []

    for sig_name, sig in signals_to_post:
        # we need to kill processes from the bottom of the tree
        pids_to_kill = sorted(get_existing_pids(all_child_pids), reverse=True)
        for pid in pids_to_kill:
            try:
                sudo.kill(pid, sig)
            except OSError as e:
                error_log.append((sig_name, pid, repr(e)))

        if pids_to_kill:
            wait_for_process_list_kill(pids_to_kill)
            still_existing_pids = get_existing_pids(pids_to_kill)
            if still_existing_pids:
                _logger.warning(
                    "These PIDs {0} did not respond to {1} signal. Detailed commands list:\n {2}"
                    .format(
                        ", ".join([str(i) for i in still_existing_pids]),
                        sig_name, "\n".join([
                            i[2] for i in full_child_pids
                            if i[0] in still_existing_pids
                        ])))

    # we're unable to kill all requested PIDs
    if get_existing_pids(all_child_pids) and error_log:
        _logger.warning("Process termination error log:\n")
        for error_item in error_log:
            # error_item is (signal name, pid, repr of the exception).
            _logger.warning(
                "Signal: {0}, PID: {1}, Exception message: {2}".format(
                    *error_item))
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    The alert name in ``parameters['alertName']`` selects a pid file under
    /opt/soft/run/dolphinscheduler. The result code is "OK" when that file
    exists, holds an integer pid, and the pid answers a signal-0 probe. It is
    "CRITICAL" in every other case, including an unrecognised alert name.

    Keyword arguments:
    configurations : a mapping of configuration key to value
    parameters : a mapping of script parameter key to value
    host_name : the name of this host where the alert is running

    :type configurations dict
    :type parameters dict
    :type host_name str
    """

    alert_name = parameters['alertName']

    from resource_management.core import sudo

    dolphin_pidfile_dir = "/opt/soft/run/dolphinscheduler"
    pid_file_names = {
        'DOLPHIN_MASTER': "master-server.pid",
        'DOLPHIN_WORKER': "worker-server.pid",
        'DOLPHIN_ALERT': "alert-server.pid",
        'DOLPHIN_LOGGER': "logger-server.pid",
        'DOLPHIN_API': "api-server.pid",
    }

    pid_file_path = ""
    if alert_name in pid_file_names:
        pid_file_path = dolphin_pidfile_dir + "/" + pid_file_names[alert_name]

    # Each step runs only if the previous one succeeded, so a missing or
    # garbled pid file can never lead to probing a placeholder pid.
    is_running = False
    if pid_file_path and os.path.isfile(pid_file_path):
        try:
            pid = int(sudo.read_file(pid_file_path))
        except Exception:
            pid = None

        if pid is not None:
            try:
                # Kill will not actually kill the process
                # From the doc:
                # If sig is 0, then no signal is sent, but error checking is
                # still performed; this can be used to check for the
                # existence of a process ID or process group ID.
                sudo.kill(pid, 0)
            except OSError:
                # Stale pid file: the recorded process no longer exists.
                pass
            else:
                is_running = True

    if host_name is None:
        host_name = socket.getfqdn()

    result_code = "OK" if is_running else "CRITICAL"

    label = "The comment {0} of DOLPHIN_SCHEDULER on {1} is {2}".format(
        alert_name, host_name, result_code)

    return ((result_code, [label]))