Example #1
def get_tentative_arg(pid, funcname, argN):
    # Try to recover argument argN (0-based) of 'funcname' from the registers
    # saved on the stack of the given PID.  __ARG_REG is the tuple of x86_64
    # argument registers ('RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9'), defined at
    # module level in the original source (see Example #2).
    foundarg = None
    s = exec_bt("foreach {} bt".format(pid), MEMOIZE=False)[0]

    with DisasmFlavor('att'):
        search_for_registers(s)
        for f in s.frames:
            if (funcname not in f.func):
                continue

            if (f.lookup_regs):
                for reg in f.reg:
                    if (reg not in __ARG_REG):
                        continue
                    if (__ARG_REG.index(reg) == argN):
                        addr, conf = f.reg[reg]
                        foundarg = addr
                        break
    return foundarg
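
A minimal usage sketch (assumes a crash/pykdump session where exec_bt, DisasmFlavor and search_for_registers are available; the PID and function name below are illustrative only):

# Hypothetical call: recover argument 0 of do_coredump for PID 1234
arg0 = get_tentative_arg(1234, "do_coredump", 0)
if (arg0 is not None):
    print("do_coredump arg0 = {:#x}".format(arg0))
else:
    print("Could not recover the argument from saved registers")
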
Example #2
def get_interesting_arguments(pid, re_funcnames, re_ctypes):
    # Walk the stack of the given PID and yield (funcname, structname, addr)
    # for every register-passed argument whose C type matches 're_ctypes',
    # in frames whose function name matches 're_funcnames'.
    __ARG_REG = ('RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9')
    s = exec_bt("bt {}".format(pid), MEMOIZE=False)[0]
    with DisasmFlavor('att'):
        for f in search_for_registers(s, re_funcnames):
            if (f.lookup_regs):
                if (not f.func):
                    continue
                argprotos = funcargs(f.func)
                if (not argprotos):
                    continue
                nargs = len(argprotos)
                for reg in f.reg:
                    if (reg not in __ARG_REG or not f.func):
                        continue
                    index = __ARG_REG.index(reg)
                    if (index >= nargs):
                        continue
                    ctype = argprotos[index]
                    if (not re_ctypes.search(ctype)):
                        continue
                    addr = f.reg[reg][0]
                    # __re_struct is a module-level regex in the original
                    # source; it extracts the struct name from the C type.
                    m = __re_struct.match(ctype)
                    if (not m):
                        continue
                    sname = m.group(1)
                    yield (f.func, sname, addr)
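
A minimal usage sketch (hypothetical PID and regular expressions; assumes the same crash/pykdump environment as above):

import re

# Hypothetical filters: frames whose function name contains 'nfs' and whose
# arguments are pointers to structs
re_funcs = re.compile(r'nfs')
re_types = re.compile(r'struct .*\*')
for funcname, sname, addr in get_interesting_arguments(1234, re_funcs, re_types):
    print("{}: struct {} at {:#x}".format(funcname, sname, addr))
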
Example #3
def do_check():
    # Detect the case where an 'adclient' thread is dumping core in
    # UNINTERRUPTIBLE state while core_pattern pipes cores to abrt-hook-ccpp,
    # and abrt-hook-ccpp talks over a UNIX socket to the Centrify daemon.
    tt = TaskTable()
    has_do_coredump = False
    has_UN = False
    for task in tt.getThreadsByComm('adclient'):
        pid = task.pid
        stack = exec_bt("bt {}".format(task.pid))[0]
        if (stack.hasfunc('do_coredump')):
            has_do_coredump = True
        if (task.ts.state & TASK_STATE.TASK_UNINTERRUPTIBLE):
            has_UN = True
        if (has_do_coredump and has_UN):
            break
    else:
        return

    # After some commands are issued, GDB returns an incorrect type for this
    # symbol: 'char core_pattern[];' instead of 'char core_pattern[CORENAME_MAX_SIZE];'
    addr = sym2addr("core_pattern")
    core_pattern = SmartString(readmem(addr, 1), addr, None)
    #core_pattern = readSymbol("core_pattern")
    if (not core_pattern.startswith("|")):
        return
    abrt_hook = tt.getByComm('abrt-hook-ccpp')

    if (not abrt_hook):
        return

    __daemon = "/var/centrifydc/daemon"
    for sock in abrt_hook[0].get_task_socks():
        family, sktype, protoname, inet = decodeSock(sock)
        if (protoname == 'UNIX'):
            sock = sock.castTo("struct unix_sock")
            state, ino, s_path = unix_sock(sock)
            p_state, p_ino, p_path = unix_sock(sock.Peer)
            for path in (s_path, p_path):
                if (path.startswith(__daemon)):
                    # __txt is a module-level message string defined elsewhere
                    # in the original script.
                    pylog.info(__txt)
Example #4
def run_check_on_multipath():
    tt = TaskTable()
    bts = []
    errors = 0
    task_cnt = 0
    multipathd_daemon = 0  # To verify if multipathd daemon is running
    multipath_blocked = 0  # To verify if multipathd daemon or command is blocked
    mpath_present = 0  # To verify if multipath device exists with or without
    # multipathd daemon running
    wq_blocked = 0  # To verify if scsi_wq or fc_wq is blocked
    kworker_md_blocked = 0  # Counter for hung worker threads which are waiting for
    # IO requests on mdraid devices

    print("\nChecking for device-mapper issues...\n")

    for t in tt.allThreads():
        print("Getting a list of processes in UN state..."
              "(Count: {:d})".format(task_cnt),
              end="\r")
        if ('multipathd' in t.comm):
            multipathd_daemon = 1
        if (t.ts.state & TASK_STATE.TASK_UNINTERRUPTIBLE):
            task_cnt += 1
            # crash can miss some threads when there are pages missing
            # and it will not do 'bt' in that case.
            try:
                bts.append(exec_bt("bt %d" % t.pid)[0])
            except:
                pass
    print("Getting a list of processes in UN state...\t\t\t[Done]")

    if (task_cnt):
        print("\nProcessing the back trace of hung tasks...\t\t\t", end='')
        for bt in bts:
            if ('kworker' in bt.cmd):
                if (bt.hasfunc('md_flush_request')
                        and bt.hasfunc('dio_aio_complete_work')):
                    kworker_md_blocked += 1

            if ('multipath' in bt.cmd):
                multipath_blocked = 1

            if (('scsi_wq' in bt.cmd) or ('fc_wq' in bt.cmd)):
                wq_blocked = 1
        print("[Done]")

    # Checks for dm devices.  'devlist' is the list of (mapped_device, name)
    # pairs built earlier by the original script from the device-mapper tables.
    for dev in devlist:
        md, name = dev
        dm_table_map = StructResult("struct dm_table", md.map)
        # Check if there is any multipath device present in device-mapper table
        if (dm_table_map.targets.type.name == "multipath"):
            mpath_present += 1

    # Check if kworker threads are stuck waiting to flush IO on mdraid devices
    if (kworker_md_blocked >= 5):
        print(
            "\n ** {} kworker threads are stuck in UN state waiting to flush the IO"
            "\n    requests on mdraid devices. This could be a result of thundering"
            "\n    herd problem. See reference: "
            "\n    https://marc.info/?l=linux-raid&m=155364683109115&w=2".
            format(kworker_md_blocked))
        print(
            "\n    Run 'hanginfo' for more information on processes in UN state."
        )
        errors += 1

    # multipath devices are present but multipathd is not running
    if (mpath_present != 0 and multipathd_daemon == 0):
        print(
            "\n ** multipath device(s) are present, but multipathd service is"
            "\n    not running. IO failover/failback may not work.")
        errors += 1

    # scsi or fc work queue and multipathd are blocked
    if (multipath_blocked == 1 and wq_blocked == 1):
        print(
            "\n ** multipathd and scsi/fc work_queue processes are stuck in UN state,"
            "\n    this could block IO failover on multipath devices")
        print(
            "\n    Run 'hanginfo' for more information on processes in UN state."
        )
        errors += 1
    # only multipathd process is stuck in UN state
    elif (multipath_blocked == 1):
        print("\n ** multipathd processes stuck in UN state,"
              "\n    this could block IO failover on multipath devices")
        print(
            "\n    Run 'hanginfo' for more information on processes in UN state."
        )
        errors += 1

    if (errors == 0 and task_cnt != 0):
        print("\n    No device-mapper, multipath issues detected by utility,"
              "\n    but found {} processes in UN state.".format(task_cnt))
        print(
            "\n    Run 'hanginfo' for more information on processes in UN state."
        )
    elif (errors == 0 and task_cnt == 0):
        print("No issues detected by utility.")
Example #5
def printTasks(reverse=False, maxtoprint=-1):
    tt = TaskTable()
    if (debug):
        print("Uptime:", ms2uptime(tt.basems))

    out = []

    if (not reverse):
        # Natural order (task followed by its threads)
        for mt in tt.allTasks():
            out.append((mt.Ran_ago, mt.pid, mt))
            for t in mt.threads:
                #print ("    struct thread_info 0x%x" % long(t))
                out.append((t.Ran_ago, t.pid, t))
        hdr = 'Tasks in PID order, grouped by Thread Group leader'
    else:
        # Most recent first
        for t in tt.allThreads():
            out.append((t.Ran_ago, t.pid, t))
        out.sort()
        hdr = 'Tasks in reverse order, scheduled recently first'

    # Apply the filter
    if (taskstates_filter):
        out1 = []
        for *group, t in out:
            sstate = t.state[5:7]
            if (sstate in taskstates_filter):
                out1.append((*group, t))
        out = out1

    nthreads = len(out)
    if (maxtoprint != -1 and maxtoprint < nthreads):
        # Split them 1:1
        nbeg = maxtoprint // 2
        nend = maxtoprint - nbeg
        out = out[:nbeg] + [(None, None, None)] + out[-nend:]
        extra = " ({} tasks skipped)".format(nthreads - maxtoprint)
    else:
        extra = ''

    # Print the header
    print("=== {}{} ===".format(hdr, extra))
    _header = " PID          CMD       CPU   Ran ms ago   STATE\n" +\
    "--------   ------------  --  ------------- -----"
    if (not runcmd):
        print(_header)

    for ran_ms_ago, pid, t in out:
        if (pid is None):
            print("           <snip>")
            continue
        sstate = t.state[5:7]
        tgid = t.tgid
        pid_template = " {:6d}"
        if (pid != tgid):
            if (not reverse):
                pid_template = "  {:6d}"
            extra = " (tgid=%d)" % tgid
        else:
            extra = ""
        uid = t.Uid
        pid_s = pid_template.format(pid)
        extra = "%13s UID=%d" % (extra, uid)
        if (is_task_active(long(t.ts))):
            pid_s = ">" + pid_s[1:]

        # Thread pointers might be corrupted
        try:
            if (runcmd):
                print(_header)
            print ("%s %14s %3d %14d  %s %s" \
                        % (pid_s, t.comm,  t.cpu,
                            int(ran_ms_ago), sstate, extra))
            if (runcmd):
                _cmdline = "{} {}".format(runcmd, pid)
                print("\ncrash> {}".format(_cmdline))
                out = exec_crash_command(_cmdline)
                if (": command not found: " in out):
                    sys.exit(1)
                print(out)
            # In verbose mode, print the stack as well
            if (verbose):
                bt = exec_bt("bt %d" % pid)
                print(bt[0])
            if (verbose or runcmd):
                print("\n", "-" * 78, "\n", sep='')

        except crash.error:
            pylog.error("corrupted", t)
Example #6
    else:
        btcmd = "foreach " + args.pid + " " + bts

    # Make sure we're on an x86_64 vmcore, or this will fail miserably.
    if (sys_info.machine != "x86_64"):
        print("Register decoding is supported on x86_64 dumps only.")
        sys.exit()

    # Purge the memoize cache if a 'mod' command has been done since our
    # last invocation, since cached output may not reflect newly loaded symbols

    purge_memoize_cache(CU_LOAD)

    with DisasmFlavor('att'):
        try:
            stacklist = exec_bt(btcmd, MEMOIZE=False)
        except:
            print("Unable to get stack trace")
            sys.exit()

        for s in stacklist:

            search_for_registers(s, routine)

            print("\nPID: {}  TASK: {:x}  CPU: {}  COMMAND: {}".format(
                s.pid, s.addr, s.cpu, s.cmd))

            for f in s.frames:

                # Skip frame if it doesn't match routine name pattern.
                # If no routine was specified, the frame will print because
Example #7
def classify_UN(v):
    # Reset owners
    resource_owners_clear()
    # We get a list of UN tasks
    tasksrem = getUNTasks()

    if (not tasksrem):
        print("There are no UNINTERRUPTIBLE tasks")
        return

    print(" *** UNINTERRUPTIBLE threads, classified ***")

    # Now we do a number of tests trying to classify the threads
    # Every time we succeed, we remove these threads from tasksrem
    check_stack_and_print('io_schedule', tasksrem)
    check_stack_and_print('btrfs_tree_read_lock', tasksrem)
    check_inode_mutexes(tasksrem)
    check_other_mutexes(tasksrem)
    check_mmap_sem(tasksrem)
    check_congestion_queues(tasksrem)
    check_kthread_create_list(tasksrem)
    check_throttle_direct_reclaim(tasksrem)
    check_console_sem(tasksrem)
    check_stack_and_print('schedule_timeout', tasksrem)
    check_stack_and_print('alloc_pages_slowpath', tasksrem)
    check_stack_and_print('nfs_idmap_id', tasksrem, "NFS idmapper")

    if (tasksrem):
        print("\n\n ********  Non-classified UN Threads ********** {}"
              " in total".format(len(tasksrem)))
        # Print what remains
        btlist = []
        for pid in tasksrem:
            try:
                btlist.append(exec_bt("bt %d" % pid)[0])
            except IndexError:
                pylog.warning("Cannot get stack for PID={}".format(pid))
        #btlist = [exec_bt("bt %d" % pid)[0] for pid in tasksrem]
        bt_mergestacks(btlist, verbose=1)
    # Print resource owners. We have two kinds: real pids and pseudo-owners,
    # such as "io_schedule"
    __real_owners = {x for x in __resource_owners if isinstance(x, int)}
    __pseudo_owners = __resource_owners - __real_owners
    if (__real_owners):
        print("\n*** Threads that own resources the other threads are"
              " waiting on ***")
        for pid in __real_owners:
            s = exec_bt("bt {}".format(pid))[0]
            print(s)
            print(__resource_owner_extra[pid])
    if (__pseudo_owners):
        print("\n*** System activities other threads are waiting for ***")
        for pid in __pseudo_owners:
            print("  --- Doing {} ---".format(pid))
            print(__resource_owner_extra[pid])

    # Are any of these owners looping in zone allocator?
    #_owners = zvm_pids & rem
    #if (_owners):
    #    print("        Looping in zone allocator:", list(_owners))
    return

    # The following code is not ready yet
    for vfsmount, superblk, fstype, devname, mnt in getMount():
        sb = readSU("struct super_block", superblk)
        um = sb.s_umount
        if (um.count):
            print(devname, sb)
            bdi = sb.s_bdi
            if (bdi):
                print_backing_dev_info(bdi)
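
A minimal usage sketch (the 'v' argument is unused in the body shown above, so 0 is just a placeholder for whatever value the original caller passes):

# Classify all UNINTERRUPTIBLE threads and print the resource owners
classify_UN(0)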