Esempio n. 1
0
class TestKprobeMaxactive(TestCase):
    def setUp(self):
        self.b = BPF(text=b"""
        typedef struct { int idx; } Key;
        typedef struct { u64 val; } Val;
        BPF_HASH(stats, Key, Val, 3);
        int hello(void *ctx) {
          stats.lookup_or_init(&(Key){1}, &(Val){0})->val++;
          return 0;
        }
        int goodbye(void *ctx) {
          stats.lookup_or_init(&(Key){2}, &(Val){0})->val++;
          return 0;
        }
        """)
        self.b.attach_kprobe(event_re=self.b.get_syscall_prefix() + b"bpf",
                             fn_name=b"hello")
        self.b.attach_kretprobe(event_re=self.b.get_syscall_prefix() + b"bpf",
                                fn_name=b"goodbye", maxactive=128)

    def test_send1(self):
        k1 = self.b[b"stats"].Key(1)
        k2 = self.b[b"stats"].Key(2)
        self.assertTrue(self.b[b"stats"][k1].val >= 2)
        self.assertTrue(self.b[b"stats"][k2].val == 1)
Esempio n. 2
0
class TestKprobeRgx(TestCase):
    def setUp(self):
        self.b = BPF(text="""
        typedef struct { int idx; } Key;
        typedef struct { u64 val; } Val;
        BPF_TABLE("hash", Key, Val, stats, 3);
        int hello(void *ctx) {
          stats.lookup_or_init(&(Key){1}, &(Val){0})->val++;
          return 0;
        }
        int goodbye(void *ctx) {
          stats.lookup_or_init(&(Key){2}, &(Val){0})->val++;
          return 0;
        }
        """)
        self.b.attach_kprobe(event_re="^SyS_bp.*", fn_name="hello")
        self.b.attach_kretprobe(event_re="^SyS_bp.*", fn_name="goodbye")

    def test_send1(self):
        k1 = self.b["stats"].Key(1)
        k2 = self.b["stats"].Key(2)
        self.assertEqual(self.b["stats"][k1].val, self.b["stats"][k2].val + 1)
Esempio n. 3
0
from bcc.utils import printb


def ntoa(addr):
    ipaddr = b''
    for n in range(0, 4):
        ipaddr = ipaddr + str(addr & 0xff).encode()
        if (n != 3):
            ipaddr = ipaddr + b'.'
        addr = addr >> 8
    return ipaddr


def print_event(cpu, data, size):
    event = bpf["events"].event(data)
    printb(b"%-6d %-3s %-16s %-16s %-16s %-16d" %
           (event.pid, event.nodename, event.comm, ntoa(
               event.saddr), ntoa(event.daddr), event.dport))


bpf = BPF(src_file="trace.c")
bpf.attach_kprobe(event="tcp_v4_connect", fn_name="tcp_connect")
bpf.attach_kretprobe(event="tcp_v4_connect", fn_name="tcp_connect_ret")
bpf["events"].open_perf_buffer(print_event)

print("%-6s %-3s %-16s %-16s %-16s %-16s" %
      ("PID", "CONTAINER-ID", "COMMAND", "SOURCE-IPADDR", "DESTINATION-IPADDR",
       "DPORT"))
while 1:
    bpf.perf_buffer_poll()
Esempio n. 4
0
    bpf_text = bpf_text.replace('TYPE_FILTER', '!S_ISREG(mode)')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

# I'd rather trace these via new_sync_read/new_sync_write (which used to be
# do_sync_read/do_sync_write), but those became static. So trace these from
# the parent functions, at the cost of more overhead, instead.
# Ultimately, we should be using [V]FS tracepoints.
b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry")
b.attach_kretprobe(event="__vfs_read", fn_name="trace_read_return")
try:
    b.attach_kprobe(event="__vfs_write", fn_name="trace_write_entry")
    b.attach_kretprobe(event="__vfs_write", fn_name="trace_write_return")
except:
    # older kernels don't have __vfs_write so try vfs_write instead
    b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")
    b.attach_kretprobe(event="vfs_write", fn_name="trace_write_return")

TASK_COMM_LEN = 16  # linux/sched.h
DNAME_INLINE_LEN = 32  # linux/dcache.h

class Data(ct.Structure):
    _fields_ = [
        ("mode", ct.c_int),
        ("pid", ct.c_uint),
Esempio n. 5
0
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""

bpf_text = bpf_text.replace("MAXARG", args.max_args)
if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF
b = BPF(text=bpf_text)
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve")
b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve")

# header
if args.timestamp:
    print("%-8s" % ("TIME(s)"), end="")
print("%-16s %-6s %-6s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS"))

class EventType(object):
    EVENT_ARG = 0
    EVENT_RET = 1

start_ts = time.time()
argv = defaultdict(list)

# This is best-effort PPID matching. Short-lived processes may exit
# before we get a chance to read the PPID.
Esempio n. 6
0
            event.ts_us, event.task, event.pid, type, event.size,
            event.offset, event.delta_us, event.file))
        return
    print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
        event.task, event.pid, type, event.size, event.offset / 1024,
        float(event.delta_us) / 1000, event.file))

# initialize BPF
b = BPF(text=bpf_text)

# common file functions
b.attach_kprobe(event="xfs_file_read_iter", fn_name="trace_rw_entry")
b.attach_kprobe(event="xfs_file_write_iter", fn_name="trace_rw_entry")
b.attach_kprobe(event="xfs_file_open", fn_name="trace_open_entry")
b.attach_kprobe(event="xfs_file_fsync", fn_name="trace_fsync_entry")
b.attach_kretprobe(event="xfs_file_read_iter", fn_name="trace_read_return")
b.attach_kretprobe(event="xfs_file_write_iter", fn_name="trace_write_return")
b.attach_kretprobe(event="xfs_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="xfs_file_fsync", fn_name="trace_fsync_return")

# header
if (csv):
    print("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE")
else:
    if min_ms == 0:
        print("Tracing XFS operations")
    else:
        print("Tracing XFS operations slower than %d ms" % min_ms)
    print("%-8s %-14s %-6s %1s %-7s %-8s %7s %s" % ("TIME", "COMM", "PID", "T",
        "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
Esempio n. 7
0
b = BPF(text=bpf_text)

# common file functions
if BPF.get_kprobe_functions('zpl_iter'):
    b.attach_kprobe(event="zpl_iter_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_iter_write", fn_name="trace_entry")
elif BPF.get_kprobe_functions('zpl_aio'):
    b.attach_kprobe(event="zpl_aio_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_aio_write", fn_name="trace_entry")
else:
    b.attach_kprobe(event="zpl_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_write", fn_name="trace_entry")
b.attach_kprobe(event="zpl_open", fn_name="trace_entry")
b.attach_kprobe(event="zpl_fsync", fn_name="trace_entry")
if BPF.get_kprobe_functions('zpl_iter'):
    b.attach_kretprobe(event="zpl_iter_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_iter_write", fn_name="trace_write_return")
elif BPF.get_kprobe_functions('zpl_aio'):
    b.attach_kretprobe(event="zpl_aio_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_aio_write", fn_name="trace_write_return")
else:
    b.attach_kretprobe(event="zpl_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_write", fn_name="trace_write_return")
b.attach_kretprobe(event="zpl_open", fn_name="trace_open_return")
b.attach_kretprobe(event="zpl_fsync", fn_name="trace_fsync_return")

print("Tracing ZFS operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
Esempio n. 8
0
interval = 5
count = -1
if len(argv) > 1:
	try:
		interval = int(argv[1])
		if interval == 0:
			raise
		if len(argv) > 2:
			count = int(argv[2])
	except:	# also catches -h, --help
		usage()

# load BPF program
b = BPF(src_file = "vfsreadlat.c")
b.attach_kprobe(event="vfs_read", fn_name="do_entry")
b.attach_kretprobe(event="vfs_read", fn_name="do_return")

# header
print("Tracing... Hit Ctrl-C to end.")

# output
loop = 0
do_exit = 0
while (1):
	if count > 0:
		loop += 1
		if loop > count:
			exit()
	try:
		sleep(interval)
	except KeyboardInterrupt:
    }

    udata.t = udata.t1 - udata.t0;

    // Print only fsyncs that take 1 second or longer
//    if (udata.t < 1 * 1000 * 1000 * 1000) {
//        return;
//    }

    events.perf_submit(ctx, &udata, sizeof(udata));
};
""")
b.attach_kprobe(event="jbd2_journal_lock_updates",
                fn_name="trace_jbd2_journal_lock_updates")

b.attach_kretprobe(event="jbd2_journal_lock_updates",
                   fn_name="trace_ret_jbd2_journal_lock_updates")


class Data(ct.Structure):
    _fields_ = [
        ("tid", ct.c_uint),
        ("comm", ct.c_char * 16),
        ("fd", ct.c_uint),
        ("t0", ct.c_ulonglong),
        ("t1", ct.c_ulonglong),
        ("t", ct.c_ulonglong),
        ("stack_id", ct.c_ulong),
    ]


stacks = b["stacks"]
Esempio n. 10
0
class EventMonitor:

    def __init__(self, args):
        self.start_off = 0
        self.cur_off = 0
        self.max_off = 0
        self.end_off = 0
        self.events = list()
        self.do_trace = True
        self.bpf = None
        self.cont_mode = args.container
        self.json = args.json
        self.ebpf = args.ebpf
        self.events_to_trace = args.events_to_trace

    def init_bpf(self):
        bpf_text = load_bpf_program().replace("MAXARG", str(MAX_ARGS))
        if self.cont_mode:
            bpf_text = bpf_text.replace("CONTAINER_MODE", "1")
        else:
            bpf_text = bpf_text.replace("CONTAINER_MODE", "0")

        if self.ebpf:
            log.debug(bpf_text)
            exit()

        # initialize BPF
        self.bpf = BPF(text=bpf_text)

        # attaching kprobes
        sk, se = get_kprobes(self.events_to_trace)

        for syscall in sk:
            syscall_fnname = self.bpf.get_syscall_fnname(syscall)
            self.bpf.attach_kprobe(event=syscall_fnname, fn_name="trace_sys_" + syscall)
            self.bpf.attach_kretprobe(event=syscall_fnname, fn_name="trace_ret_sys_" + syscall)

        for sysevent in se:
            self.bpf.attach_kprobe(event=sysevent, fn_name="trace_" + sysevent)

        if not self.json:
            log.info("%-14s %-16s %-12s %-12s %-6s %-16s %-16s %-6s %-6s %-6s %-12s %s" % (
                "TIME(s)", "UTS_NAME", "MNT_NS", "PID_NS", "UID", "EVENT", "COMM", "PID", "TID", "PPID", "RET", "ARGS"))

    def get_sockaddr_from_buf(self, buf):
        # handle buffer wrap
        # todo: parse all fields
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_short)).contents
        self.cur_off = self.cur_off + 2
        domain = c_val.value
        if domain in sock_domain:
            return sock_domain[domain]
        else:
            return str(domain)

    def get_int_from_buf(self, buf):
        # handle buffer wrap
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_int)).contents
        self.cur_off = self.cur_off + 4
        return c_val.value

    def get_uint_from_buf(self, buf):
        # handle buffer wrap
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_uint)).contents
        self.cur_off = self.cur_off + 4
        return c_val.value

    def get_ulong_from_buf(self, buf):
        # handle buffer wrap
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_ulong)).contents
        self.cur_off = self.cur_off + 8
        return c_val.value

    def get_pointer_from_buf(self, buf):
        # handle buffer wrap
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_void_p)).contents
        self.cur_off = self.cur_off + 8
        return hex(0 if c_val.value is None else c_val.value)

    def get_string_from_buf(self, buf):
        # handle buffer wrap
        if self.cur_off >= self.max_off:
            self.cur_off = 0
        str_size = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_uint)).contents.value
        str_off = self.cur_off + 4
        str_buf = buf[str_off:str_off + str_size]
        self.cur_off = self.cur_off + str_size + 4
        try:
            ret_str = str(array.array('B', str_buf).tostring().decode("utf-8"))
            return ret_str
        except:
            return ""

    def get_argv_from_buf(self, buf, args):
        while True:
            # first, check if there are more args to parse
            if self.cur_off >= self.end_off:
                if self.end_off > self.start_off:
                    return  # reached args end
                elif self.cur_off < self.start_off:  # wrapped buffer
                    return  # reached args end (wrapped buffer)
            args.append(self.get_string_from_buf(buf))

    # process event
    def print_event(self, cpu, data, size):
        event = self.bpf["events"].event(data)
        self.start_off = event.start_off
        self.cur_off = event.start_off
        self.max_off = event.max_off
        self.end_off = event.end_off
        event_buf = self.bpf["submission_buf"][cpu].buf

        context = ctypes.cast(ctypes.byref(event_buf, self.cur_off), ctypes.POINTER(context_t)).contents
        self.cur_off += ctypes.sizeof(context_t)

        pid = context.pid
        tid = context.tid
        ppid = context.ppid

        args = list()

        if context.eventid == EventId.SYS_EXECVE:
            eventname = "execve"
            event_type = self.get_int_from_buf(event_buf)
            if event_type == EventType.EVENT_ARG:
                self.get_argv_from_buf(event_buf, args)  # argv
            # EVENT_RET will happen only when exec failed - print context below
        elif context.eventid == EventId.SYS_EXECVEAT:
            eventname = "execveat"
            event_type = self.get_int_from_buf(event_buf)
            if event_type == EventType.EVENT_ARG:
                args.append(str(self.get_int_from_buf(event_buf)))  # dirfd
                self.get_argv_from_buf(event_buf, args)  # argv
                flags = self.get_int_from_buf(event_buf)
                args.append(execveat_flags_to_str(flags))  # flags
            # EVENT_RET will happen only when exec failed - print context below
        elif context.eventid == EventId.SYS_OPEN:
            eventname = "open"
            args.append(self.get_string_from_buf(event_buf))  # filename
            args.append(open_flags_to_str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_OPENAT:
            eventname = "openat"
            args.append(str(self.get_int_from_buf(event_buf)))  # dirfd
            args.append(self.get_string_from_buf(event_buf))  # filename
            args.append(open_flags_to_str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_CREAT:
            eventname = "creat"
            args.append(self.get_string_from_buf(event_buf))  # pathname
            args.append(str(self.get_uint_from_buf(event_buf)))  # mode
        elif context.eventid == EventId.SYS_MEMFD_CREATE:
            eventname = "memfd_create"
            args.append(self.get_string_from_buf(event_buf))  # name
            args.append(str(self.get_uint_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.CAP_CAPABLE:
            eventname = "cap_capable"
            capability = self.get_int_from_buf(event_buf)
            if capability in capabilities:
                args.append(capabilities[capability])
            else:
                args.append(str(capability))
        elif context.eventid == EventId.SYS_MMAP:
            eventname = "mmap"
            args.append(str(self.get_pointer_from_buf(event_buf)))  # addr
            args.append(str(self.get_ulong_from_buf(event_buf)))  # length
            args.append(str(self.get_int_from_buf(event_buf)))  # prot
            args.append(str(self.get_int_from_buf(event_buf)))  # flags
            args.append(str(self.get_int_from_buf(event_buf)))  # fd
            args.append(str(self.get_ulong_from_buf(event_buf)))  # offset
        elif context.eventid == EventId.SYS_MKNOD:
            eventname = "mknod"
            args.append(self.get_string_from_buf(event_buf))  # pathname
            args.append(str(self.get_uint_from_buf(event_buf)))  # mode
            args.append(str(self.get_uint_from_buf(event_buf)))  # dev
        elif context.eventid == EventId.SYS_MKNOD:
            eventname = "mknodat"
            args.append(str(self.get_int_from_buf(event_buf)))  # dirfd
            args.append(self.get_string_from_buf(event_buf))  # pathname
            args.append(str(self.get_uint_from_buf(event_buf)))  # mode
            args.append(str(self.get_uint_from_buf(event_buf)))  # dev
        elif context.eventid == EventId.SYS_DUP:
            eventname = "dup"
            args.append(str(self.get_int_from_buf(event_buf)))  # oldfd
        elif context.eventid == EventId.SYS_DUP2:
            eventname = "dup2"
            args.append(str(self.get_int_from_buf(event_buf)))  # oldfd
            args.append(str(self.get_int_from_buf(event_buf)))  # newfd
        elif context.eventid == EventId.SYS_DUP3:
            eventname = "dup3"
            args.append(str(self.get_int_from_buf(event_buf)))  # oldfd
            args.append(str(self.get_int_from_buf(event_buf)))  # newfd
            args.append(str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_CLOSE:
            eventname = "close"
            args.append(str(self.get_uint_from_buf(event_buf)))  # fd
        elif context.eventid == EventId.SYS_IOCTL:
            eventname = "ioctl"
            args.append(str(self.get_uint_from_buf(event_buf)))  # fd
            args.append(str(self.get_ulong_from_buf(event_buf)))  # request
        elif context.eventid == EventId.SYS_KILL:
            eventname = "kill"
            args.append(str(self.get_int_from_buf(event_buf)))  # pid
            args.append(str(self.get_int_from_buf(event_buf)))  # sig
        elif context.eventid == EventId.SYS_LISTEN:
            eventname = "listen"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(str(self.get_int_from_buf(event_buf)))  # backlog
        elif context.eventid == EventId.SYS_CONNECT:
            eventname = "connect"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(self.get_sockaddr_from_buf(event_buf))  # sockaddr (partialy parsed to family)
        elif context.eventid == EventId.SYS_ACCEPT:
            eventname = "accept"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(self.get_sockaddr_from_buf(event_buf))  # sockaddr (partialy parsed to family)
        elif context.eventid == EventId.SYS_ACCEPT4:
            eventname = "accept4"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(self.get_sockaddr_from_buf(event_buf))  # sockaddr (partialy parsed to family)
        elif context.eventid == EventId.SYS_BIND:
            eventname = "bind"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(self.get_sockaddr_from_buf(event_buf))  # sockaddr (partialy parsed to family)
        elif context.eventid == EventId.SYS_GETSOCKNAME:
            eventname = "getsockname"
            args.append(str(self.get_int_from_buf(event_buf)))  # sockfd
            args.append(self.get_sockaddr_from_buf(event_buf))  # sockaddr (partialy parsed to family)
        elif context.eventid == EventId.SYS_ACCESS:
            eventname = "access"
            args.append(self.get_string_from_buf(event_buf))  # pathname
            args.append(str(self.get_int_from_buf(event_buf)))  # mode
        elif context.eventid == EventId.SYS_FACCESSAT:
            eventname = "faccessat"
            args.append(str(self.get_int_from_buf(event_buf)))  # dirfd
            args.append(self.get_string_from_buf(event_buf))  # pathname
            args.append(str(self.get_int_from_buf(event_buf)))  # mode
            args.append(str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_SOCKET:
            eventname = "socket"
            domain = self.get_int_from_buf(event_buf)  # domain
            if domain in sock_domain:
                args.append(sock_domain[domain])
            else:
                args.append(str(domain))
            _type = self.get_int_from_buf(event_buf)  # type
            type_str = ""
            s_type = _type & 0xf
            if s_type in sock_type:
                type_str = sock_type[s_type]
            else:
                type_str = str(s_type)
            if _type & 0o00004000:
                type_str += "|SOCK_NONBLOCK"
            if _type & 0o02000000:
                type_str += "|SOCK_CLOEXEC"
            args.append(type_str)
            args.append(str(self.get_int_from_buf(event_buf)))  # protocol
        elif context.eventid == EventId.SYS_MPROTECT:
            eventname = "mprotect"
            args.append(str(self.get_pointer_from_buf(event_buf)))  # addr
            args.append(str(self.get_ulong_from_buf(event_buf)))  # length
            args.append(str(self.get_int_from_buf(event_buf)))  # prot
        elif context.eventid == EventId.SYS_STAT:
            eventname = "stat"
            args.append(self.get_string_from_buf(event_buf))  # path
        elif context.eventid == EventId.SYS_FSTAT:
            eventname = "fstat"
            args.append(str(self.get_uint_from_buf(event_buf)))  # fd
        elif context.eventid == EventId.SYS_LSTAT:
            eventname = "lstat"
            args.append(self.get_string_from_buf(event_buf))  # path
        elif context.eventid == EventId.SYS_PRCTL:
            eventname = "prctl"
            args.append(str(self.get_uint_from_buf(event_buf)))  # option
            args.append(str(self.get_ulong_from_buf(event_buf)))  # arg2
            args.append(str(self.get_ulong_from_buf(event_buf)))  # arg3
            args.append(str(self.get_ulong_from_buf(event_buf)))  # arg4
            args.append(str(self.get_ulong_from_buf(event_buf)))  # arg5
        elif context.eventid == EventId.SYS_PTRACE:
            eventname = "ptrace"
            args.append(str(self.get_uint_from_buf(event_buf)))  # request
            args.append(str(self.get_int_from_buf(event_buf)))  # pid
            args.append(str(self.get_pointer_from_buf(event_buf)))  # addr
            args.append(str(self.get_pointer_from_buf(event_buf)))  # data
        elif context.eventid == EventId.SYS_PROCESS_VM_WRITEV:
            eventname = "process_vm_writev"
            args.append(str(self.get_int_from_buf(event_buf)))  # pid
            args.append(str(self.get_pointer_from_buf(event_buf)))  # local_iov
            args.append(str(self.get_ulong_from_buf(event_buf)))  # liovcnt
            args.append(str(self.get_pointer_from_buf(event_buf)))  # remote_iov
            args.append(str(self.get_ulong_from_buf(event_buf)))  # riovcnt
            args.append(str(self.get_ulong_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_PROCESS_VM_READV:
            eventname = "process_vm_readv"
            args.append(str(self.get_int_from_buf(event_buf)))  # pid
            args.append(str(self.get_pointer_from_buf(event_buf)))  # local_iov
            args.append(str(self.get_ulong_from_buf(event_buf)))  # liovcnt
            args.append(str(self.get_pointer_from_buf(event_buf)))  # remote_iov
            args.append(str(self.get_ulong_from_buf(event_buf)))  # riovcnt
            args.append(str(self.get_ulong_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_INIT_MODULE:
            eventname = "init_module"
            args.append(str(self.get_pointer_from_buf(event_buf)))  # module_image
            args.append(str(self.get_ulong_from_buf(event_buf)))  # len
            args.append(self.get_string_from_buf(event_buf))  # param_values
        elif context.eventid == EventId.SYS_FINIT_MODULE:
            eventname = "finit_module"
            args.append(str(self.get_int_from_buf(event_buf)))  # fd
            args.append(self.get_string_from_buf(event_buf))  # param_values
            args.append(str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_DELETE_MODULE:
            eventname = "delete_module"
            args.append(self.get_string_from_buf(event_buf))  # name
            args.append(str(self.get_int_from_buf(event_buf)))  # flags
        elif context.eventid == EventId.SYS_SYMLINK:
            eventname = "symlink"
            args.append(self.get_string_from_buf(event_buf))  # target
            args.append(self.get_string_from_buf(event_buf))  # linkpath
        elif context.eventid == EventId.SYS_SYMLINKAT:
            eventname = "symlinkat"
            args.append(self.get_string_from_buf(event_buf))  # target
            args.append(str(self.get_int_from_buf(event_buf)))  # newdirfd
            args.append(self.get_string_from_buf(event_buf))  # linkpath
        elif context.eventid == EventId.SYS_GETDENTS:
            eventname = "getdents"
            args.append(str(self.get_uint_from_buf(event_buf)))  # fd
        elif context.eventid == EventId.SYS_GETDENTS64:
            eventname = "getdents64"
            args.append(str(self.get_uint_from_buf(event_buf)))  # fd
        elif context.eventid == EventId.SYS_CLONE:
            eventname = "clone"
        elif context.eventid == EventId.SYS_FORK:
            eventname = "fork"
        elif context.eventid == EventId.SYS_VFORK:
            eventname = "vfork"
        elif context.eventid == EventId.DO_EXIT:
            eventname = "do_exit"
        else:
            return

        try:
            comm = context.comm.decode("utf-8")
            uts_name = context.uts_name.decode("utf-8")
        except:
            return

        if eventname in self.events_to_trace:
            if not self.json:
                log.info("%-14f %-16s %-12d %-12d %-6d %-16s %-16s %-6d %-6d %-6d %-12d %s" % (
                    context.ts / 1000000.0, uts_name, context.mnt_id, context.pid_id, context.uid,
                    eventname, comm, pid, tid, ppid, context.retval, " ".join(args)))
            else:  # prepare data to be consumed by ultrabox
                data = dict()
                data["status"] = [0]
                data["raw"] = ""
                data["type"] = ["apicall"]
                data["time"] = context.ts / 1000000.0
                data["mnt_ns"] = context.mnt_id
                data["pid_ns"] = context.pid_id
                data["uid"] = context.uid
                data["api"] = eventname
                data["uts_name"] = uts_name
                data["process_name"] = comm
                data["pid"] = pid
                data["tid"] = tid
                data["ppid"] = ppid
                data["return_value"] = context.retval
                dict_args = dict()
                args_len = len(args)
                for i in range(args_len):
                    dict_args["p" + str(i)] = args[i].rstrip('\0')
                data["arguments"] = dict_args

                log.info(json.dumps(data))
                self.events.append(data)

        # if eventname == "do_exit" and pid == 1:
        # log.info(json.dumps(events, indent=4))
        # exit()

    def stop_trace(self):
        self.do_trace = False

    def get_events(self):
        return self.events

    def monitor_events(self):
        # loop with callback to print_event
        self.bpf["events"].open_perf_buffer(self.print_event)
        while self.do_trace:
            try:
                self.bpf.perf_buffer_poll(1000)
            except KeyboardInterrupt:
                exit()
Esempio n. 11
0
    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
else:
    bpf_text = bpf_text.replace('FILTER_PID', '0')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

b.attach_kprobe(event=ext4_read_fn, fn_name=ext4_trace_read_fn)
b.attach_kprobe(event="ext4_file_write_iter", fn_name="trace_entry")
b.attach_kprobe(event="ext4_file_open", fn_name="trace_entry")
b.attach_kprobe(event="ext4_sync_file", fn_name="trace_entry")
b.attach_kretprobe(event=ext4_read_fn, fn_name='trace_read_return')
b.attach_kretprobe(event="ext4_file_write_iter", fn_name="trace_write_return")
b.attach_kretprobe(event="ext4_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="ext4_sync_file", fn_name="trace_fsync_return")

print("Tracing ext4 operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
while (1):
    try:
        if args.interval:
            sleep(int(args.interval))
        else:
            sleep(99999999)
Esempio n. 12
0
args = parser.parse_args()

# load BPF program
current_pid = os.getpid()
# Generate program
prog = prog_header
for lock in locks:
    prog += lock_func.replace("_ID_",
                              str(lock['id'])).replace("_NAME_", lock['name'])

prog = prog.replace("CUR_PID", str(current_pid))

b = BPF(text=prog)
for lock in locks:
    b.attach_kprobe(event=lock['lock_func'], fn_name="lock_%s" % lock['name'])
    b.attach_kretprobe(event=lock['lock_func'],
                       fn_name="release_%s" % lock['name'])

events = {}
# header
# print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "LOCKTIME"))

print("Tracing locks for %d seconds" % args.time)

# process event
start = 0

# loop with callback to print_event
for lock in locks:
    b[lock['name']].open_perf_buffer(print_event, page_cnt=4096)
start_time = datetime.datetime.now()
try:
Esempio n. 13
0
if args.tid:  # TID trumps PID
    program = program.replace('FILTER',
        'if (tid != %s) { return 0; }' % args.tid)
elif args.pid:
    program = program.replace('FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    program = program.replace('FILTER', '')

program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))

b = BPF(text=program)

b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")

enabled = b.get_table("enabled");

stack_traces = b.get_table("stack_traces")
aq_counts = b.get_table("aq_report_count")
aq_maxs   = b.get_table("aq_report_max")
aq_totals = b.get_table("aq_report_total")

hl_counts = b.get_table("hl_report_count")
hl_maxs   = b.get_table("hl_report_max")
hl_totals = b.get_table("hl_report_total")

aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
hl_sort = sort_list(hl_maxs, hl_totals, hl_counts)
Esempio n. 14
0
    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
else:
    bpf_text = bpf_text.replace('FILTER_PID', '0')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

b.attach_kprobe(event=ext4_read_fn, fn_name=ext4_trace_read_fn)
b.attach_kprobe(event="ext4_file_write_iter", fn_name="trace_entry")
b.attach_kprobe(event="ext4_file_open", fn_name="trace_entry")
b.attach_kprobe(event="ext4_sync_file", fn_name="trace_entry")
b.attach_kretprobe(event=ext4_read_fn, fn_name='trace_read_return')
b.attach_kretprobe(event="ext4_file_write_iter", fn_name="trace_write_return")
b.attach_kretprobe(event="ext4_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="ext4_sync_file", fn_name="trace_fsync_return")

print("Tracing ext4 operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
while (1):
    try:
        if args.interval:
            sleep(int(args.interval))
        else:
            sleep(99999999)
Esempio n. 15
0
            event.ts_us, event.task, event.pid, type, event.size,
            event.offset, event.delta_us, event.file))
        return
    print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
        event.task, event.pid, type, event.size, event.offset / 1024,
        float(event.delta_us) / 1000, event.file))

# initialize BPF
b = BPF(text=bpf_text)

# Common file functions. See earlier comment about generic_*().
b.attach_kprobe(event="generic_file_read_iter", fn_name="trace_read_entry")
b.attach_kprobe(event="btrfs_file_write_iter", fn_name="trace_write_entry")
b.attach_kprobe(event="generic_file_open", fn_name="trace_open_entry")
b.attach_kprobe(event="btrfs_sync_file", fn_name="trace_fsync_entry")
b.attach_kretprobe(event="generic_file_read_iter", fn_name="trace_read_return")
b.attach_kretprobe(event="btrfs_file_write_iter", fn_name="trace_write_return")
b.attach_kretprobe(event="generic_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="btrfs_sync_file", fn_name="trace_fsync_return")

# header
if (csv):
    print("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE")
else:
    if min_ms == 0:
        print("Tracing btrfs operations")
    else:
        print("Tracing btrfs operations slower than %d ms" % min_ms)
    print("%-8s %-14s %-6s %1s %-7s %-8s %7s %s" % ("TIME", "COMM", "PID", "T",
        "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
Esempio n. 16
0
class EventMonitor:

    def __init__(self, args):
        self.cur_off = 0
        self.events = list()
        self.do_trace = True
        self.bpf = None
        self.event_bufs = list()
        self.total_lost = 0

        # input arguments
        self.cont_mode = args.container
        self.json = args.json
        self.ebpf = args.ebpf
        self.list_events = args.list
        self.events_to_trace = args.events_to_trace
        self.buf_pages = args.buf_pages

    def init_bpf(self):
        bpf_text = load_bpf_program()

        if self.list_events:
            log.info("Syscalls:")
            for e in syscalls:
                log.info("  %s" % e)
            log.info("\nOther events:")
            for e in sysevents:
                log.info("  %s" % e)
            exit()

        if self.ebpf:
            log.debug(bpf_text)
            exit()

        # initialize BPF
        self.bpf = BPF(text=bpf_text)

        # set shared config
        key = ctypes.c_uint32(shared_config.CONFIG_CONT_MODE)
        self.bpf["config_map"][key] = ctypes.c_uint32(self.cont_mode)

        # attaching kprobes
        sk, se = get_kprobes(self.events_to_trace)

        for syscall in sk:
            syscall_fnname = self.bpf.get_syscall_fnname(syscall)
            self.bpf.attach_kprobe(event=syscall_fnname, fn_name="syscall__" + syscall)
            self.bpf.attach_kretprobe(event=syscall_fnname, fn_name="trace_ret_" + syscall)

        for sysevent in se:
            self.bpf.attach_kprobe(event=sysevent, fn_name="trace_" + sysevent)

        if not self.json:
            log.info("%-14s %-16s %-12s %-12s %-6s %-16s %-16s %-6s %-6s %-6s %-12s %s" % (
                "TIME(s)", "UTS_NAME", "MNT_NS", "PID_NS", "UID", "EVENT", "COMM", "PID", "TID", "PPID", "RET", "ARGS"))

    def get_sockaddr_from_buf(self, buf):
        # todo: parse all fields
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_short)).contents
        self.cur_off = self.cur_off + 2
        domain = c_val.value
        if domain in sock_domain:
            return sock_domain[domain]
        else:
            return str(domain)

    def get_type_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_byte)).contents
        self.cur_off = self.cur_off + 1
        return c_val.value

    def get_int_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_int)).contents
        self.cur_off = self.cur_off + 4
        return c_val.value

    def get_uint_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_uint)).contents
        self.cur_off = self.cur_off + 4
        return c_val.value

    def get_long_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_long)).contents
        self.cur_off = self.cur_off + 8
        return c_val.value

    def get_ulong_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_ulong)).contents
        self.cur_off = self.cur_off + 8
        return c_val.value

    def get_pointer_from_buf(self, buf):
        c_val = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_void_p)).contents
        self.cur_off = self.cur_off + 8
        return hex(0 if c_val.value is None else c_val.value)

    def get_string_from_buf(self, buf):
        str_size = ctypes.cast(ctypes.byref(buf, self.cur_off), ctypes.POINTER(ctypes.c_uint)).contents.value
        str_off = self.cur_off + 4
        str_buf = buf[str_off:str_off + str_size]
        self.cur_off = self.cur_off + str_size + 4
        try:
            ret_str = str(array.array('B', str_buf).tostring().decode("utf-8"))
            return ret_str
        except:
            return ""

    def get_str_arr_from_buf(self, buf, args):
        str_list = list()
        while self.cur_off < ctypes.sizeof(buf):
            argtype = self.get_type_from_buf(buf)
            if argtype == ArgType.STR_T:
                str_list.append(self.get_string_from_buf(buf).rstrip('\x00'))
            else:
                args.append('[%s]' % ', '.join(map(str, str_list)))
                return

    def print_event(self, eventname, context, args):
        # There are some syscalls which doesn't have the same name as their function
        eventfunc = "dummy"
        if context.eventid == 4:
            eventfunc = "newstat"
        elif context.eventid == 5:
            eventfunc = "newfstat"
        elif context.eventid == 6:
            eventfunc = "newlstat"

        try:
            comm = context.comm.decode("utf-8")
            uts_name = context.uts_name.decode("utf-8")
        except:
            return

        if eventname in self.events_to_trace or eventfunc in self.events_to_trace:
            if not self.json:
                log.info("%-14f %-16s %-12d %-12d %-6d %-16s %-16s %-6d %-6d %-6d %-12d %s" % (
                    context.ts / 1000000.0, uts_name, context.mnt_id, context.pid_id, context.uid,
                    eventname, comm, context.pid, context.tid, context.ppid, context.retval, " ".join(args)))
            else:  # prepare data to be consumed by ultrabox
                data = dict()
                data["status"] = [0]
                data["raw"] = ""
                data["type"] = ["apicall"]
                data["time"] = context.ts / 1000000.0
                data["mnt_ns"] = context.mnt_id
                data["pid_ns"] = context.pid_id
                data["uid"] = context.uid
                data["api"] = eventname
                data["uts_name"] = uts_name
                data["process_name"] = comm
                data["pid"] = context.pid
                data["tid"] = context.tid
                data["ppid"] = context.ppid
                data["return_value"] = context.retval
                dict_args = dict()
                args_len = len(args)
                for i in range(args_len):
                    dict_args["p" + str(i)] = args[i].rstrip('\0')
                data["arguments"] = dict_args

                log.info(json.dumps(data))
                self.events.append(data)

    def parse_event(self, event_buf):
        context = ctypes.cast(ctypes.byref(event_buf), ctypes.POINTER(context_t)).contents
        self.cur_off = ctypes.sizeof(context_t)
        args = list()

        if context.eventid in event_id:
            eventname = event_id[context.eventid]
            for i in range(context.argnum):
                argtype = self.get_type_from_buf(event_buf)
                # sanity check - should never happen
                if self.cur_off >= ctypes.sizeof(event_buf):
                    return

                if argtype == ArgType.INT_T:
                    args.append(str(self.get_int_from_buf(event_buf)))
                elif argtype == ArgType.UINT_T:
                    args.append(str(self.get_uint_from_buf(event_buf)))
                elif argtype == ArgType.LONG_T:
                    args.append(str(self.get_long_from_buf(event_buf)))
                elif argtype == ArgType.ULONG_T:
                    args.append(str(self.get_ulong_from_buf(event_buf)))
                elif argtype == ArgType.OFF_T_T:
                    args.append(str(self.get_ulong_from_buf(event_buf)))
                elif argtype == ArgType.MODE_T_T:
                    args.append(str(self.get_uint_from_buf(event_buf)))
                elif argtype == ArgType.DEV_T_T:
                    args.append(str(self.get_uint_from_buf(event_buf)))
                elif argtype == ArgType.SIZE_T_T:
                    args.append(str(self.get_ulong_from_buf(event_buf)))
                elif argtype == ArgType.POINTER_T:
                    args.append(str(self.get_pointer_from_buf(event_buf)))
                elif argtype == ArgType.STR_T:
                    args.append(self.get_string_from_buf(event_buf))
                elif argtype == ArgType.STR_ARR_T:
                    self.get_str_arr_from_buf(event_buf, args)
                elif argtype == ArgType.SOCKADDR_T:
                    # sockaddr (partialy parsed to family)
                    args.append(self.get_sockaddr_from_buf(event_buf))
                elif argtype == ArgType.OPENFLAGS_T:
                    args.append(open_flags_to_str(self.get_int_from_buf(event_buf)))
                elif argtype == ArgType.EXEC_FLAG_T:
                    flags = self.get_int_from_buf(event_buf)
                    args.append(execveat_flags_to_str(flags))
                elif argtype == ArgType.SOCK_DOM_T:
                    domain = self.get_int_from_buf(event_buf)
                    if domain in sock_domain:
                        args.append(sock_domain[domain])
                    else:
                        args.append(str(domain))
                elif argtype == ArgType.SOCK_TYPE_T:
                    sock_type_num = self.get_int_from_buf(event_buf)
                    args.append(sock_type_to_str(sock_type_num))
                elif argtype == ArgType.CAP_T:
                    capability = self.get_int_from_buf(event_buf)
                    if capability in capabilities:
                        args.append(capabilities[capability])
                    else:
                        args.append(str(capability))
        else:
            return

        return self.print_event(eventname, context, args)

    # process event
    def handle_event(self, cpu, data, size):
        buf = ctypes.cast(data, ctypes.POINTER(ctypes.c_char*size)).contents
        event_buf = (ctypes.c_char * size).from_buffer_copy(buf)
        self.event_bufs.append(event_buf)

    def lost_event(self, lost):
        self.total_lost += lost
        log.info("Possibly lost %d events (%d in total), consider using a bigger buffer" % (lost, self.total_lost))

    def stop_trace(self):
        self.do_trace = False

    def get_events(self):
        return self.events

    def monitor_events(self):
        # loop with callback to handle_event
        self.bpf["events"].open_perf_buffer(self.handle_event, page_cnt=self.buf_pages, lost_cb=self.lost_event)
        while self.do_trace:
            try:
                # It would have been better to parse the events in a "consumer" thread
                # As python threading is not efficient - parse here
                for event in self.event_bufs:
                    self.parse_event(event)
                self.event_bufs = list()
                self.bpf.perf_buffer_poll(1000)
            except KeyboardInterrupt:
                exit()
Esempio n. 17
0
pid_filter = ""
netns_filter = ""

if args.pid:
    pid_filter = 'if (pid >> 32 != %d) { return 0; }' % args.pid
if args.netns:
    netns_filter = 'if (net_ns_inum != %d) { return 0; }' % args.netns

bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter)
bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")
b.attach_kprobe(event="tcp_set_state", fn_name="trace_tcp_set_state_entry")
b.attach_kprobe(event="tcp_close", fn_name="trace_close_entry")
b.attach_kretprobe(event="inet_csk_accept", fn_name="trace_accept_return")

print("Tracing TCP established connections. Ctrl-C to end.")

# header
if args.verbose:
    print("%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" % ("TYPE",
          "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"), end="")
    if not args.netns:
        print("%-8s" % "NETNS", end="")
    print()
Esempio n. 18
0
if args.verbose or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

b = BPF(text=bpf_text)

for i, function in enumerate(args.functions):
    if ":" in function:
        library, func = function.split(":")
        b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i)
        b.attach_uretprobe(name=library, sym=func, fn_name="trace_return")
    else:
        b.attach_kprobe(event=function, fn_name="trace_%d" % i)
        b.attach_kretprobe(event=function, fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h

class Data(ct.Structure):
    _fields_ = [
        ("id", ct.c_ulonglong),
        ("tgid_pid", ct.c_ulonglong),
        ("start_ns", ct.c_ulonglong),
        ("duration_ns", ct.c_ulonglong),
        ("retval", ct.c_ulonglong),
        ("comm", ct.c_char * TASK_COMM_LEN)
    ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else []) + \
            ([("user_stack_id", ct.c_int)] if args.user_stack else []) + \
            ([("kernel_stack_id", ct.c_int),("kernel_ip", ct.c_ulonglong)] if args.kernel_stack else [])
Esempio n. 19
0
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)

# for POSIX compliance, all architectures implement these
# system calls but the name of the actual entry point may
# be different for which we must check if the entry points
# actually exist before attaching the probes
if BPF.ksymname("sys_stat") != -1:
    b.attach_kprobe(event="sys_stat", fn_name="trace_entry")
    b.attach_kretprobe(event="sys_stat", fn_name="trace_return")

if BPF.ksymname("sys_statfs") != -1:
    b.attach_kprobe(event="sys_statfs", fn_name="trace_entry")
    b.attach_kretprobe(event="sys_statfs", fn_name="trace_return")

if BPF.ksymname("sys_newstat") != -1:
    b.attach_kprobe(event="sys_newstat", fn_name="trace_entry")
    b.attach_kretprobe(event="sys_newstat", fn_name="trace_return")

TASK_COMM_LEN = 16    # linux/sched.h
NAME_MAX = 255        # linux/limits.h

class Data(ct.Structure):
    _fields_ = [
        ("pid", ct.c_ulonglong),
Esempio n. 20
0
    prog = prog.replace('FILTER', '')

b = BPF(text=prog)

### Probes

# NOTE: use "sys_read" on older kernels
b.attach_kprobe(event="ksys_read", fn_name="in_sys_read")

# the unix socket relevant functions
b.attach_kprobe(event="unix_stream_recvmsg", fn_name="in_unix_stream_recvmsg")
b.attach_kprobe(event="unix_dgram_recvmsg", fn_name="in_unix_stream_recvmsg")
b.attach_kprobe(event="unix_seqpacket_recvmsg",
                fn_name="in_unix_stream_recvmsg")

b.attach_kretprobe(event="ksys_read", fn_name="out_sys_read")
###

# define output data structure in Python
TASK_COMM_LEN = 16  # linux/sched.h
MAX_BUF_SIZE = 356  # Limited by the BPF stack


# Max size of the whole struct: 512 bytes
class Data(ct.Structure):
    _fields_ = [("timestamp_ns", ct.c_ulonglong), ("pid", ct.c_uint),
                ("remote_pid", ct.c_uint), ("comm", ct.c_char * TASK_COMM_LEN),
                ("v0", ct.c_ubyte * MAX_BUF_SIZE), ("buf_size", ct.c_uint),
                ("len", ct.c_uint)]

Esempio n. 21
0
    return 0;
}
"""
if args.pid:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="sys_open", fn_name="trace_entry")
b.attach_kretprobe(event="sys_open", fn_name="trace_return")

TASK_COMM_LEN = 16    # linux/sched.h
NAME_MAX = 255        # linux/limits.h

class Data(ct.Structure):
    _fields_ = [
        ("pid", ct.c_ulonglong),
        ("ts", ct.c_ulonglong),
        ("delta", ct.c_ulonglong),
        ("ret", ct.c_int),
        ("comm", ct.c_char * TASK_COMM_LEN),
        ("fname", ct.c_char * NAME_MAX)
    ]

start_ts = 0
Esempio n. 22
0
from bcc import BPF

bpf_source = """
#include <uapi/linux/ptrace.h>

int ret_sys_execve(struct pt_regs *ctx) {
  int return_value;
  char comm[16];
  bpf_get_current_comm(&comm, sizeof(comm));

  // PT_REGS_RC是一个宏,它将从这个特定上下文的BPF寄存器中读取返回值。
  return_value = PT_REGS_RC(ctx);

  bpf_trace_printk("program: %s, return: %d\\n", comm, return_value);
  return 0;
}
"""

bpf = BPF(text=bpf_source)
execve_function = bpf.get_syscall_fnname("execve")
bpf.attach_kretprobe(event=execve_function, fn_name="ret_sys_execve")
bpf.trace_print()
Esempio n. 23
0
    bpf_text = bpf_text.replace('STORE',
        'dist.increment(bpf_log2l(delta));')
if args.verbose:
    print(bpf_text)

# signal handler
def signal_ignore(signal, frame):
    print()

# load BPF program
b = BPF(text=bpf_text)

# attach probes
if not library:
    b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry")
    b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return")
    matched = b.num_open_kprobes()
else:
    b.attach_uprobe(name=library, sym_re=pattern, fn_name="trace_func_entry",
                    pid=args.pid or -1)
    b.attach_uretprobe(name=library, sym_re=pattern,
                       fn_name="trace_func_return", pid=args.pid or -1)
    matched = b.num_open_uprobes()

if matched == 0:
    print("0 functions matched by \"%s\". Exiting." % args.pattern)
    exit()

# header
print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
    (matched / 2, args.pattern))
Esempio n. 24
0
                            event.delay_max, average_delay))


# load BPF program
KVER = os.popen('uname -r').read().rstrip()
b = BPF(text=bpf_text,
        cflags=[
            "-include", "/usr/src/zfs-" + KVER + "/zfs_config.h",
            "-I/usr/src/zfs-" + KVER + "/include/",
            "-I/usr/src/zfs-" + KVER + "/include/spl",
            "-I/usr/src/zfs-" + KVER + "/include/",
            "-I/usr/src/zfs-" + KVER + "/include/linux", "-DCC_USING_FENTRY"
        ])

b.attach_kprobe(event="spa_sync", fn_name="spa_sync_entry")
b.attach_kretprobe(event="spa_sync", fn_name="spa_sync_return")
b.attach_kprobe(event="dsl_pool_sync", fn_name="dsl_pool_sync_entry")
b.attach_kretprobe(event="dsl_pool_sync", fn_name="dsl_pool_sync_return")
b.attach_kprobe(event="dmu_tx_delay", fn_name="dmu_tx_delay_entry")
b.attach_kprobe(event="trace_zfs_delay__mintime",
                fn_name="dmu_tx_delay_mintime")
b.attach_perf_event(ev_type=PerfType.SOFTWARE,
                    ev_config=PerfSWConfig.CPU_CLOCK,
                    fn_name="get_spa_dirty",
                    sample_freq=10)

print_count = 30

# initialize dgrn program object to read zfs_dirty_data_max
prog = drgn.program_from_kernel()
Esempio n. 25
0
        event.ip, inet_ntoa(event.saddr), inet_ntoa(event.daddr),
        event.dport))
def print_ipv6_event(cpu, data, size):
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="")
    print("%-6d %-12.12s %-2d ...%-13x ...%-13x %-4d" % (event.pid,
        event.task, event.ip, event.saddr, event.daddr, event.dport))

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_entry")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")

# header
if args.timestamp:
    print("%-9s" % ("TIME(s)"), end="")
print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR",
    "DADDR", "DPORT"))

start_ts = 0

def inet_ntoa(addr):
    dq = ''
    for i in range(0, 4):
        dq = dq + str(addr & 0xff)
        if (i != 3):
Esempio n. 26
0

data_struct = {
    "measurement": 'picknext',
    "time": [],
    "tags": [
        'glob',
        'cpu',
        'pid',
    ],
    "fields": ['duration']
}

b = BPF(text=bpf_text)
b.attach_kprobe(event="pick_next_task_fair", fn_name="pick_start")
b.attach_kretprobe(event="pick_next_task_fair", fn_name="pick_end")

dist = b.get_table("dist")

#print("%-6s%-6s%-6s%-6s" % ("CPU", "PID", "TGID", "TIME(ns)"))

while (1):
    try:
        sleep(1)
        for k, v in dist.items():
            #print("%-6d%-6d%-6d%-6d" % (k.cpu, k.pid, k.tgid, v.value))
            #test_data = lmp_data('glob', k.cpu, k.pid, v.value)
            test_data = lmp_data(datetime.now().isoformat(), 'glob', k.cpu,
                                 k.pid, v.value)
            write2db(data_struct, test_data, client)
        dist.clear()
def run_bpf_probe(num_iterations, sleep_secs):
    """Run the extended BPF probe on Linux's compact_zone_order() function."""

    # debug_level = 0x3    # 0x3: dump LLVM IR and BPF byte code to stderr
    debug_level = 0x0      # debug 0x0 = no debug

    bpf = BPF(src_file="eBPF_c_probe.c", debug=debug_level)

    # ebpf_code = bpf.dump_func(func_name="prb_eBPF_compact_zone_order_entry")

    assert len(bpf["global_var_total_accum_nsec"]) == 1, \
        "Expected a global variable in BPF that be a scalar, ie., of length 1"

    # our BPF probe will only work for a kernel point which is not executed
    # concurrently, if not it will fail. Of course, you can use other
    # data structures in the BPF probe that can make it work concurrently.
    synchr_non_concurrent_kpoint = "compact_zone_order"
    bpf.attach_kprobe(event=synchr_non_concurrent_kpoint,
                      fn_name="prb_eBPF_compact_zone_order_entry")
    bpf.attach_kretprobe(event=synchr_non_concurrent_kpoint,
                         fn_name="prb_eBPF_compact_zone_order_return")

    # these are other collateral events we want to know if they happen at the
    # same time as the main event above, and relatively, how frequently they
    # happen when the main probed events (above) are happening.

    collateral_events = [
        {'func': 'kmalloc_order_trace',
         'probe': 'prb_eBPF_kmalloc_order_trace_return',
         'count': 'global_var_cnt_kmalloc_order_trace'},

        {'func': '__kmalloc',
         'probe': 'prb_eBPF___kmalloc_return',
         'count': 'global_var_cnt___kmalloc'},

        {'func': '__do_kmalloc_node',
         'probe': 'prb_eBPF___do_kmalloc_node_return',
         'count': 'global_var_cnt___do_kmalloc_node'},

        {'func': 'kmem_cache_alloc',
         'probe': 'prb_eBPF_kmem_cache_alloc_return',
         'count': 'global_var_cnt_kmem_cache_alloc'},

        {'func': 'kmem_cache_alloc_trace',
         'probe': 'prb_eBPF_kmem_cache_alloc_trace_return',
         'count': 'global_var_cnt_kmem_cache_alloc_trace'},

        {'func': 'malloc',
         'probe': 'prb_eBPF_malloc_return',
         'count': 'global_var_cnt_malloc'},

        {'func': 'kfree',
         'probe': 'prb_eBPF_kfree_return',
         'count': 'global_var_cnt_kfree'},

        {'func': 'kmem_cache_reap',
         'probe': 'prb_eBPF_kmem_cache_reap_return',
         'count': 'global_var_cnt_kmem_cache_reap'},

        {'func': 'kmem_cache_free',
         'probe': 'prb_eBPF_kmem_cache_free_return',
         'count': 'global_var_cnt_kmem_cache_free'},

        {'func': 'kmem_cache_destroy',
         'probe': 'prb_eBPF_kmem_cache_destroy_return',
         'count': 'global_var_cnt_kmem_cache_destroy'},

        {'func': 'kmem_cache_shrink',
         'probe': 'prb_eBPF_kmem_cache_shrink_return',
         'count': 'global_var_cnt_kmem_cache_shrink'}
    ]

    for collateral_event in collateral_events:
        bpf.attach_kretprobe(event=collateral_event['func'],
                             fn_name=collateral_event['probe'])

        assert len(bpf[collateral_event['count']]) == 1, \
            "Var '{}' must be a scalar too.".format(collateral_event['count'])

    # request time to sleep and iterations as arguments from the command-line,
    # e.g., by using the 'argparse' module (the timing to wait is important
    # because there can be no output reported below if there is no activity of
    # the kprobe we attached to in this period of time)
    for sample in xrange(1, num_iterations + 1):
        sleep(sleep_secs)

        print "---- new sample: {} at {}".format(sample, strftime("%D %T"))
        bpf["delay_dist"].print_log2_hist("usecs")
        bpf["delay_dist"].clear()

        # All the direct iterations on BPF tables return ctypes values (like
        # c_int, c_ulong, etc), which we unwrap here by the .value property and
        # divide by 1000 (microseconds) since the histogram in C in the BPF
        # probe also divided the nanoseconds by 1000, so all will report in the
        # same unit of time

        total_accum_nsec = bpf["global_var_total_accum_nsec"].values()[0]
        print "total_accum_usec = {:.0f}".format(total_accum_nsec.value / 1000)
        bpf["global_var_total_accum_nsec"].clear()

        for k, val in bpf["total_accum_nsec_per_order"].items():
            print ("total_accum_usec[order = {}] = "
                   "{:.0f}").format(k.value, val.value / 1000)
        bpf["total_accum_nsec_per_order"].clear()

        for collateral_event in collateral_events:
            concur_kmallocs = bpf[collateral_event['count']].values()[0]
            print "{} while compaction = {}".format(collateral_event['func'],
                                                    concur_kmallocs.value)
            bpf[collateral_event['count']].clear()

        sys.stdout.flush()
Esempio n. 28
0
    bpf_program.attach_uprobe(name="c",
                              sym="malloc",
                              fn_name="alloc_enter",
                              pid=pid)
    bpf_program.attach_uretprobe(name="c",
                                 sym="malloc",
                                 fn_name="alloc_exit",
                                 pid=pid)
    bpf_program.attach_uprobe(name="c",
                              sym="free",
                              fn_name="free_enter",
                              pid=pid)
else:
    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = StackDecoder(pid)


def print_outstanding():
    stacks = {}
    print("[%s] Top %d stacks with outstanding allocations:" %
          (datetime.now().strftime("%H:%M:%S"), top_stacks))
    allocs = bpf_program.get_table("allocs")
    for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
        if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
            continue
        stack = decoder.decode_stack(info, kernel_trace)
        if stack in stacks:
Esempio n. 29
0
        # lop off the trailing .
        question = ("%s" % dnspkt.q.qname)[:-1].encode('utf-8')

        for answer in dnspkt.rr:
            # skip all but A and AAAA records
            if answer.rtype == 1 or answer.rtype == 28:
                dns_cache[str(answer.rdata).encode('utf-8')] = (question,
                                                                datetime.now())


# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_entry")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")
if args.dns:
    b.attach_kprobe(event="udp_recvmsg", fn_name="trace_udp_recvmsg")
    b.attach_kretprobe(event="udp_recvmsg", fn_name="trace_udp_ret_recvmsg")

print("Tracing connect ... Hit Ctrl-C to end")
if args.count:
    try:
        while True:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-20s %-10s" %
Esempio n. 30
0
File: dcstat.py Progetto: zlim/bcc
void count_lookup(struct pt_regs *ctx) {
    int key = S_SLOW;
    stats.atomic_increment(key);
    if (PT_REGS_RC(ctx) == 0) {
        key = S_MISS;
        stats.atomic_increment(key);
    }
}
"""

# load BPF program
b = BPF(text=bpf_text)
b.attach_kprobe(event_re="^lookup_fast$|^lookup_fast.constprop.*.\d$",
                fn_name="count_fast")
b.attach_kretprobe(event="d_lookup", fn_name="count_lookup")

# stat column labels and indexes
stats = {"REFS": 1, "SLOW": 2, "MISS": 3}

# header
print("%-8s  " % "TIME", end="")
for stype, idx in sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0])):
    print(" %8s" % (stype + "/s"), end="")
print(" %8s" % "HIT%")

# output
i = 0
while (1):
    if count > 0:
        i += 1
Esempio n. 31
0
b = BPF(text=bpf_text)

# common file functions
if BPF.get_kprobe_functions(b'zpl_iter'):
    b.attach_kprobe(event="zpl_iter_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_iter_write", fn_name="trace_entry")
elif BPF.get_kprobe_functions(b'zpl_aio'):
    b.attach_kprobe(event="zpl_aio_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_aio_write", fn_name="trace_entry")
else:
    b.attach_kprobe(event="zpl_read", fn_name="trace_entry")
    b.attach_kprobe(event="zpl_write", fn_name="trace_entry")
b.attach_kprobe(event="zpl_open", fn_name="trace_entry")
b.attach_kprobe(event="zpl_fsync", fn_name="trace_entry")
if BPF.get_kprobe_functions(b'zpl_iter'):
    b.attach_kretprobe(event="zpl_iter_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_iter_write", fn_name="trace_write_return")
elif BPF.get_kprobe_functions(b'zpl_aio'):
    b.attach_kretprobe(event="zpl_aio_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_aio_write", fn_name="trace_write_return")
else:
    b.attach_kretprobe(event="zpl_read", fn_name="trace_read_return")
    b.attach_kretprobe(event="zpl_write", fn_name="trace_write_return")
b.attach_kretprobe(event="zpl_open", fn_name="trace_open_return")
b.attach_kretprobe(event="zpl_fsync", fn_name="trace_fsync_return")

print("Tracing ZFS operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
Esempio n. 32
0
    bpf_text = bpf_text.replace('FLAGS_FILTER',
        'if (!(flags & %d)) { return 0; }' % flag_filter_mask)
else:
    bpf_text = bpf_text.replace('FLAGS_FILTER', '')
if not (args.extended_fields or args.flag_filter):
    bpf_text = '\n'.join(x for x in bpf_text.split('\n')
        if 'EXTENDED_STRUCT_MEMBER' not in x)
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="do_sys_open", fn_name="trace_entry")
b.attach_kretprobe(event="do_sys_open", fn_name="trace_return")

TASK_COMM_LEN = 16    # linux/sched.h
NAME_MAX = 255        # linux/limits.h

class Data(ct.Structure):
    _fields_ = [
        ("id", ct.c_ulonglong),
        ("ts", ct.c_ulonglong),
        ("ret", ct.c_int),
        ("comm", ct.c_char * TASK_COMM_LEN),
        ("fname", ct.c_char * NAME_MAX),
        ("flags", ct.c_int),
    ]

initial_ts = 0
Esempio n. 33
0
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""

bpf_text = bpf_text.replace("MAXARG", args.max_args)
if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF
b = BPF(text=bpf_text)
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(event=execve_fnname, fn_name="do_sys_execve")
b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve")

# header
if args.timestamp:
    print("%-8s" % ("TIME(s)"), end="")
print("%-16s %-6s %-6s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS"))

TASK_COMM_LEN = 16      # linux/sched.h
ARGSIZE = 128           # should match #define in C above

class Data(ct.Structure):
    _fields_ = [
        ("pid", ct.c_uint),
        ("comm", ct.c_char * TASK_COMM_LEN),
        ("type", ct.c_int),
        ("argv", ct.c_char * ARGSIZE),
Esempio n. 34
0
    bpf_text = bpf_text.replace('TYPE_FILTER', '!S_ISREG(mode)')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

# I'd rather trace these via new_sync_read/new_sync_write (which used to be
# do_sync_read/do_sync_write), but those became static. So trace these from
# the parent functions, at the cost of more overhead, instead.
# Ultimately, we should be using [V]FS tracepoints.
b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry")
b.attach_kretprobe(event="__vfs_read", fn_name="trace_read_return")
try:
    b.attach_kprobe(event="__vfs_write", fn_name="trace_write_entry")
    b.attach_kretprobe(event="__vfs_write", fn_name="trace_write_return")
except Exception:
    # older kernels don't have __vfs_write so try vfs_write instead
    b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")
    b.attach_kretprobe(event="vfs_write", fn_name="trace_write_return")

TASK_COMM_LEN = 16  # linux/sched.h
DNAME_INLINE_LEN = 32  # linux/dcache.h

class Data(ct.Structure):
    _fields_ = [
        ("mode", ct.c_int),
        ("pid", ct.c_uint),
Esempio n. 35
0
        '(*vp) += delta;')
if debug:
    print(bpf_text)

# load BPF program
b = BPF(text=bpf_text)

# this should really use irq:softirq_entry/exit tracepoints; for now the
# soft irq functions are individually traced (search your kernel for
# open_softirq() calls, and adjust the following list as needed).
for softirqfunc in ("blk_iopoll_softirq", "blk_done_softirq",
        "rcu_process_callbacks", "run_rebalance_domains", "tasklet_action",
        "tasklet_hi_action", "run_timer_softirq", "net_tx_action",
        "net_rx_action"):
    b.attach_kprobe(event=softirqfunc, fn_name="trace_start")
    b.attach_kretprobe(event=softirqfunc, fn_name="trace_completion")

print("Tracing soft irq event time... Hit Ctrl-C to end.")

# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")
Esempio n. 36
0
if args.pid:
    pid_filter = 'if (pid >> 32 != %d) { return 0; }' % args.pid
if args.netns:
    netns_filter = 'if (net_ns_inum != %d) { return 0; }' % args.netns

bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter)
bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter)

if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")
b.attach_kprobe(event="tcp_set_state", fn_name="trace_tcp_set_state_entry")
b.attach_kprobe(event="tcp_close", fn_name="trace_close_entry")
b.attach_kretprobe(event="inet_csk_accept", fn_name="trace_accept_return")

print("Tracing TCP established connections. Ctrl-C to end.")

# header
if args.verbose:
    if args.timestamp:
        print("%-14s" % ("TIME(ns)"), end="")
    print("%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" % ("TYPE",
          "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT"), end="")
    if not args.netns:
Esempio n. 37
0
if args.tid:  # TID trumps PID
    bpf_text = bpf_text.replace('FILTER',
        'if (tid != %s) { return 0; }' % args.tid)
elif args.pid:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')

# initialize BPF
b = BPF(text=bpf_text)

syscall_fnname = b.get_syscall_fnname("sendmsg")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__sendmsg")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_sendmsg_return")

syscall_fnname = b.get_syscall_fnname("recvmsg")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__recvmsg")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_recvmsg_return")

b.attach_kprobe(event="__scm_send", fn_name="trace_scm_send_entry")
b.attach_kprobe(event="scm_detach_fds", fn_name="trace_scm_detach_fds_entry")
b.attach_kretprobe(event="scm_detach_fds", fn_name="trace_scm_detach_fds_return")

initial_ts = 0

# header
if args.timestamp:
    print("%-14s" % ("TIME(s)"), end="")
Esempio n. 38
0
if args.verbose:
    print(bpf_text)


# signal handler
def signal_ignore(signal, frame):
    print()


# load BPF program
b = BPF(text=bpf_text)

# attach probes
if not library:
    b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry")
    b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return")
    matched = b.num_open_kprobes()
else:
    b.attach_uprobe(name=library,
                    sym_re=pattern,
                    fn_name="trace_func_entry",
                    pid=args.pid or -1)
    b.attach_uretprobe(name=library,
                       sym_re=pattern,
                       fn_name="trace_func_return",
                       pid=args.pid or -1)
    matched = b.num_open_uprobes()

if matched == 0:
    print("0 functions matched by \"%s\". Exiting." % args.pattern)
    exit()
Esempio n. 39
0
void count_lookup(struct pt_regs *ctx) {
    int key = S_SLOW;
    u64 *leaf = stats.lookup(&key);
    if (leaf) (*leaf)++;
    if (PT_REGS_RC(ctx) == 0) {
        key = S_MISS;
        leaf = stats.lookup(&key);
        if (leaf) (*leaf)++;
    }
}
"""

# load BPF program
b = BPF(text=bpf_text)
b.attach_kprobe(event="lookup_fast", fn_name="count_fast")
b.attach_kretprobe(event="d_lookup", fn_name="count_lookup")

# stat column labels and indexes
stats = {
    "REFS": 1,
    "SLOW": 2,
    "MISS": 3
}

# header
print("%-8s  " % "TIME", end="")
for stype, idx in sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0])):
    print(" %8s" % (stype + "/s"), end="")
print(" %8s" % "HIT%")

# output
Esempio n. 40
0
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

# for POSIX compliance, all architectures implement these
# system calls but the name of the actual entry point may
# be different for which we must check if the entry points
# actually exist before attaching the probes
syscall_fnname = b.get_syscall_fnname("stat")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

syscall_fnname = b.get_syscall_fnname("statfs")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

syscall_fnname = b.get_syscall_fnname("newstat")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

start_ts = 0
prev_ts = 0
delta = 0
Esempio n. 41
0
"""
if args.pid:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
kill_fnname = b.get_syscall_fnname("kill")
b.attach_kprobe(event=kill_fnname, fn_name="syscall__kill")
b.attach_kretprobe(event=kill_fnname, fn_name="do_ret_sys_kill")


TASK_COMM_LEN = 16    # linux/sched.h

class Data(ct.Structure):
    _fields_ = [
        ("pid", ct.c_ulonglong),
        ("tpid", ct.c_int),
        ("sig", ct.c_int),
        ("ret", ct.c_int),
        ("comm", ct.c_char * TASK_COMM_LEN)
    ]

# header
print("%-9s %-6s %-16s %-4s %-6s %s" % (
Esempio n. 42
0
    global start_ts
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
    print("%-6d %-12.12s %-2d %-16s %-16s %-4d" %
          (event.pid, event.task.decode(), event.ip,
           inet_ntop(AF_INET6, event.saddr), inet_ntop(
               AF_INET6, event.daddr), event.dport))


# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_entry")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")

# header
if args.timestamp:
    print("%-9s" % ("TIME(s)"), end="")
print("%-6s %-12s %-2s %-16s %-16s %-4s" %
      ("PID", "COMM", "IP", "SADDR", "DADDR", "DPORT"))

start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    b.perf_buffer_poll()
Esempio n. 43
0
else:
    usdt = None

if args.ebpf or args.verbose:
    if args.verbose and usdt:
        print(usdt.get_text())
    print(program)
    if args.ebpf:
        exit()

bpf = BPF(text=program, usdt_contexts=[usdt] if usdt else [])
if args.syscalls:
    syscall_regex = "^[Ss]y[Ss]_.*"
    bpf.attach_kprobe(event_re=syscall_regex, fn_name="syscall_entry")
    if args.latency:
        bpf.attach_kretprobe(event_re=syscall_regex, fn_name="syscall_return")
    print("Attached %d kernel probes for syscall tracing." %
          bpf.num_open_kprobes())

def get_data():
    # Will be empty when no language was specified for tracing
    if args.latency:
        data = list(map(lambda kv: (kv[0].clazz.decode('utf-8', 'replace') \
                                    + "." + \
                                    kv[0].method.decode('utf-8', 'replace'),
                                   (kv[1].num_calls, kv[1].total_ns)),
                   bpf["times"].items()))
    else:
        data = list(map(lambda kv: (kv[0].clazz.decode('utf-8', 'replace') \
                                    + "." + \
                                    kv[0].method.decode('utf-8', 'replace'),
Esempio n. 44
0
from struct import pack
import time as ti
import json
import ctypes
import sys

# Initialize BPF
b = BPF(src_file="ebpf_probes.c")

# Attach trace functions to kernel functions as k(ret)probes
b.attach_kprobe(event="tcp_reno_cong_avoid", fn_name="trace_cong_avoid")
b.attach_kprobe(event="tcp_init_buffer_space", fn_name="trace_init_cong_control")
b.attach_kprobe(event="tcp_mark_skb_lost", fn_name="trace_mark_lost")
b.attach_kprobe(event="tcp_write_timer_handler", fn_name="trace_timeout_trigger")
b.attach_kprobe(event="tcp_rack_mark_lost", fn_name="trace_rack_timer")
b.attach_kretprobe(event="tcp_schedule_loss_probe", fn_name="trace_probe_timer")


qlog = {
	"qlog_version": "draft-01",
	"traces": [
		{
			"common_fields": {
				"reference_time": -1
			},
			"configuration": {"time_units": "ms"},
			"event_fields": [
				"relative_time",
				"category",
				"event_type",
				"data"
Esempio n. 45
0
           event.offset / 1024,
           float(event.delta_us) / 1000,
           event.file.decode()))


# Currently specifically works for NFSv4, the other kprobes are generic
# so it should work with earlier NFS versions

b = BPF(text=bpf_text)
b.attach_kprobe(event="nfs_file_read", fn_name="trace_rw_entry")
b.attach_kprobe(event="nfs_file_write", fn_name="trace_rw_entry")
b.attach_kprobe(event="nfs4_file_open", fn_name="trace_file_open_entry")
b.attach_kprobe(event="nfs_file_open", fn_name="trace_file_open_entry")
b.attach_kprobe(event="nfs_getattr", fn_name="trace_getattr_entry")

b.attach_kretprobe(event="nfs_file_read", fn_name="trace_read_return")
b.attach_kretprobe(event="nfs_file_write", fn_name="trace_write_return")
b.attach_kretprobe(event="nfs4_file_open", fn_name="trace_file_open_return")
b.attach_kretprobe(event="nfs_file_open", fn_name="trace_file_open_return")
b.attach_kretprobe(event="nfs_getattr", fn_name="trace_getattr_return")

if(csv):
    print("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE")
else:
    if min_ms == 0:
        print("Tracing NFS operations... Ctrl-C to quit")
    else:
        print("""Tracing NFS operations that are slower than \
%d ms... Ctrl-C to quit"""
              % min_ms)
    print("%-8s %-14s %-6s %1s %-7s %-8s %7s %s" % ("TIME",
Esempio n. 46
0
        'dist.increment(key);')
else:
    bpf_text = bpf_text.replace(
        'STORE', 'irq_key_t key = {.slot = 0 /* ignore */};' +
        'bpf_probe_read(&key.name, sizeof(key.name), name);' +
        'u64 zero = 0, *vp = dist.lookup_or_init(&key, &zero);' +
        '(*vp) += delta;')
if debug:
    print(bpf_text)

# load BPF program
b = BPF(text=bpf_text)

# these should really use irq:irq_handler_entry/exit tracepoints:
b.attach_kprobe(event="handle_irq_event_percpu", fn_name="trace_start")
b.attach_kretprobe(event="handle_irq_event_percpu", fn_name="trace_completion")

print("Tracing hard irq event time... Hit Ctrl-C to end.")

# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")
Esempio n. 47
0
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

bpf_program = BPF(text=bpf_source)

if not kernel_trace:
        print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
        bpf_program.attach_uprobe(name="c", sym="malloc",
                                  fn_name="alloc_enter", pid=pid)
        bpf_program.attach_uretprobe(name="c", sym="malloc",
                                     fn_name="alloc_exit", pid=pid)
        bpf_program.attach_uprobe(name="c", sym="free",
                                  fn_name="free_enter", pid=pid)
else:
        print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
        bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
        bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
        bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = KStackDecoder() if kernel_trace else UStackDecoder(pid)

def print_outstanding():
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        alloc_info = {}
        allocs = bpf_program["allocs"]
        stack_traces = bpf_program["stack_traces"]
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
                if Time.monotonic_time() - min_age_ns < info.timestamp_ns:
                        continue
                if info.stack_id < 0:
                        continue
Esempio n. 48
0
}
"""
if args.pid:
    bpf_text = bpf_text.replace('FILTER',
                                'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="sys_stat", fn_name="trace_entry")
b.attach_kprobe(event="sys_statfs", fn_name="trace_entry")
b.attach_kprobe(event="sys_newstat", fn_name="trace_entry")
b.attach_kretprobe(event="sys_stat", fn_name="trace_return")
b.attach_kretprobe(event="sys_statfs", fn_name="trace_return")
b.attach_kretprobe(event="sys_newstat", fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h
NAME_MAX = 255  # linux/limits.h


class Data(ct.Structure):
    _fields_ = [("pid", ct.c_ulonglong), ("ts", ct.c_ulonglong),
                ("delta", ct.c_ulonglong), ("ret", ct.c_int),
                ("comm", ct.c_char * TASK_COMM_LEN),
                ("fname", ct.c_char * NAME_MAX)]


start_ts = 0
Esempio n. 49
0
        'if (pid != %s) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

syscall_fnname = b.get_syscall_fnname("shmget")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmget")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

syscall_fnname = b.get_syscall_fnname("shmat")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmat")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

syscall_fnname = b.get_syscall_fnname("shmdt")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmdt")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

syscall_fnname = b.get_syscall_fnname("shmctl")
if BPF.ksymname(syscall_fnname) != -1:
    b.attach_kprobe(event=syscall_fnname, fn_name="syscall__shmctl")
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")
Esempio n. 50
0
    bpf_text = bpf_text.replace('FILTER_PID', '0')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

# common file functions
b.attach_kprobe(event="nfs_file_read", fn_name="trace_entry")
b.attach_kprobe(event="nfs_file_write", fn_name="trace_entry")
b.attach_kprobe(event="nfs_file_open", fn_name="trace_entry")
b.attach_kprobe(event="nfs_getattr", fn_name="trace_entry")

b.attach_kretprobe(event="nfs_file_read", fn_name="trace_read_return")
b.attach_kretprobe(event="nfs_file_write", fn_name="trace_write_return")
b.attach_kretprobe(event="nfs_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="nfs_getattr", fn_name="trace_getattr_return")

if BPF.get_kprobe_functions(b'nfs4_file_open'):
    b.attach_kprobe(event="nfs4_file_open", fn_name="trace_entry")
    b.attach_kretprobe(event="nfs4_file_open", fn_name="trace_open_return")
else:
    b.attach_kprobe(event="nfs_file_open", fn_name="trace_entry")
    b.attach_kretprobe(event="nfs_file_open", fn_name="trace_open_return")

print("Tracing NFS operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
Esempio n. 51
0
}
""" % (i, i)

if args.verbose:
    print(bpf_text)

b = BPF(text=bpf_text)

for i, function in enumerate(args.functions):
    if ":" in function:
        library, func = function.split(":")
        b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i)
        b.attach_uretprobe(name=library, sym=func, fn_name="trace_return")
    else:
        b.attach_kprobe(event=function, fn_name="trace_%d" % i)
        b.attach_kretprobe(event=function, fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h

class Data(ct.Structure):
    _fields_ = [
        ("id", ct.c_ulonglong),
        ("tgid_pid", ct.c_ulonglong),
        ("start_ns", ct.c_ulonglong),
        ("duration_ns", ct.c_ulonglong),
        ("retval", ct.c_ulonglong),
        ("comm", ct.c_char * TASK_COMM_LEN)
    ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else [])

time_designator = "us" if args.min_us else "ms"
time_value = args.min_us or args.min_ms or 1
Esempio n. 52
0
else:
    usdt = None

if args.ebpf or args.verbose:
    if args.verbose and usdt:
        print(usdt.get_text())
    print(program)
    if args.ebpf:
        exit()

bpf = BPF(text=program, usdt_contexts=[usdt] if usdt else [])
if args.syscalls:
    syscall_regex = "^[Ss]y[Ss]_.*"
    bpf.attach_kprobe(event_re=syscall_regex, fn_name="syscall_entry")
    if args.latency:
        bpf.attach_kretprobe(event_re=syscall_regex, fn_name="syscall_return")
    print("Attached %d kernel probes for syscall tracing." %
          bpf.num_open_kprobes())


def get_data():
    # Will be empty when no language was specified for tracing
    if args.latency:
        data = list(map(lambda kv: (kv[0].clazz.decode('utf-8', 'replace') \
                                    + "." + \
                                    kv[0].method.decode('utf-8', 'replace'),
                                   (kv[1].num_calls, kv[1].total_ns)),
                   bpf["times"].items()))
    else:
        data = list(map(lambda kv: (kv[0].clazz.decode('utf-8', 'replace') \
                                    + "." + \
Esempio n. 53
0
        'dist.increment(key, delta);')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

# these should really use irq:irq_handler_entry/exit tracepoints:
if args.count:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="count_only")
    print("Tracing hard irq events... Hit Ctrl-C to end.")
else:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="trace_start")
    b.attach_kretprobe(event="handle_irq_event_percpu",
        fn_name="trace_completion")
    print("Tracing hard irq event time... Hit Ctrl-C to end.")

# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")
Esempio n. 54
0
    bpf_text = bpf_text.replace('PID_FILTER', '')
if args.signal:
    bpf_text = bpf_text.replace('SIGNAL_FILTER',
                                'if (sig != %s) { return 0; }' % args.signal)
else:
    bpf_text = bpf_text.replace('SIGNAL_FILTER', '')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
kill_fnname = b.get_syscall_fnname("kill")
b.attach_kprobe(event=kill_fnname, fn_name="syscall__kill")
b.attach_kretprobe(event=kill_fnname, fn_name="do_ret_sys_kill")

# header
print("%-9s %-6s %-16s %-4s %-6s %s" %
      ("TIME", "PID", "COMM", "SIG", "TPID", "RESULT"))


# process event
def print_event(cpu, data, size):
    event = b["events"].event(data)

    if (args.failed and (event.ret >= 0)):
        return

    printb(b"%-9s %-6d %-16s %-4d %-6d %d" %
           (strftime("%H:%M:%S").encode('ascii'), event.pid, event.comm,
Esempio n. 55
0
File: btrfsdist.py Progetto: AoJ/bcc
if args.pid:
    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
else:
    bpf_text = bpf_text.replace('FILTER_PID', '0')
if debug:
    print(bpf_text)

# load BPF program
b = BPF(text=bpf_text)

# Common file functions. See earlier comment about generic_file_read_iter().
b.attach_kprobe(event="generic_file_read_iter", fn_name="trace_read_entry")
b.attach_kprobe(event="btrfs_file_write_iter", fn_name="trace_entry")
b.attach_kprobe(event="generic_file_open", fn_name="trace_open_entry")
b.attach_kprobe(event="btrfs_sync_file", fn_name="trace_entry")
b.attach_kretprobe(event="generic_file_read_iter", fn_name="trace_read_return")
b.attach_kretprobe(event="btrfs_file_write_iter", fn_name="trace_write_return")
b.attach_kretprobe(event="generic_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="btrfs_sync_file", fn_name="trace_fsync_return")

print("Tracing btrfs operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
while (1):
    try:
        if args.interval:
            sleep(int(args.interval))
        else:
            sleep(99999999)
Esempio n. 56
0
def main():
    examples = '''Examples:
    deadlock 181                 # Analyze PID 181

    deadlock 181 --binary /lib/x86_64-linux-gnu/libpthread.so.0
                                 # Analyze PID 181 and locks from this binary.
                                 # If tracing a process that is running from
                                 # a dynamically-linked binary, this argument
                                 # is required and should be the path to the
                                 # pthread library.

    deadlock 181 --verbose
                                 # Analyze PID 181 and print statistics about
                                 # the mutex wait graph.

    deadlock 181 --lock-symbols my_mutex_lock1,my_mutex_lock2 \\
        --unlock-symbols my_mutex_unlock1,my_mutex_unlock2
                                 # Analyze PID 181 and trace custom mutex
                                 # symbols instead of pthread mutexes.

    deadlock 181 --dump-graph graph.json
                                 # Analyze PID 181 and dump the mutex wait
                                 # graph to graph.json.
    '''
    parser = argparse.ArgumentParser(
        description=(
            'Detect potential deadlocks (lock inversions) in a running binary.'
            '\nMust be run as root.'
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument('pid', type=int, help='Pid to trace')
    # Binaries with `:` in the path will fail to attach uprobes on kernels
    # running without this patch: https://lkml.org/lkml/2017/1/13/585.
    # Symlinks to the binary without `:` in the path can get around this issue.
    parser.add_argument(
        '--binary',
        type=str,
        default='',
        help='If set, trace the mutexes from the binary at this path. '
        'For statically-linked binaries, this argument is not required. '
        'For dynamically-linked binaries, this argument is required and '
        'should be the path of the pthread library the binary is using. '
        'Example: /lib/x86_64-linux-gnu/libpthread.so.0',
    )
    parser.add_argument(
        '--dump-graph',
        type=str,
        default='',
        help='If set, this will dump the mutex graph to the specified file.',
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Print statistics about the mutex wait graph.',
    )
    parser.add_argument(
        '--lock-symbols',
        type=strlist,
        default=['pthread_mutex_lock'],
        help='Comma-separated list of lock symbols to trace. Default is '
        'pthread_mutex_lock. These symbols cannot be inlined in the binary.',
    )
    parser.add_argument(
        '--unlock-symbols',
        type=strlist,
        default=['pthread_mutex_unlock'],
        help='Comma-separated list of unlock symbols to trace. Default is '
        'pthread_mutex_unlock. These symbols cannot be inlined in the binary.',
    )
    args = parser.parse_args()
    if not args.binary:
        try:
            args.binary = os.readlink('/proc/%d/exe' % args.pid)
        except OSError as e:
            print('%s. Is the process (pid=%d) running?' % (str(e), args.pid))
            sys.exit(1)

    bpf = BPF(src_file=b'deadlock.c')

    # Trace where threads are created
    bpf.attach_kretprobe(event=bpf.get_syscall_fnname('clone'), fn_name='trace_clone')

    # We must trace unlock first, otherwise in the time we attached the probe
    # on lock() and have not yet attached the probe on unlock(), a thread can
    # acquire mutexes and release them, but the release events will not be
    # traced, resulting in noisy reports.
    for symbol in args.unlock_symbols:
        try:
            bpf.attach_uprobe(
                name=args.binary,
                sym=symbol,
                fn_name='trace_mutex_release',
                pid=args.pid,
            )
        except Exception as e:
            print('%s. Failed to attach to symbol: %s' % (str(e), symbol))
            sys.exit(1)
    for symbol in args.lock_symbols:
        try:
            bpf.attach_uprobe(
                name=args.binary,
                sym=symbol,
                fn_name='trace_mutex_acquire',
                pid=args.pid,
            )
        except Exception as e:
            print('%s. Failed to attach to symbol: %s' % (str(e), symbol))
            sys.exit(1)

    def print_stack_trace(stack_id):
        '''Closure that prints the symbolized stack trace.'''
        for addr in bpf.get_table('stack_traces').walk(stack_id):
            line = bpf.sym(addr, args.pid)
            # Try to symbolize with objdump if we cannot with bpf.
            if line == '[unknown]':
                symbol = symbolize_with_objdump(args.binary, addr)
                if symbol:
                    line = symbol
            print('@ %016x %s' % (addr, line))

    print('Tracing... Hit Ctrl-C to end.')
    while True:
        try:
            # Map of child thread pid -> parent info
            thread_info = {
                child.value: (parent.parent_pid, parent.stack_id, parent.comm)
                for child, parent in bpf.get_table('thread_to_parent').items()
            }

            # Mutex wait directed graph. Nodes are mutexes. Edge (A,B) exists
            # if there exists some thread T where lock(A) was called and
            # lock(B) was called before unlock(A) was called.
            graph = DiGraph()
            for key, leaf in bpf.get_table('edges').items():
                graph.add_edge(
                    key.mutex1,
                    key.mutex2,
                    thread_pid=leaf.thread_pid,
                    thread_comm=leaf.comm.decode('utf-8'),
                    first_mutex_stack_id=leaf.mutex1_stack_id,
                    second_mutex_stack_id=leaf.mutex2_stack_id,
                )
            if args.verbose:
                print(
                    'Mutexes: %d, Edges: %d' %
                    (len(graph.nodes()), len(graph.edges()))
                )
            if args.dump_graph:
                with open(args.dump_graph, 'w') as f:
                    data = graph.node_link_data()
                    f.write(json.dumps(data, indent=2))

            cycle = find_cycle(graph)
            if cycle:
                print_cycle(
                    args.binary, graph, cycle, thread_info, print_stack_trace
                )
                sys.exit(1)

            time.sleep(1)
        except KeyboardInterrupt:
            break