def test_perf_buffer(self):
    """Attach a kprobe on nanosleep and verify events arrive via a perf buffer."""
    self.counter = 0

    # ctypes mirror of the anonymous struct submitted by the BPF program below.
    class Data(ct.Structure):
        _fields_ = [("ts", ct.c_ulonglong)]

    def cb(cpu, data, size):
        # Each callback invocation corresponds to one submitted event.
        self.assertGreater(size, ct.sizeof(Data))
        event = ct.cast(data, ct.POINTER(Data)).contents
        self.counter += 1

    def lost_cb(lost):
        # Only called when the kernel reports dropped samples.
        self.assertGreater(lost, 0)

    text = """
    BPF_PERF_OUTPUT(events);
    int do_sys_nanosleep(void *ctx) {
        struct {
            u64 ts;
        } data = {bpf_ktime_get_ns()};
        events.perf_submit(ctx, &data, sizeof(data));
        return 0;
    }
    """
    b = BPF(text=text)
    b.attach_kprobe(event=b.get_syscall_fnname("nanosleep"),
                    fn_name="do_sys_nanosleep")
    b["events"].open_perf_buffer(cb, lost_cb=lost_cb)
    # Sleeping triggers the probed syscall, producing at least one event.
    time.sleep(0.1)
    b.perf_buffer_poll()
    self.assertGreater(self.counter, 0)
    b.cleanup()
class TestKprobeMaxactive(TestCase):
    """Exercise attach_kretprobe() with the maxactive parameter."""

    def setUp(self):
        self.b = BPF(text=b"""
        typedef struct { int idx; } Key;
        typedef struct { u64 val; } Val;
        BPF_HASH(stats, Key, Val, 3);
        int hello(void *ctx) {
            stats.lookup_or_init(&(Key){1}, &(Val){0})->val++;
            return 0;
        }
        int goodbye(void *ctx) {
            stats.lookup_or_init(&(Key){2}, &(Val){0})->val++;
            return 0;
        }
        """)
        # Probe every syscall whose name matches "bpf"; the kretprobe
        # reserves up to 128 concurrent instances (maxactive).
        self.b.attach_kprobe(event_re=self.b.get_syscall_prefix() + b"bpf",
                             fn_name=b"hello")
        self.b.attach_kretprobe(event_re=self.b.get_syscall_prefix() + b"bpf",
                                fn_name=b"goodbye", maxactive=128)

    def test_send1(self):
        k1 = self.b[b"stats"].Key(1)
        k2 = self.b[b"stats"].Key(2)
        self.assertTrue(self.b[b"stats"][k1].val >= 2)
        self.assertTrue(self.b[b"stats"][k2].val == 1)
def test_u32(self):
    """percpu_array of u32: bump from a kprobe, then check sum/average/max."""
    test_prog1 = """
    BPF_TABLE("percpu_array", u32, u32, stats, 1);
    int hello_world(void *ctx) {
        u32 key=0;
        u32 value = 0, *val;
        val = stats.lookup_or_init(&key, &value);
        *val += 1;
        return 0;
    }
    """
    self.addCleanup(self.cleanup)
    bpf_code = BPF(text=test_prog1)
    stats_map = bpf_code.get_table("stats")
    bpf_code.attach_kprobe(event="sys_clone", fn_name="hello_world")
    # Zero the per-CPU values before triggering the probe.
    ini = stats_map.Leaf()
    for i in range(0, multiprocessing.cpu_count()):
        ini[i] = 0
    stats_map[stats_map.Key(0)] = ini
    # popen forks, which goes through sys_clone and fires the kprobe.
    f = os.popen("hostname")
    f.close()
    self.assertEqual(len(stats_map), 1)
    val = stats_map[stats_map.Key(0)]
    # NOTE(review): sum/max shadow builtins here; kept for byte-identity.
    sum = stats_map.sum(stats_map.Key(0))
    avg = stats_map.average(stats_map.Key(0))
    max = stats_map.max(stats_map.Key(0))
    # The 0L literals imply this file targets Python 2.
    self.assertGreater(sum.value, 0L)
    self.assertGreater(max.value, 0L)
def test_perf_buffer_for_each_cpu(self):
    """Pin a sleep to every online CPU and expect at least one event per CPU."""
    self.events = []

    # ctypes mirror of the struct submitted by the BPF program below.
    class Data(ct.Structure):
        _fields_ = [("cpu", ct.c_ulonglong)]

    def cb(cpu, data, size):
        self.assertGreater(size, ct.sizeof(Data))
        event = ct.cast(data, ct.POINTER(Data)).contents
        self.events.append(event)

    def lost_cb(lost):
        self.assertGreater(lost, 0)

    text = """
    BPF_PERF_OUTPUT(events);
    int do_sys_nanosleep(void *ctx) {
        struct {
            u64 cpu;
        } data = {bpf_get_smp_processor_id()};
        events.perf_submit(ctx, &data, sizeof(data));
        return 0;
    }
    """
    b = BPF(text=text)
    b.attach_kprobe(event=b.get_syscall_fnname("nanosleep"),
                    fn_name="do_sys_nanosleep")
    b["events"].open_perf_buffer(cb, lost_cb=lost_cb)
    online_cpus = get_online_cpus()
    # taskset pins each sleep to a specific CPU so every CPU emits an event.
    for cpu in online_cpus:
        subprocess.call(['taskset', '-c', str(cpu), 'sleep', '0.1'])
    b.perf_buffer_poll()
    b.cleanup()
    self.assertGreaterEqual(len(self.events), len(online_cpus),
        'Received only {}/{} events'.format(len(self.events),
                                            len(online_cpus)))
class TestProbeQuota(TestCase):
    """Attaching to every kernel function must trip the kprobe quota."""

    def setUp(self):
        source = """int count(void *ctx) { return 0; }"""
        self.b = BPF(text=source)

    def test_probe_quota(self):
        # A catch-all regex would exceed the per-process probe limit,
        # so the attach is expected to raise.
        with self.assertRaises(Exception):
            self.b.attach_kprobe(event_re=".*", fn_name="count")
def test_struct_custom_func(self):
    """percpu_hash with a struct leaf and a user-supplied reducer function."""
    test_prog2 = """
    typedef struct counter {
        u32 c1;
        u32 c2;
    } counter;
    BPF_TABLE("percpu_hash", u32, counter, stats, 1);
    int hello_world(void *ctx) {
        u32 key=0;
        counter value = {0,0}, *val;
        val = stats.lookup_or_init(&key, &value);
        val->c1 += 1;
        val->c2 += 1;
        return 0;
    }
    """
    self.addCleanup(self.cleanup)
    bpf_code = BPF(text=test_prog2)
    # The reducer folds per-CPU leaves into a single scalar leaf (sLeaf).
    stats_map = bpf_code.get_table("stats",
        reducer=lambda x, y: stats_map.sLeaf(x.c1 + y.c1))
    bpf_code.attach_kprobe(event="sys_clone", fn_name="hello_world")
    ini = stats_map.Leaf()
    # NOTE(review): rebinding the loop variable `i` does not modify `ini`;
    # this loop looks like a no-op initialization -- confirm intent.
    for i in ini:
        i = stats_map.sLeaf(0, 0)
    stats_map[stats_map.Key(0)] = ini
    # popen forks, hitting sys_clone and firing the kprobe.
    f = os.popen("hostname")
    f.close()
    self.assertEqual(len(stats_map), 1)
    k = stats_map[stats_map.Key(0)]
    self.assertGreater(k.c1, 0L)
def setUp(self):
    """Build the BPF program and attach block-layer request probes."""
    # NOTE(review): arg1/arg2 are not defined in this view; presumably
    # module-level program text / source-file arguments -- confirm.
    b = BPF(arg1, arg2, debug=0)
    self.latency = b.get_table("latency", c_uint, c_ulong)
    b.attach_kprobe(event="blk_start_request",
                    fn_name="probe_blk_start_request", pid=-1, cpu=0)
    b.attach_kprobe(event="blk_update_request",
                    fn_name="probe_blk_update_request", pid=-1, cpu=0)
def test_lru_percpu_hash(self):
    """lru_percpu_hash of capacity 1: inserting a second key evicts the first."""
    test_prog1 = """
    BPF_TABLE("lru_percpu_hash", u32, u32, stats, 1);
    int hello_world(void *ctx) {
        u32 key=0;
        u32 value = 0, *val;
        val = stats.lookup_or_init(&key, &value);
        *val += 1;
        return 0;
    }
    """
    b = BPF(text=test_prog1)
    stats_map = b.get_table("stats")
    b.attach_kprobe(event="sys_clone", fn_name="hello_world")
    # Zero per-CPU values for the leaf we are about to insert.
    ini = stats_map.Leaf()
    for i in range(0, multiprocessing.cpu_count()):
        ini[i] = 0
    # First initialize with key 1
    stats_map[stats_map.Key(1)] = ini
    # Then initialize with key 0
    stats_map[stats_map.Key(0)] = ini
    # Key 1 should have been evicted
    with self.assertRaises(KeyError):
        val = stats_map[stats_map.Key(1)]
    # popen forks, hitting sys_clone and firing the kprobe.
    f = os.popen("hostname")
    f.close()
    self.assertEqual(len(stats_map), 1)
    val = stats_map[stats_map.Key(0)]
    # NOTE(review): sum/max shadow builtins; kept for byte-identity.
    sum = stats_map.sum(stats_map.Key(0))
    avg = stats_map.average(stats_map.Key(0))
    max = stats_map.max(stats_map.Key(0))
    self.assertGreater(sum.value, 0L)
    self.assertGreater(max.value, 0L)
    b.detach_kprobe("sys_clone")
def test_unary_operator(self):
    """Regression test: '!' applied to a dereferenced member chain compiles."""
    text = """
    #include <linux/fs.h>
    #include <uapi/linux/ptrace.h>
    int trace_read_entry(struct pt_regs *ctx, struct file *file)
    {
        return !file->f_op->read_iter;
    }
    """
    b = BPF(text=text)
    b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry")
def test_arbitrary_increment_simple(self):
    """map.increment() with an explicit step must compile and attach."""
    prog = b"""
    #include <uapi/linux/ptrace.h>
    struct bpf_map;
    BPF_HASH(map);
    int map_delete(struct pt_regs *ctx, struct bpf_map *bpfmap, u64 *k) {
        map.increment(42, 10);
        return 0;
    }
    """
    bpf_obj = BPF(text=prog)
    bpf_obj.attach_kprobe(event=b"htab_map_delete_elem",
                          fn_name=b"map_delete")
    bpf_obj.cleanup()
class TestProbeGlobalCnt(TestCase):
    """The module-level probe count aggregates across independent BPF objects."""

    def setUp(self):
        self.b1 = BPF(text="""int count(void *ctx) { return 0; }""")
        self.b2 = BPF(text="""int count(void *ctx) { return 0; }""")

    def test_probe_quota(self):
        self.b1.attach_kprobe(event="schedule", fn_name="count")
        self.b2.attach_kprobe(event="submit_bio", fn_name="count")
        # Each object only counts its own probe...
        self.assertEqual(1, self.b1.num_open_kprobes())
        self.assertEqual(1, self.b2.num_open_kprobes())
        # ...while the global counter sees both, and drops back to zero
        # after both objects are cleaned up.
        self.assertEqual(2, _get_num_open_probes())
        self.b1.cleanup()
        self.b2.cleanup()
        self.assertEqual(0, _get_num_open_probes())
class TestKprobeCnt(TestCase):
    """attach_kprobe(event_re=...) attaches one probe per matching function."""

    def setUp(self):
        self.b = BPF(text="""
        int wololo(void *ctx) {
            return 0;
        }
        """)
        self.b.attach_kprobe(event_re="^vfs_.*", fn_name="wololo")

    def test_attach1(self):
        # Count the kernel functions the regex should have matched,
        # straight from the tracing filter-function list.
        actual_cnt = 0
        with open("/sys/kernel/debug/tracing/available_filter_functions") as f:
            for line in f:
                if str(line).startswith("vfs_"):
                    actual_cnt += 1
        open_cnt = self.b.num_open_kprobes()
        self.assertEqual(actual_cnt, open_cnt)
def test_map_insert(self):
    """insert() must not overwrite an existing key; update() must."""
    text = """
    BPF_HASH(dummy);
    void do_trace(struct pt_regs *ctx) {
        u64 key = 0, val = 2;
        dummy.insert(&key, &val);
        key = 1;
        dummy.update(&key, &val);
    }
    """
    b = BPF(text=text)
    # Pre-populate both keys with 1 from user space.
    c_val = ct.c_ulong(1)
    b["dummy"][ct.c_ulong(0)] = c_val
    b["dummy"][ct.c_ulong(1)] = c_val
    b.attach_kprobe(event="sys_sync", fn_name="do_trace")
    # Trigger the probe via the sync(2) syscall.
    libc = ct.CDLL("libc.so.6")
    libc.sync()
    # insert() left key 0 at its old value; update() set key 1 to 2.
    self.assertEqual(1, b["dummy"][ct.c_ulong(0)].value)
    self.assertEqual(2, b["dummy"][ct.c_ulong(1)].value)
def test_probe_struct_assign(self):
    """Assigning probe arguments into a local struct must compile and attach."""
    b = BPF(text = """
    #include <uapi/linux/ptrace.h>
    struct args_t {
        const char *filename;
        int flags;
        int mode;
    };
    int do_sys_open(struct pt_regs *ctx, const char *filename,
        int flags, int mode)
    {
        struct args_t args = {};
        args.filename = filename;
        args.flags = flags;
        args.mode = mode;
        bpf_trace_printk("%s\\n", args.filename);
        return 0;
    };
    """)
    b.attach_kprobe(event=b.get_syscall_fnname("open"),
                    fn_name="do_sys_open")
class TestKprobeRgx(TestCase):
    """Regex-based kprobe/kretprobe attachment on the bpf(2) syscall family."""

    def setUp(self):
        self.b = BPF(text="""
        typedef struct { int idx; } Key;
        typedef struct { u64 val; } Val;
        BPF_TABLE("hash", Key, Val, stats, 3);
        int hello(void *ctx) {
            stats.lookup_or_init(&(Key){1}, &(Val){0})->val++;
            return 0;
        }
        int goodbye(void *ctx) {
            stats.lookup_or_init(&(Key){2}, &(Val){0})->val++;
            return 0;
        }
        """)
        # Key 1 counts entries (kprobe), key 2 counts returns (kretprobe).
        self.b.attach_kprobe(event_re="^SyS_bp.*", fn_name="hello")
        self.b.attach_kretprobe(event_re="^SyS_bp.*", fn_name="goodbye")

    def test_send1(self):
        k1 = self.b["stats"].Key(1)
        k2 = self.b["stats"].Key(2)
        self.assertEqual(self.b["stats"][k1].val,
                         self.b["stats"][k2].val + 1)
def main_run(self):
    """Attach per-event BPF programs, then POST sampled stats to an
    Elasticsearch /_bulk endpoint about once per second.

    NOTE: uses Python 2 print statements -- this file targets Python 2.
    """
    manager = EventManager()
    task_list = []
    self.trace_begin()
    for k in manager.EVENT_LIST.keys():
        self.EVENT_LIST_data[k] = {"count": 0, "size": 0}
        code_and_func = manager.EVENT_LIST[k]
        # code_and_func[0] is the program text; the remainder is either a
        # single event name, or (event, fn_name) pairs.
        b = BPF(text=code_and_func[0])
        if len(code_and_func) > 2:
            # Odd indices are events, even indices their handler names.
            for func_idx in range(1, len(code_and_func)):
                if (func_idx & 1) == 0:
                    continue
                b.attach_kprobe(event=code_and_func[func_idx],
                                fn_name=code_and_func[func_idx + 1])
        else:
            b.attach_kprobe(event=code_and_func[1], fn_name="func")
        task_list.append((b, k))
    # Align the first sample with the next whole second.
    current_time = time.time()
    sleep_time = int(current_time) + 1 - current_time
    print "\n# Data being visualized on port 5601... Hit Ctrl-C to end."
    while 1:
        self.bulk = ""
        time.sleep(sleep_time)
        for i, v in task_list:
            self.run_event_tracing(i, v)  # i is bpf object, v is eventname
        self.bulk += "\n"
        self.conn.request("POST", "/_bulk", self.bulk)
        resp = self.conn.getresponse()
        data = resp.read()
        if resp.status != 200 and resp.status != 201:
            print "post document: ", resp.status, ":", resp.reason
            print data
        # Re-align to the next whole second for the following iteration.
        start = time.time()
        sleep_time = int(start) + 1 - start
else:
    # Bucket by IRQ name only (slot 0 is unused in this mode).
    bpf_text = bpf_text.replace('STORE',
        'irq_key_t key = {.slot = 0 /* ignore */};' +
        'bpf_probe_read_kernel(&key.name, sizeof(key.name), name);' +
        'dist.increment(key, delta);')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

# these should really use irq:irq_handler_entry/exit tracepoints:
if args.count:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="count_only")
    print("Tracing hard irq events... Hit Ctrl-C to end.")
else:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="trace_start")
    b.attach_kretprobe(event="handle_irq_event_percpu",
                       fn_name="trace_completion")
    print("Tracing hard irq event time... Hit Ctrl-C to end.")

# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        # Print one final summary before exiting.
        exiting = 1
# Pick which request timestamp field measures the start of the window.
if args.which == 'from-rq-alloc':
    start_time_field = 'alloc_time_ns'
elif args.which == 'after-rq-alloc':
    start_time_field = 'start_time_ns'
elif args.which == 'on-device':
    start_time_field = 'io_start_time_ns'
else:
    die()

bpf_source = bpf_source.replace('__START_TIME_FIELD__', start_time_field)
# Device number arrives as "MAJOR:MINOR".
bpf_source = bpf_source.replace('__MAJOR__',
    str(int(args.devno.split(':')[0])))
bpf_source = bpf_source.replace('__MINOR__',
    str(int(args.devno.split(':')[1])))

bpf = BPF(text=bpf_source)
bpf.attach_kprobe(event="blk_account_io_done",
                  fn_name="kprobe_blk_account_io_done")

# times are in usecs
MSEC = 1000
SEC = 1000 * 1000

# Live histogram tables from the BPF program.
cur_rwdf_100ms = bpf["rwdf_100ms"]
cur_rwdf_1ms = bpf["rwdf_1ms"]
cur_rwdf_10us = bpf["rwdf_10us"]

# Previous snapshots and per-interval deltas (400 buckets each).
last_rwdf_100ms = [0] * 400
last_rwdf_1ms = [0] * 400
last_rwdf_10us = [0] * 400
rwdf_100ms = [0] * 400
rwdf_1ms = [0] * 400
def handle_loop(stdscr, args):
    """Curses loop: count page-cache touches per process in BPF and render
    a top-like screen until 'q' is pressed."""
    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = FIELDS.index(DEFAULT_FIELD)
    sort_reverse = False

    # load BPF program
    bpf_text = """
    #include <uapi/linux/ptrace.h>
    struct key_t {
        u64 ip;
        u32 pid;
        u32 uid;
        char comm[16];
    };
    BPF_HASH(counts, struct key_t);
    int do_count(struct pt_regs *ctx) {
        struct key_t key = {};
        u64 zero = 0 , *val;
        u64 pid = bpf_get_current_pid_tgid();
        u32 uid = bpf_get_current_uid_gid();
        key.ip = PT_REGS_IP(ctx);
        key.pid = pid & 0xFFFFFFFF;
        key.uid = uid & 0xFFFFFFFF;
        bpf_get_current_comm(&(key.comm), 16);
        val = counts.lookup_or_init(&key, &zero);
        // update counter
        (*val)++;
        return 0;
    }
    """
    b = BPF(text=bpf_text)
    # One shared handler keyed by instruction pointer distinguishes
    # the four page-cache entry points.
    b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count")
    b.attach_kprobe(event="mark_page_accessed", fn_name="do_count")
    b.attach_kprobe(event="account_page_dirtied", fn_name="do_count")
    b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count")

    exiting = 0
    while 1:
        s = stdscr.getch()
        if s == ord('q'):
            exiting = 1
        elif s == ord('r'):
            sort_reverse = not sort_reverse
        elif s == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif s == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)
        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = 1
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get memory info
        mem = get_meminfo()
        cached = int(mem["Cached"]) / 1024
        buff = int(mem["Buffers"]) / 1024

        process_stats = get_processes_stats(
            b, sort_field=sort_field, sort_reverse=sort_reverse)
        stdscr.clear()
        stdscr.addstr(
            0, 0,
            "%-8s Buffers MB: %.0f / Cached MB: %.0f "
            "/ Sort: %s / Order: %s" % (
                strftime("%H:%M:%S"), buff, cached, FIELDS[sort_field],
                sort_reverse and "descending" or "ascending"
            )
        )

        # header
        stdscr.addstr(
            1, 0,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format(
                *FIELDS
            ),
            curses.A_REVERSE
        )
        (height, width) = stdscr.getmaxyx()

        for i, stat in enumerate(process_stats):
            uid = int(stat[1])
            try:
                username = pwd.getpwuid(uid)[0]
            except KeyError as ex:
                # `pwd` throws a KeyError if the user cannot be found. This can
                # happen e.g. when the process is running in a cgroup that has
                # different users from the host.
                # NOTE(review): '******'.format(uid) has no '{}' placeholder,
                # so the uid argument is silently ignored -- probably meant to
                # embed the uid in the fallback string; confirm intent.
                username = '******'.format(uid)
            stdscr.addstr(
                i + 2, 0,
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:8} {6:9.1f}% {7:9.1f}%".format(
                    *stat, username=username
                )
            )
            # Stop before running off the bottom of the terminal.
            if i > height - 4:
                break
        stdscr.refresh()
        if exiting:
            print("Detaching...")
            return
from bcc import BPF

# BPF program: log the comm of every task entering execve().
bpf_source = """
#include <uapi/linux/ptrace.h>
int do_sys_execve(struct pt_regs *ctx) {
    char comm[16];
    bpf_get_current_comm(&comm, sizeof(comm));
    bpf_trace_printk("executing program: %s \\n", comm);
    return 0;
}
"""

bpf = BPF(text=bpf_source)
# Resolve the kernel's (possibly prefixed) execve symbol before attaching.
execve_function = bpf.get_syscall_fnname("execve")
bpf.attach_kprobe(event=execve_function, fn_name="do_sys_execve")

# Stream trace_pipe output until interrupted.
bpf.trace_print()
        valp->us += delta_us;
        valp->bytes += startp->data_len;
        valp->io++;
    }
    start.delete(&req);
    whobyreq.delete(&req);
    return 0;
}
"""

if args.ebpf:
    print(bpf_text)
    exit()

b = BPF(text=bpf_text)
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
# blk_start_request was removed from newer kernels; attach only if present.
if BPF.get_kprobe_functions(b'blk_start_request'):
    b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_account_io_done",
    fn_name="trace_req_completion")

print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)

output = open('output/bIO_top.csv', mode='w')

# cache disk major,minor -> diskname
disklookup = {}
with open(diskstats) as stats:
    for line in stats:
        a = line.split()
        disklookup[a[0] + "," + a[1]] = a[2]

# output
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 connect event."""
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    global start_ts
    if args.timestamp:
        if start_ts == 0:
            start_ts = event.ts_us
        # ts_us is microseconds; print seconds relative to the first event.
        print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000),
            end="")
    print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid,
        event.task.decode(), event.ip,
        inet_ntop(AF_INET6, event.saddr),
        inet_ntop(AF_INET6, event.daddr), event.dport))

# initialize BPF
b = BPF(text=bpf_text)
# Entry probe records the attempt; return probes report success/failure.
b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_entry")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry")
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")

# header
if args.timestamp:
    print("%-9s" % ("TIME(s)"), end="")
print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR",
    "DADDR", "DPORT"))

start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
from bcc import BPF

# define BPF program: print a message on every clone() entry.
# BUGFIX: the C source previously began "into hello" (typo for "int"),
# which would fail to compile.
prog = """
int hello(void *ctx) {
    bpf_trace_printk("Hello, World\\n");
    return 0;
}
"""

# Load BPF program
b = BPF(text=prog)
# BUGFIX: the two keyword arguments were missing the separating comma,
# which is a Python SyntaxError.
b.attach_kprobe(event=b.get_syscall_fnname("clone"), fn_name="hello")

# header
print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "MESSAGE"))

# format output
while 1:
    try:
        (task, pid, cpu, flags, ts, msg) = b.trace_fields()
    except ValueError:
        # Incomplete/garbled trace line; skip it.
        continue
    print("%-18s %-16s %-6s %s" % (ts, task, pid, msg))
# arguments interval = 5 count = -1 if len(argv) > 1: try: interval = int(argv[1]) if interval == 0: raise if len(argv) > 2: count = int(argv[2]) except: # also catches -h, --help usage() # load BPF program b = BPF(src_file = "bitehist.c") b.attach_kprobe(event="blk_start_request", fn_name="do_request") dist_max = 64 # header print("Tracing... Hit Ctrl-C to end.") # functions stars_max = 38 def stars(val, val_max, width): i = 0 text = "" while (1): if (i > (width * val / val_max) - 1) or (i > width - 1): break text += "*" i += 1
print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, event.task, event.ip, inet_ntoa(event.saddr), inet_ntoa(event.daddr), event.dport)) def print_ipv6_event(cpu, data, size): event = ct.cast(data, ct.POINTER(Data_ipv6)).contents if args.timestamp: if start_ts == 0: start_ts = event.ts_us print("%-9.3f" % ((event.ts_us - start_ts) / 100000), end="") print("%-6d %-12.12s %-2d ...%-13x ...%-13x %-4d" % (event.pid, event.task, event.ip, event.saddr, event.daddr, event.dport)) # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_entry") b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry") b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return") b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return") # header if args.timestamp: print("%-9s" % ("TIME(s)"), end="") print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR", "DADDR", "DPORT")) start_ts = 0 def inet_ntoa(addr): dq = '' for i in range(0, 4):
# Build the pid/tid filter into the program text (TID takes precedence).
if args.tid:  # TID trumps PID
    program = program.replace('FILTER',
        'if (tid != %s) { return 0; }' % args.tid)
elif args.pid:
    program = program.replace('FILTER',
        'if (pid != %s) { return 0; }' % args.pid)
else:
    program = program.replace('FILTER', '')
program = program.replace('STACK_STORAGE_SIZE',
    str(args.stack_storage_size))

b = BPF(text=program)
# Mutex acquisition is measured between lock entry and lock return;
# hold time ends at unlock entry.
b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")

enabled = b.get_table("enabled")
stack_traces = b.get_table("stack_traces")
# aq_* = acquisition latency, hl_* = hold latency.
aq_counts = b.get_table("aq_report_count")
aq_maxs = b.get_table("aq_report_max")
aq_totals = b.get_table("aq_report_total")
hl_counts = b.get_table("hl_report_count")
hl_maxs = b.get_table("hl_report_max")
hl_totals = b.get_table("hl_report_total")

aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.sport),
        "%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport),
        tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags)))
    # Print the kernel stack that led to the drop.
    for addr in stack_traces.walk(event.stack_id):
        sym = b.ksym(addr, show_offset=True)
        print("\t%s" % sym)
    print("")

# initialize BPF
b = BPF(text=bpf_text)
# tcp_drop() only exists on some kernels; bail out cleanly otherwise.
if b.get_kprobe_functions(b"tcp_drop"):
    b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop")
else:
    print("ERROR: tcp_drop() kernel function not found or traceable. "
        "Older kernel versions not supported.")
    exit()
stack_traces = b.get_table("stack_traces")

# header
print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % ("TIME", "PID", "IP",
    "SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS"))

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    b.perf_buffer_poll()
"""

# Substitute the runtime filters into the program text.
if args.tgid:
    bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else:
    bpf_text = bpf_text.replace('TGID_FILTER', '0')
if args.all_files:
    bpf_text = bpf_text.replace('TYPE_FILTER', '0')
else:
    # Default: only regular files.
    bpf_text = bpf_text.replace('TYPE_FILTER', '!S_ISREG(mode)')

if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry")
try:
    b.attach_kprobe(event="__vfs_write", fn_name="trace_write_entry")
except:
    # older kernels don't have __vfs_write so try vfs_write instead
    b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")

DNAME_INLINE_LEN = 32  # linux/dcache.h

print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)

# output
exiting = 0
while 1:
    try:
        sleep(interval)
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one IPv4 retransmit/TLP event."""
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%s" % (inet_ntoa(event.saddr), event.lport),
        type[event.type],
        "%s:%s" % (inet_ntoa(event.daddr), event.dport),
        tcpstate[event.state]))

def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 retransmit/TLP event."""
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    # BUGFIX: the format string was "%%-8s -6d ...", which printed the
    # literal text "%-8s -6d", used only 5 of the 7 arguments, and so
    # raised "not all arguments converted" at runtime.
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "...%x:%d" % (event.saddr, event.lport),
        type[event.type],
        "...%x:%d" % (event.daddr, event.dport),
        tcpstate[event.state]))

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")

# header
print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
    "LADDR:LPORT", "T", "RADDR:RPORT", "STATE"))

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    b.kprobe_poll()
def bench_redis(repeat=3, n=1000000):
    # type: (int, int) -> pandas.DataFrame
    """Benchmark redis-benchmark with and without BPF syscall probes
    attached, returning nothing but writing results/syscall.tsv."""

    def read_result(name, file):
        # type: (str, Optional[IO[Any]]) -> pandas.DataFrame
        # Parse redis-benchmark --csv output and tag it with a run name.
        df = pandas.read_csv(file, names=["Type", "Req/s"])
        df["Name"] = name
        return df

    bench_cmd = [
        "redis-benchmark",
        "-r", "100000",
        "-t", "set,lpush",
        "-n", str(n),
        "--csv",
        "-p",  # port number is appended per run
    ]
    init_port = 10000
    results = []
    syscalls, bpf_prog = read_syscall()
    with open(os.devnull, "w") as fnull:
        # Baseline runs: no BPF probes attached.
        for i in range(repeat):
            print("Record {}th performance without bpf".format(i))
            while check_port_inuse(init_port):
                init_port += 1
            serv = subprocess.Popen(server_cmd + [str(init_port)],
                                    stdout=fnull)
            sleep(1)  # for setup
            bench = subprocess.Popen(bench_cmd + [str(init_port)],
                                     stdout=subprocess.PIPE)
            bench.wait()
            serv.terminate()
            results.append(read_result("no-bpf", bench.stdout))
        # Attach entry and return probes on every listed syscall.
        b = BPF(text=bpf_prog)
        for sysc in syscalls:
            try:
                b.attach_kprobe(event=sysc, fn_name="dump")
                b.attach_kretprobe(event=sysc, fn_name="dump")
            except Exception as e:
                # Some syscalls may not be probeable; keep going.
                print(str(e) + ", syscall: {}".format(sysc))
        # Instrumented runs: same benchmark with probes in place.
        for i in range(repeat):
            print("Record {}th performance with bpf".format(i))
            while check_port_inuse(init_port):
                init_port += 1
            serv = subprocess.Popen(server_cmd + [str(init_port)],
                                    stdout=fnull)
            sleep(1)  # for setup
            bench = subprocess.Popen(bench_cmd + [str(init_port)],
                                     stdout=subprocess.PIPE)
            bench.wait()
            serv.terminate()
            results.append(read_result("bpf", bench.stdout))
    df = pandas.concat(results)
    path = os.path.join(os.path.dirname(__file__), "results", "syscall.tsv")
    print("wrote %s" % path)
    df.to_csv(path, sep="\t")
# Map DDS writers/readers to topic names via user-space probes.
bpf.attach_uretprobe(name="%slibddskernel.so"%LIBPATH, sym="v_writerNew",
    fn_name="W_MapVWriter2TopicName")
bpf.attach_uretprobe(name="%slibdcpssac.so"%LIBPATH,
    sym="DDS_Publisher_create_datawriter", fn_name="W_MapWriter2TopicName")
bpf.attach_uprobe(name="%slibdcpssac.so"%LIBPATH,
    sym="DDS_Subscriber_create_datareader", fn_name="R_MapPID2Topic")
bpf.attach_uretprobe(name="%slibdcpssac.so"%LIBPATH,
    sym="DDS_Subscriber_create_datareader", fn_name="R_MapReader2TopicName")

# Write/Read Records
bpf.attach_uprobe(name="%slibdcpssac.so"%LIBPATH,
    sym= "DDS_DataWriter_write", fn_name="DDSWrite_Start")
#bpf.attach_uretprobe(name="%slibdcpssac.so"%LIBPATH, sym= "DDS_DataWriter_write", fn_name="DDSWrite_End")
bpf.attach_uprobe(name="%slibddskernel.so"%LIBPATH, sym="writerWrite",
    fn_name="W_MapVMess2GID")
bpf.attach_uprobe(name="%slibddsi2.so"%LIBPATH, sym="rtps_write",
    fn_name="Map_GID2Packet")
#bpf.attach_uretprobe(name="%slibddsi2.so"%LIBPATH, sym="rtps_write", fn_name="cleanup_v")

# Kernel-side network send/receive probes.
bpf.attach_kprobe( event="sock_sendmsg", fn_name="_sock_send")
bpf.attach_kprobe( event="ip_send_skb", fn_name="ip_send_skb")
bpf.attach_kretprobe( event="__skb_recv_udp", fn_name="skb_recv_udp_ret")
bpf.attach_kretprobe(event="sock_recvmsg", fn_name="_sock_recv_ret")

def print_event(cpu, data, size):
    """Perf-buffer callback: emit one CSV-style row per trace record."""
    event = bpf["events"].event(data)
    if 1:
        print("%14d,%2d,%14d,%4d,%20s,%14s,%6d,%12d,%8d,%8d,%14d,%14d,%14d,%14d,%14d,%14d,%14d,%14s" % (
            event.ts, event.recordType, event.pid, event.fun_ID,
            event.tName, event.comm, event.seqNum, event.gid_sys,
            event.gid_local, event.gid_seria, event.arg1, event.arg2,
            event.arg3, event.arg4, event.arg5, event.arg6, event.link,
            hex(event.ret)))
    return trace_entry(ctx, %d);
}
""" % (i, i)

if args.verbose:
    print(bpf_text)

b = BPF(text=bpf_text)

# "lib:sym" entries are user-space probes; anything else is a kernel
# function. Each function i gets its own trace_%d entry handler, while
# returns all share trace_return.
for i, function in enumerate(args.functions):
    if ":" in function:
        library, func = function.split(":")
        b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i)
        b.attach_uretprobe(name=library, sym=func, fn_name="trace_return")
    else:
        b.attach_kprobe(event=function, fn_name="trace_%d" % i)
        b.attach_kretprobe(event=function, fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h

class Data(ct.Structure):
    # ctypes mirror of the perf event emitted by the BPF program; the
    # trailing args array is present only when -a/--arguments is used.
    _fields_ = [
        ("id", ct.c_ulonglong),
        ("tgid_pid", ct.c_ulonglong),
        ("start_ns", ct.c_ulonglong),
        ("duration_ns", ct.c_ulonglong),
        ("retval", ct.c_ulonglong),
        ("comm", ct.c_char * TASK_COMM_LEN)
    ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else [])

time_designator = "us" if args.min_us else "ms"
parser = argparse.ArgumentParser(
    description="Lab2",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
    metavar='PID', help="trace this PID only")
args = parser.parse_args()

b = BPF(text=bpf_text)
# Page-fault / page-cache probe points; commented lines are alternative
# probe sites kept for experimentation.
# b.attach_kprobe(event="do_page_fault", fn_name="kprobe_do_page_fault")
b.attach_kprobe(event="handle_mm_fault", fn_name="kprobe_handle_mm_fault")
b.attach_kprobe(event="filemap_map_pages",
    fn_name="kprobe_filemap_map_pages")
b.attach_kprobe(event="ext4_filemap_fault",
    fn_name="kprobe_ext4_filemap_fault")
b.attach_kprobe(event="pagecache_get_page",
    fn_name="kprobe_pagecache_get_page")
# b.attach_kprobe(event="alloc_pages_current", fn_name="trace_pid_start")
# b.attach_kprobe(event="add_to_page_cache_lru", fn_name="trace_pid_start")
# b.attach_kprobe(event="ext4_mpage_readpages", fn_name="trace_pid_start")
# b.attach_kprobe(event="ext4_map_blocks", fn_name="trace_pid_start")
# b.attach_kprobe(event="submit_bio", fn_name="trace_pid_start")
# b.attach_kprobe(event="io_schedule", fn_name="trace_pid_start")
# b.attach_kprobe(event="enter_lazy_tlb", fn_name="trace_pid_start")
# b.attach_kprobe(event="irq_enter", fn_name="trace_pid_start")
# b.attach_kprobe(event="finish_task_switch", fn_name="trace_pid_start")
void trace_completion(struct pt_regs *ctx, struct request *req)
{
    u64 *tsp, delta;

    tsp = start.lookup(&req);
    if (tsp != 0) {
        delta = bpf_ktime_get_ns() - *tsp;
        dist.increment(bpf_log2l(delta / 1000));
        // bpf_trace_printk("%d %x %d\\n", req->__data_len,
        //     req->cmd_flags, delta / 1000);
        start.delete(&req);
    }
}
""")

# Both the legacy and blk-mq start paths feed the same start-timestamp map.
b.attach_kprobe(event="blk_start_request", fn_name="trace_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_start")
b.attach_kprobe(event="blk_account_io_completion",
    fn_name="trace_completion")

# header
print("Tracing... Hit Ctrl-C to end.")

# trace until Ctrl-C
try:
    sleep(99999999)
except KeyboardInterrupt:
    print

# output
b["dist"].print_log2_hist("ms")
        return 0;   // missed entry or lookup didn't fail
    }
    submit_event(ctx, (void *)ep->name, LOOKUP_MISS, pid);
    entrybypid.delete(&pid);
    return 0;
}
"""

if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF
b = BPF(text=bpf_text)
if args.all:
    b.attach_kprobe(event="lookup_fast", fn_name="trace_fast")

# Event-type codes emitted by the BPF program.
mode_s = {
    0: 'M',
    1: 'R',
}

start_ts = time.time()

def print_event(cpu, data, size):
    """Perf-buffer callback: print one dcache lookup event."""
    event = b["events"].event(data)
    print("%-11.6f %-6d %-16s %1s %s" % (
        time.time() - start_ts, event.pid,
        event.comm.decode('utf-8', 'replace'),
        mode_s[event.type],
        event.filename.decode('utf-8', 'replace')))
    # No PID/TID restriction requested: filter condition is constant-false.
    filter = '0'
bpf_text = bpf_text.replace('FILTER', filter)

# set stack storage size
bpf_text = bpf_text.replace('STACK_STORAGE_SIZE',
    str(args.stack_storage_size))
bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time))
bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time))

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="schedule", fn_name="offcpu")
b.attach_kprobe(event="try_to_wake_up", fn_name="waker")
matched = b.num_open_kprobes()
if matched == 0:
    print("0 functions traced. Exiting.")
    exit()

# header
if not folded:
    print("Tracing blocked time (us) by kernel stack", end="")
    if duration < 99999999:
        print(" for %d secs." % duration)
    else:
        print("... Hit Ctrl-C to end.")

# output
return 0; } """ examples = """examples: ./open_pid -p 181 # only trace PID 181 """ parser = argparse.ArgumentParser( description="Trace open() syscalls", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("-p", "--pid", help="trace this PID only") args = parser.parse_args() if args.pid: prog = prog.replace('PID_FILTER', 'if (pid != %s) { return 0; }' % args.pid) else: prog = prog.replace('PID_TID_FILTER', '') b = BPF(text=prog) b.attach_kprobe(event=b.get_syscall_fnname("open"), fn_name="trace_syscall_open") try: b.trace_print() except KeyboardInterrupt: exit()
else:
    # Bucket by IRQ name only (single slot). This variant uses
    # bpf_probe_read (older-kernel helper name) rather than
    # bpf_probe_read_kernel.
    bpf_text = bpf_text.replace('STORE',
        'irq_key_t key = {.slot = 0 /* ignore */};' +
        'bpf_probe_read(&key.name, sizeof(key.name), name);' +
        'dist.increment(key, delta);')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# load BPF program
b = BPF(text=bpf_text)

# these should really use irq:irq_handler_entry/exit tracepoints:
if args.count:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="count_only")
    print("Tracing hard irq events... Hit Ctrl-C to end.")
else:
    b.attach_kprobe(event="handle_irq_event_percpu", fn_name="trace_start")
    b.attach_kretprobe(event="handle_irq_event_percpu",
                       fn_name="trace_completion")
    print("Tracing hard irq event time... Hit Ctrl-C to end.")

# output
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while (1):
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        # Print one final summary before exiting.
        exiting = 1
    return trace_entry(ctx, %d);
}
""" % (i, i)

if args.verbose:
    print(bpf_text)

b = BPF(text=bpf_text)

# "lib:sym" entries are user-space probes; anything else is a kernel
# function. Each function i gets its own trace_%d entry handler, while
# returns all share trace_return.
for i, function in enumerate(args.functions):
    if ":" in function:
        library, func = function.split(":")
        b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i)
        b.attach_uretprobe(name=library, sym=func, fn_name="trace_return")
    else:
        b.attach_kprobe(event=function, fn_name="trace_%d" % i)
        b.attach_kretprobe(event=function, fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h

class Data(ct.Structure):
    # ctypes mirror of the perf event emitted by the BPF program; the
    # trailing args array is present only when -a/--arguments is used.
    _fields_ = [("id", ct.c_ulonglong),
                ("tgid_pid", ct.c_ulonglong),
                ("start_ns", ct.c_ulonglong),
                ("duration_ns", ct.c_ulonglong),
                ("retval", ct.c_ulonglong),
                ("comm", ct.c_char * TASK_COMM_LEN)
    ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else [])

# Threshold unit selection: microseconds if --min-us, else milliseconds.
time_designator = "us" if args.min_us else "ms"
time_value = args.min_us or args.min_ms or 1
time_multiplier = 1000 if args.min_us else 1000000
    print("ERROR: no btrfs_file_operations in /proc/kallsyms. Exiting.")
    exit()
bpf_text = bpf_text.replace('BTRFS_FILE_OPERATIONS', ops)
bpf_text = bpf_text.replace('FACTOR', str(factor))
if args.pid:
    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
else:
    bpf_text = bpf_text.replace('FILTER_PID', '0')
if debug:
    print(bpf_text)

# load BPF program
b = BPF(text=bpf_text)

# Common file functions. See earlier comment about generic_file_read_iter().
b.attach_kprobe(event="generic_file_read_iter", fn_name="trace_read_entry")
b.attach_kprobe(event="btrfs_file_write_iter", fn_name="trace_entry")
b.attach_kprobe(event="generic_file_open", fn_name="trace_open_entry")
b.attach_kprobe(event="btrfs_sync_file", fn_name="trace_entry")
b.attach_kretprobe(event="generic_file_read_iter",
    fn_name="trace_read_return")
b.attach_kretprobe(event="btrfs_file_write_iter",
    fn_name="trace_write_return")
b.attach_kretprobe(event="generic_file_open", fn_name="trace_open_return")
b.attach_kretprobe(event="btrfs_sync_file", fn_name="trace_fsync_return")

print("Tracing btrfs operation latency... Hit Ctrl-C to end.")

# output
exiting = 0
dist = b.get_table("dist")
while (1):
    try:
("pid", ct.c_uint), ("task", ct.c_char * TASK_COMM_LEN), ("delta_us", ct.c_ulonglong), ] # process event def print_event(cpu, data, size): event = ct.cast(data, ct.POINTER(Data)).contents print("%-8s %-16s %-6s %14s" % (strftime("%H:%M:%S"), event.task, event.pid, event.delta_us)) # load BPF program b = BPF(text=bpf_text) if not is_support_raw_tp: b.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup") b.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task") b.attach_kprobe(event="finish_task_switch", fn_name="trace_run") print("Tracing run queue latency higher than %d us" % min_us) print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "PID", "LAT(us)")) # read events b["events"].open_perf_buffer(print_event, page_cnt=64) while 1: try: b.perf_buffer_poll() except KeyboardInterrupt: exit()
stack_context = "user" kernel_stack_get = "-1" elif args.kernel_stacks_only: stack_context = "kernel" user_stack_get = "-1" else: stack_context = "user + kernel" bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get) bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get) if args.ebpf: print(bpf_text) exit() # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="finish_task_switch", fn_name="oncpu") b.attach_kprobe(event="try_to_wake_up", fn_name="waker") matched = b.num_open_kprobes() if matched == 0: print("0 functions traced. Exiting.") exit() # header if not folded: print("Tracing blocked time (us) by %s off-CPU and waker stack" % stack_context, end="") if duration < 99999999: print(" for %d secs." % duration) else: print("... Hit Ctrl-C to end.")
key.ip = PT_REGS_IP(ctx); counts.atomic_increment(key); // update counter return 0; } """ if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # load BPF program b = BPF(text=bpf_text) b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count") b.attach_kprobe(event="mark_page_accessed", fn_name="do_count") # Function account_page_dirtied() is changed to folio_account_dirtied() in 5.15. # FIXME: Both folio_account_dirtied() and account_page_dirtied() are # static functions and they may be gone during compilation and this may # introduce some inaccuracy. if BPF.get_kprobe_functions(b'folio_account_dirtied'): b.attach_kprobe(event="folio_account_dirtied", fn_name="do_count") elif BPF.get_kprobe_functions(b'account_page_dirtied'): b.attach_kprobe(event="account_page_dirtied", fn_name="do_count") b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count") # header if tstamp: print("%-8s " % "TIME", end="")
    return f'DeviceName: {event.dev_name}; SrcMac: {mac_src}; DestMac: {mac_dest}'

def serialize_l3_event(event):
    # Format an L3 (IPv4) event: saddr/daddr are host-order u32 fields,
    # packed back to bytes so inet_ntop can render dotted-quad strings.
    src_address = inet_ntop(AF_INET, pack('I', event.saddr))
    dest_address = inet_ntop(AF_INET, pack('I', event.daddr))
    return f'Source: {src_address}; Destination: {dest_address}; Protocol: {event.protocol}'

if __name__ == '__main__':
    print("Running net_ns_tracer.c.")
    b = BPF(src_file="net_ns_tracer.c")
    # Ingress
    b.attach_kprobe(event="ip_rcv", fn_name="ip_rcv_entry")
    # NOTE(review): ".isra.NN" suffixes are GCC-mangled names of static
    # functions; they vary per kernel build and compiler version, so these
    # attaches are fragile — confirm against the running kernel's symbols.
    b.attach_kprobe(event="ip_rcv_core.isra.20", fn_name="ip_rcv_core_entry")
    b.attach_kretprobe(event="ip_rcv_core.isra.20", fn_name="ip_rcv_core_exit")
    # Egress
    b.attach_kprobe(event="ip_rcv_finish", fn_name="ip_rcv_finish_entry")
    b.attach_kretprobe(event="ip_rcv_finish_core.isra.18",
                       fn_name="ip_rcv_finish_exit")
    # Egress
    b.attach_kprobe(event="ip_output", fn_name="ip_output_entry")
    # Egress after NAT
    b.attach_kprobe(event="ip_finish_output2", fn_name="ip_finish_output2_entry")
    b.attach_kprobe(event="dev_queue_xmit", fn_name="dev_queue_xmit_entry")
    # Forward
cache.update(&pid, &start_time_ns); return 0; } """ bpf_source += """ int trace_bpf_prog_load_return(void ctx) { u64 *start_time_ns, delta; u64 pid = bpf_get_current_pid_tgid(); start_time_ns = cache.lookup(&pid); if (start_time_ns == 0) return 0; delta = bpf_ktime_get_ns() - *start_time_ns; histogram.increment(bpf_log2l(delta)); return 0; } """ bpf = BPF(text=bpf_source) bpf.attach_kprobe(event="bpf_prog_load", fn_name="trace_bpf_prog_load_start") bpf.attach_kretprobe(event="bpf_prog_load", fn_name="trace_bpf_prog_load_return") try: sleep(99999999) except KeyboardInterrupt: print() bpf["histogram"].print_log2_hist("msecs")
//bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm); bpf_get_current_comm(&data.comm, sizeof(data.comm)); data.pid = valp->pid; data.order = valp->order; data.delta = tsp - valp->ts; data.ts = valp->ts; events.perf_submit(ctx, &data, sizeof(data)); start.delete(&pid); return 0; } """ b = BPF(text=prog) #b.attach_kprobe(event="huge_pte_alloc", fn_name="trace_hugepte") b.attach_kprobe(event="__alloc_pages_nodemask", fn_name="do_entry") b.attach_kretprobe(event="__alloc_pages_nodemask", fn_name="do_return") # b.detach_kprobe(event="__alloc_pages_nodemask") # b.detach_kretprobe(event="__alloc_pages_nodemask") time = [ ] pid = [ ] comm = [ ] latns = [ ] host = [ ] tsp = [ ] order = [ ] print("%-9s %-6s %-16s %10s %s %-3s" % ("TIME", "PID", "COMM", "LATns", "TS", "ORDER")) def print_event(cpu, data, size):
def print_ipv6_event(cpu, data, size):
    # Decode one IPv6 tcp_drop event and print time, pid, IP version,
    # src/dst endpoints, TCP state name and flags, followed by the kernel
    # stack that led to the drop.
    event = b["ipv6_events"].event(data)
    print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.sport),
        "%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport),
        tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags)))
    for addr in stack_traces.walk(event.stack_id):
        sym = b.ksym(addr, show_offset=True)
        print("\t%s" % sym)
    print("")

# initialize BPF
b = BPF(text=bpf_text)
# tcp_drop() only exists (and is traceable) on some kernel versions; bail
# out early with a clear error rather than failing on attach.
if b.get_kprobe_functions(b"tcp_drop"):
    b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop")
else:
    print("ERROR: tcp_drop() kernel function not found or traceable. "
        "Older kernel versions not supported.")
    exit()
stack_traces = b.get_table("stack_traces")

# header
print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % ("TIME", "PID", "IP",
    "SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS"))

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    try:
S_FAULTS = c_int(1) S_FIRST_PAGE = c_int(2) S_PAGES = c_int(3) S_UNACCOUNTED= c_int(4) parser = argparse.ArgumentParser(description="Trace SGX page faults.", formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("-i", "--interval", default=1, type=int, help="measurement and output interval, in seconds") parser.add_argument("-c", "--cumulative", action="store_true", help="do not clear counts at each interval") args = parser.parse_args() # Load BPF program b = BPF(text=SGX_FAULT_BPF) b.attach_kprobe(event="sgx_fault_page", fn_name="sgx_fault_page_probe0") print("Tracing... Ctrl-C to end.") # output print("[%s] %-12s %-12s %-12s %-12s %-12s" % (strftime("%H:%M:%S"), "PAGES", "SIZE (MB)", "UNACCOUNTED", "FAULTS", "BANDWIDTH (MB)")) while (1): try: sleep(args.interval) except KeyboardInterrupt: exit() pages = b["stats"][S_PAGES].value pgsize = pages/256.0 faults = b["stats"][S_FAULTS].value bandwidth = faults/256.0
("fname", ct.c_char * DNAME_INLINE_LEN) ] if args.pid: bpf_text = bpf_text.replace('FILTER', 'if (pid != %s) { return 0; }' % args.pid) else: bpf_text = bpf_text.replace('FILTER', '') if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="vfs_create", fn_name="trace_create") # newer kernels (say, 4.8) may don't fire vfs_create, so record (or overwrite) # the timestamp in security_inode_create(): b.attach_kprobe(event="security_inode_create", fn_name="trace_create") b.attach_kprobe(event="vfs_unlink", fn_name="trace_unlink") # header print("%-8s %-6s %-16s %-7s %s" % ("TIME", "PID", "COMM", "AGE(s)", "FILE")) # process event def print_event(cpu, data, size): event = ct.cast(data, ct.POINTER(Data)).contents print("%-8s %-6d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), event.pid, event.comm.decode('utf-8', 'replace'), float(event.delta) / 1000, event.fname.decode('utf-8', 'replace')))
if (csv): print("%d,%s,%d,%s,%d,%d,%d,%s" % (event.ts_us, event.task, event.pid, type, event.size, event.offset, event.delta_us, event.file)) return print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"), event.task, event.pid, type, event.size, event.offset / 1024, float(event.delta_us) / 1000, event.file)) # initialize BPF b = BPF(text=bpf_text) # common file functions if BPF.get_kprobe_functions(b'zpl_iter'): b.attach_kprobe(event="zpl_iter_read", fn_name="trace_rw_entry") b.attach_kprobe(event="zpl_iter_write", fn_name="trace_rw_entry") elif BPF.get_kprobe_functions(b'zpl_aio'): b.attach_kprobe(event="zpl_aio_read", fn_name="trace_rw_entry") b.attach_kprobe(event="zpl_aio_write", fn_name="trace_rw_entry") else: b.attach_kprobe(event="zpl_read", fn_name="trace_rw_entry") b.attach_kprobe(event="zpl_write", fn_name="trace_rw_entry") b.attach_kprobe(event="zpl_open", fn_name="trace_open_entry") b.attach_kprobe(event="zpl_fsync", fn_name="trace_fsync_entry") if BPF.get_kprobe_functions(b'zpl_iter'): b.attach_kretprobe(event="zpl_iter_read", fn_name="trace_read_return") b.attach_kretprobe(event="zpl_iter_write", fn_name="trace_write_return") elif BPF.get_kprobe_functions(b'zpl_aio'): b.attach_kretprobe(event="zpl_aio_read", fn_name="trace_read_return") b.attach_kretprobe(event="zpl_aio_write", fn_name="trace_write_return")
thread_context = "user threads" thread_filter = '!(prev->flags & PF_KTHREAD)' elif args.kernel_threads_only: thread_context = "kernel threads" thread_filter = 'prev->flags & PF_KTHREAD' else: thread_context = "all threads" thread_filter = '1' bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) # set stack storage size bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="finish_task_switch", fn_name="oncpu") matched = b.num_open_kprobes() if matched == 0: print("error: 0 functions traced. Exiting.", file=stderr) exit(1) # header if not folded: print("Tracing off-CPU time (us) of %s by kernel stack" % thread_context, end="") if duration < 99999999: print(" for %d secs." % duration) else: print("... Hit Ctrl-C to end.") try:
else: bpf_text = bpf_text.replace('TYPE_FILTER', '!S_ISREG(mode)') if debug or args.ebpf: print(bpf_text) if args.ebpf: exit() # initialize BPF b = BPF(text=bpf_text) # I'd rather trace these via new_sync_read/new_sync_write (which used to be # do_sync_read/do_sync_write), but those became static. So trace these from # the parent functions, at the cost of more overhead, instead. # Ultimately, we should be using [V]FS tracepoints. b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry") b.attach_kretprobe(event="__vfs_read", fn_name="trace_read_return") try: b.attach_kprobe(event="__vfs_write", fn_name="trace_write_entry") b.attach_kretprobe(event="__vfs_write", fn_name="trace_write_return") except: # older kernels don't have __vfs_write so try vfs_write instead b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry") b.attach_kretprobe(event="vfs_write", fn_name="trace_write_return") TASK_COMM_LEN = 16 # linux/sched.h DNAME_INLINE_LEN = 32 # linux/dcache.h class Data(ct.Structure): _fields_ = [ ("mode", ct.c_int),
def setUp(self):
    # Compile the module-level `text` BPF source (debug output disabled)
    # and expose its "stats" map to the test methods via self.stats.
    b = BPF(text=text, debug=0)
    self.stats = b.get_table("stats")
    # Count scheduler context switches: kprobe on finish_task_switch().
    b.attach_kprobe(event="finish_task_switch", fn_name="count_sched")
bpf_trace_printk("%d %s\\n", delta, dentry->d_iname); return 0; } """ if args.pid: bpf_text = bpf_text.replace('FILTER', 'if (pid != %s) { return 0; }' % args.pid) else: bpf_text = bpf_text.replace('FILTER', '') if debug: print(bpf_text) # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="vfs_create", fn_name="trace_create") b.attach_kprobe(event="vfs_unlink", fn_name="trace_unlink") # header print("%-8s %-6s %-16s %-7s %s" % ("TIME", "PID", "COMM", "AGE(s)", "FILE")) start_ts = 0 # format output while 1: (task, pid, cpu, flags, ts, msg) = b.trace_fields() (delta, filename) = msg.split(" ", 1) # print columns print("%-8s %-6d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), pid, task, float(delta) / 1000, filename))
data.retval = PT_REGS_RC(ctx); events.perf_submit(ctx, &data, sizeof(data)); return 0; } """ bpf_text = bpf_text.replace("MAXARG", args.max_args) if args.ebpf: print(bpf_text) exit() # initialize BPF b = BPF(text=bpf_text) execve_fnname = b.get_syscall_fnname("execve") b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve") b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve") # header if args.timestamp: print("%-8s" % ("TIME(s)"), end="") print("%-16s %-6s %-6s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS")) TASK_COMM_LEN = 16 # linux/sched.h ARGSIZE = 128 # should match #define in C above class Data(ct.Structure): _fields_ = [ ("pid", ct.c_uint), ("ppid", ct.c_uint),
def run_bpf_probe(num_iterations, sleep_secs):
    """Run the extended BPF probe on Linux's compact_zone_order() function."""
    # NOTE(review): this is Python 2 code (print statements, xrange) — keep
    # the interpreter pinned or port before running under Python 3.
    # debug_level = 0x3    # 0x3: dump LLVM IR and BPF byte code to stderr
    debug_level = 0x0      # debug 0x0 = no debug
    bpf = BPF(src_file="eBPF_c_probe.c", debug=debug_level)
    # ebpf_code = bpf.dump_func(func_name="prb_eBPF_compact_zone_order_entry")

    assert len(bpf["global_var_total_accum_nsec"]) == 1, \
        "Expected a global variable in BPF that be a scalar, ie., of length 1"

    # our BPF probe will only work for a kernel point which is not executed
    # concurrently, if not it will fail. Of course, you can use other
    # data structures in the BPF probe that can make it work concurrently.
    synchr_non_concurrent_kpoint = "compact_zone_order"

    # Entry + return probes bracket compact_zone_order() so the C side can
    # measure per-call latency.
    bpf.attach_kprobe(event=synchr_non_concurrent_kpoint,
                      fn_name="prb_eBPF_compact_zone_order_entry")
    bpf.attach_kretprobe(event=synchr_non_concurrent_kpoint,
                         fn_name="prb_eBPF_compact_zone_order_return")

    # these are other collateral events we want to know if they happen at the
    # same time as the main event above, and relatively, how frequently they
    # happen when the main probed events (above) are happening.
    collateral_events = [
        {'func': 'kmalloc_order_trace',
         'probe': 'prb_eBPF_kmalloc_order_trace_return',
         'count': 'global_var_cnt_kmalloc_order_trace'},
        {'func': '__kmalloc',
         'probe': 'prb_eBPF___kmalloc_return',
         'count': 'global_var_cnt___kmalloc'},
        {'func': '__do_kmalloc_node',
         'probe': 'prb_eBPF___do_kmalloc_node_return',
         'count': 'global_var_cnt___do_kmalloc_node'},
        {'func': 'kmem_cache_alloc',
         'probe': 'prb_eBPF_kmem_cache_alloc_return',
         'count': 'global_var_cnt_kmem_cache_alloc'},
        {'func': 'kmem_cache_alloc_trace',
         'probe': 'prb_eBPF_kmem_cache_alloc_trace_return',
         'count': 'global_var_cnt_kmem_cache_alloc_trace'},
        {'func': 'malloc',
         'probe': 'prb_eBPF_malloc_return',
         'count': 'global_var_cnt_malloc'},
        {'func': 'kfree',
         'probe': 'prb_eBPF_kfree_return',
         'count': 'global_var_cnt_kfree'},
        {'func': 'kmem_cache_reap',
         'probe': 'prb_eBPF_kmem_cache_reap_return',
         'count': 'global_var_cnt_kmem_cache_reap'},
        {'func': 'kmem_cache_free',
         'probe': 'prb_eBPF_kmem_cache_free_return',
         'count': 'global_var_cnt_kmem_cache_free'},
        {'func': 'kmem_cache_destroy',
         'probe': 'prb_eBPF_kmem_cache_destroy_return',
         'count': 'global_var_cnt_kmem_cache_destroy'},
        {'func': 'kmem_cache_shrink',
         'probe': 'prb_eBPF_kmem_cache_shrink_return',
         'count': 'global_var_cnt_kmem_cache_shrink'}
    ]

    for collateral_event in collateral_events:
        bpf.attach_kretprobe(event=collateral_event['func'],
                             fn_name=collateral_event['probe'])
        # Each collateral counter must also be a scalar BPF global.
        assert len(bpf[collateral_event['count']]) == 1, \
            "Var '{}' must be a scalar too.".format(collateral_event['count'])

    # request time to sleep and iterations as arguments from the command-line,
    # e.g., by using the 'argparse' module (the timing to wait is important
    # because there can be no output reported below if there is no activity of
    # the kprobe we attached to in this period of time)
    for sample in xrange(1, num_iterations + 1):
        sleep(sleep_secs)
        print "---- new sample: {} at {}".format(sample, strftime("%D %T"))
        # Per-sample latency histogram, then reset for the next interval.
        bpf["delay_dist"].print_log2_hist("usecs")
        bpf["delay_dist"].clear()
        # All the direct iterations on BPF tables return ctypes values (like
        # c_int, c_ulong, etc), which we unwrap here by the .value property and
        # divide by 1000 (microseconds) since the histogram in C in the BPF
        # probe also divided the nanoseconds by 1000, so all will report in the
        # same unit of time
        total_accum_nsec = bpf["global_var_total_accum_nsec"].values()[0]
        print "total_accum_usec = {:.0f}".format(total_accum_nsec.value / 1000)
        bpf["global_var_total_accum_nsec"].clear()
        for k, val in bpf["total_accum_nsec_per_order"].items():
            print ("total_accum_usec[order = {}] = "
                   "{:.0f}").format(k.value, val.value / 1000)
        bpf["total_accum_nsec_per_order"].clear()
        # Report and reset each collateral-event counter observed during
        # this sampling interval.
        for collateral_event in collateral_events:
            concur_kmallocs = bpf[collateral_event['count']].values()[0]
            print "{} while compaction = {}".format(collateral_event['func'],
                                                    concur_kmallocs.value)
            bpf[collateral_event['count']].clear()
        sys.stdout.flush()
def setUp(self):
    # NOTE(review): arg1/arg2 are module-level values defined outside this
    # view — presumably the BPF source (text or src_file) and flags; confirm
    # against the top of the file.
    b = BPF(arg1, arg2, debug=0)
    # Key/Leaf are module-level ctypes types describing the map layout.
    self.stats = b.get_table("stats", Key, Leaf)
    # Generate map traffic to count: kprobes on the write(2)/read(2)
    # syscall entry points plus htab_map_get_next_key (hit when the test
    # iterates the hash map).
    b.attach_kprobe(event=b.get_syscall_fnname("write"), fn_name="sys_wr")
    b.attach_kprobe(event=b.get_syscall_fnname("read"), fn_name="sys_rd")
    b.attach_kprobe(event="htab_map_get_next_key", fn_name="sys_rd")
#!/usr/bin/python
# Copyright (c) PLUMgrid, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# Count scheduler context switches per (cpu, pid) via a kprobe on
# finish_task_switch(), then aggregate and print per-CPU totals.
from bcc import BPF
from time import sleep

CPUNUM = 8  # NOTE(review): unused here — presumably mirrors the C side; confirm.

b = BPF(src_file="mytask_switch.c")
b.attach_kprobe(event="finish_task_switch", fn_name="count_sched")
sleep(1)

# Aggregate counts per CPU from the BPF "stats" map.
# (Fixed: dropped a redundant .clear() on the freshly created dict, the
# py2-only bare `print` statements, and stray semicolons.)
totalCountPerCPU = {}
for k, v in b["stats"].items():
    print("cpu: %d, pid: %d, switched count: %d" % (k.cpu, k.pid, v.value))
    totalCountPerCPU[k.cpu] = totalCountPerCPU.get(k.cpu, 0) + v.value

print("======= total switched count per CPU ========")
for k, v in totalCountPerCPU.items():
    print("cpu: %d, performed count: %d" % (k, v))
replacements = {} replacements["PLANID_FILTER"] = "" if args.planid is not None: replacements["PLANID_FILTER"] = \ "if (%s != %sULL) return 0;" % ("arg2", args.planid) for k, v in replacements.iteritems(): text=text.replace(k, v) debug = DEBUG_PREPROCESSOR|DEBUG_BPF if args.debug > 1 else 0 b = BPF(text=text, debug=debug, usdt=u) b.attach_kprobe(event="enqueue_entity", fn_name="trace_enqueue_entity") if args.planid is not None: print("Tracing plan ID %d" % args.planid) try: while True: #print("%-8s\n" % time.strftime("%H:%M:%S"), end="") #print("number of pagefaults") #b["pagefaults"].print_log2_hist() #time.sleep(1) (_, _, _, _, _, msg) = b.trace_fields() print("%-8s " % time.strftime("%H:%M:%S"), end="") print(msg)