def test_attach1(self): # enable USDT probe from given PID and verifier generated BPF programs u = USDT(pid=int(self.app.pid)) u.enable_probe(probe="probe", fn_name="do_trace") b = BPF(text=self.bpf_text, usdt_contexts=[u]) # processing events self.probe_value_1 = 0 self.probe_value_2 = 0 self.probe_value_3 = 0 self.probe_value_other = 0 def print_event(cpu, data, size): result = ct.cast(data, ct.POINTER(ct.c_int)).contents if result.value == 1: self.probe_value_1 = 1 elif result.value == 2: self.probe_value_2 = 1 elif result.value == 3: self.probe_value_3 = 1 else: self.probe_value_other = 1 b["event"].open_perf_buffer(print_event) for i in range(10): b.perf_buffer_poll() self.assertTrue(self.probe_value_1 != 0) self.assertTrue(self.probe_value_2 != 0) self.assertTrue(self.probe_value_3 != 0) self.assertTrue(self.probe_value_other == 0)
def _enable_probes(self): self.usdts = [] for pid in self.targets: usdt = USDT(pid=pid) for event in self.events: try: usdt.enable_probe(event, "%s_%s" % (self.language, event)) except Exception: # This process might not have a recent version of the USDT # probes enabled, or might have been compiled without USDT # probes at all. The process could even have been shut down # and the pid been recycled. We have to gracefully handle # the possibility that we can't attach probes to it at all. pass self.usdts.append(usdt)
def _enable_probes(self): self.usdts = [] for pid in self.targets: try: usdt = USDT(pid=pid) except USDTException: # avoid race condition on pid going away. print("failed to instrument %d" % pid, file=sys.stderr) continue for event in self.events: try: usdt.enable_probe(event, "%s_%s" % (self.language, event)) except Exception: # This process might not have a recent version of the USDT # probes enabled, or might have been compiled without USDT # probes at all. The process could even have been shut down # and the pid been recycled. We have to gracefully handle # the possibility that we can't attach probes to it at all. pass self.usdts.append(usdt)
def do_object_creation_probes(): # load BPF program bpf_text = """ #include <uapi/linux/ptrace.h> struct key_t { char objectname[128]; }; BPF_HASH(objmap, struct key_t, u64); int do_count(struct pt_regs *ctx) { struct key_t key = {}; bpf_probe_read(&key.objectname, sizeof(key.objectname), (void *)(unsigned long)ctx->r13); objmap.increment(key); return 0; } """ u = USDT(pid=args.pid) u.enable_probe(probe="object__create", fn_name="do_count") b = BPF(text=bpf_text, usdt=u) print("Press Ctrl-C to display/exit") try: sleep(99999999) except KeyboardInterrupt: pass data = b.get_table("objmap") data = sorted(data.items(), key=lambda kv: kv[1].value) for key, value in data: print("\t%-10s %s" % \ (str(value.value), str(key.objectname.decode())))
args = parser.parse_args() bpfs = dict() # These probes do not currently exist on any branch, and were used solely for my own testing. # You can use any probes that pass through a BSONObj size and a BSONObj char* in that order. probes = []#["query1", "query", "query1", "query1R", "queryR", "query2", "query3"] # necessary to have multiple bpfs, one for each object, # due to restriction on number of insns per bpf object. # copying out long strings requires a large number of insns, # and duplicating that for each entrypt function is prohibitively # large in terms of number of isns. for probe in probes: print(probe) usdt = USDT(pid=args.pid[0]) usdt.enable_probe(probe, fn_name="str_entry_{}".format(probe)) bpfs[probe] = BPF(text=code.replace("#PROBE#", probe), usdt_contexts=[usdt]) print(code) wrks = [] for probe in probes: print(probe) bpf = bpfs[probe] bpf["str_out_{}".format(probe)].open_perf_buffer(gen_callback(probe, bpfs)) wrks.append(WorkerThread(gen_work(probe, bpfs))) master = WorkerMaster(wrks) master.start_all() print("Ready")
] bpf_text = "" with open("../strobelight_hhvm_structs.h", "r") as structs_file: bpf_text += structs_file.read() + "\n" with open("./strobelight_hhvm_signal.c", "r") as signal_file: bpf_text += signal_file.read() + "\n" with open("../strobelight_hhvm_stacks_usdt_probe.c", "r") as stacks_file: bpf_text += stacks_file.read() + "\n" usdts = [] for pid in args.pids: usdt = USDT(pid=pid) usdt.enable_probe("hhvm_stack", "on_hhvm_event_hook") print("Enabled tracing on {}\n".format(pid)) usdts.append(usdt) b = BPF(text=bpf_text, usdt_contexts=usdts, cflags=cflags) # Track pids to fire signals on hhvm_pids = b.get_table("hhvm_pids") for pid in args.pids: hhvm_pids[ct.c_int(pid)] = ct.c_int(1) # Collect a stack every 10m cycles b.attach_perf_event(ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CPU_CYCLES, fn_name="on_event", sample_period=10000000)
u64 *start_ts = tracing.lookup(&pid); if (!start_ts) { return 0; } QUERYEND_COLLECT tracing.delete(&pid); memory.delete(&pid); return 0; } """ u = USDT(path=args.bin_path) u.enable_probe(probe="query__start", fn_name="querystart") u.enable_probe(probe="execstats__addmemoryuse", fn_name="execstats_addmemoryuse") u.enable_probe(probe="query__done", fn_name="queryend") replacements = {} replacements["PLANID_FILTER"] = "" replacements["STORAGE"] = "" replacements["QUERYEND_COLLECT"] = \ """ u64 *mem = memory.lookup(&pid); if (mem) { bpf_trace_printk("query used: %d bytes\\n", *mem); }
# load BPF program bpf_text = """ #include <uapi/linux/ptrace.h> int do_trace(struct pt_regs *ctx) { uint64_t addr; char path[128]={0}; bpf_usdt_readarg(6, ctx, &addr); bpf_probe_read(&path, sizeof(path), (void *)addr); bpf_trace_printk("path:%s\\n", path); return 0; }; """ # enable USDT probe from given PID u = USDT(pid=int(pid)) u.enable_probe(probe="http__server__request", fn_name="do_trace") if debug: print(u.get_text()) print(bpf_text) # initialize BPF b = BPF(text=bpf_text, usdt_contexts=[u]) # header print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "ARGS")) # format output while 1: try: (task, pid, cpu, flags, ts, msg) = b.trace_fields() except ValueError:
return 0; u64 *io_ns = io.lookup(&pid); if (io_ns) bpf_trace_printk("queryend: pid:%u, io time: %ul (ns)\\n", pid, *io_ns); else bpf_trace_printk("queryend: pid:%u, no io time\\n", pid); tracing.delete(&pid); io.delete(&pid); return 0; } """ u = USDT(path=args.bin_path) u.enable_probe(probe="query__start", fn_name="querystart") u.enable_probe(probe="query__done", fn_name="queryend") replacements = {} replacements["PLANID_FILTER"] = "" if args.planid is not None: replacements["PLANID_FILTER"] = \ "if (%s != %sULL) return 0;" % ("arg2", args.planid) for k, v in replacements.iteritems(): text=text.replace(k, v) debug = DEBUG_PREPROCESSOR|DEBUG_BPF if args.debug > 1 else 0
return 0; } """ bpf_text = bpf_text.replace("SAMPLE_COUNT", str(this_count)) bpf_text = bpf_text.replace("FILTER_STRING", this_filter) if this_filter: bpf_text = bpf_text.replace("FILTER", "if (!filter(start_data.input)) { return 0; }") else: bpf_text = bpf_text.replace("FILTER", "") # Create USDT context print("Attaching probes to pid %d" % this_pid) usdt_ctx = USDT(pid=this_pid) usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start") usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end") # Create BPF context, load BPF program bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) print("Tracing... Hit Ctrl-C to end.") lat_hash = bpf_ctx.get_table("lat_hash") while (1): try: sleep(this_interval) except KeyboardInterrupt: exit() print("[%s]" % strftime("%H:%M:%S"))
// populate and emit data struct struct data_t data = {.pid = pid, .ts = sp->ts, .delta = delta}; bpf_probe_read(&data.query, sizeof(data.query), (void *)sp->query); events.perf_submit(ctx, &data, sizeof(data)); } start_tmp.delete(&pid); return 0; }; """ # enable USDT probe from given PID u = USDT(pid=pid) u.enable_probe(probe="query__start", fn_name="do_start") u.enable_probe(probe="query__done", fn_name="do_done") if debug: print(u.get_text()) print(bpf_text) # initialize BPF b = BPF(text=bpf_text, usdt_contexts=[u]) # header print("Tracing MySQL server queries for PID %d slower than %s ms..." % (pid, min_ms_text)) print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY")) class Data(ct.Structure):
char query[128]; /* * Read the first argument from the query-start probe, which is the query. * The format of this probe is: * query-start(query, connectionid, database, user, host) * see: https://dev.mysql.com/doc/refman/5.7/en/dba-dtrace-ref-query.html */ bpf_usdt_readarg(1, ctx, &addr); bpf_trace_printk("%s\\n", addr); return 0; }; """ # enable USDT probe from given PID u = USDT(pid=int(pid)) u.enable_probe(probe="query__start", fn_name="do_trace") if debug: print(u.get_text()) print(bpf_text) # initialize BPF b = BPF(text=bpf_text, usdt=u) # header print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "QUERY")) # format output while 1: try: (task, pid, cpu, flags, ts, msg) = b.trace_fields() except ValueError:
bpf_text = bpf_text.replace("MAX_SAMPLE_SIZE", str(this_maxsamplesize)) bpf_text = bpf_text.replace("FILTER_STRING", this_filter) if this_filter: bpf_text = bpf_text.replace( "FILTER_STATEMENT", "if (!filter(start_data.input)) { return 0; }") else: bpf_text = bpf_text.replace("FILTER_STATEMENT", "") # Create USDT context print("lat_avg.py - Attaching probes to pid: %d; filter: %s" % (this_pid, this_filter)) usdt_ctx = USDT(pid=this_pid) if args.sdt: usdt_ctx.enable_probe(probe="usdt_sample_lib1_sdt:operation_start_sdt", fn_name="trace_operation_start") usdt_ctx.enable_probe(probe="usdt_sample_lib1_sdt:operation_end_sdt", fn_name="trace_operation_end") else: usdt_ctx.enable_probe(probe="usdt_sample_lib1:operation_start", fn_name="trace_operation_start") usdt_ctx.enable_probe(probe="usdt_sample_lib1:operation_end", fn_name="trace_operation_end") # Create BPF context, load BPF program bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) print("Tracing... Hit Ctrl-C to end.") lat_hash = bpf_ctx.get_table("lat_hash") print("%-12s %-64s %8s %16s" %
#!/usr/bin/env python from bcc import BPF, USDT import argparse parser = argparse.ArgumentParser( description="Trace JVM monitor contention events and dump a summary", formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("pid", type=int, help="the Java process id") args = parser.parse_args() usdt = USDT(pid=args.pid) usdt.enable_probe("monitor__contended__enter", "trace_enter") usdt.enable_probe("monitor__contended__entered", "trace_entered") bpf = BPF(text=""" int trace_enter(struct pt_regs *ctx) { // TODO return 0; } int trace_entered(struct pt_regs *ctx) { // TODO return 0; } """, usdt_contexts=[usdt]) # TODO
def run_test(self): # Tests the net:inbound_message and net:outbound_message tracepoints # See https://github.com/bitcoin/bitcoin/blob/master/doc/tracing.md#context-net class P2PMessage(ctypes.Structure): _fields_ = [ ("peer_id", ctypes.c_uint64), ("peer_addr", ctypes.c_char * MAX_PEER_ADDR_LENGTH), ("peer_conn_type", ctypes.c_char * MAX_PEER_CONN_TYPE_LENGTH), ("msg_type", ctypes.c_char * MAX_MSG_TYPE_LENGTH), ("msg_size", ctypes.c_uint64), ("msg", ctypes.c_ubyte * MAX_MSG_DATA_LENGTH), ] def __repr__(self): return f"P2PMessage(peer={self.peer_id}, addr={self.peer_addr.decode('utf-8')}, conn_type={self.peer_conn_type.decode('utf-8')}, msg_type={self.msg_type.decode('utf-8')}, msg_size={self.msg_size})" self.log.info( "hook into the net:inbound_message and net:outbound_message tracepoints" ) ctx = USDT(path=str(self.options.bitcoind)) ctx.enable_probe(probe="net:inbound_message", fn_name="trace_inbound_message") ctx.enable_probe(probe="net:outbound_message", fn_name="trace_outbound_message") bpf = BPF(text=net_tracepoints_program, usdt_contexts=[ctx], debug=0) # The handle_* function is a ctypes callback function called from C. When # we assert in the handle_* function, the AssertError doesn't propagate # back to Python. The exception is ignored. We manually count and assert # that the handle_* functions succeeded. EXPECTED_INOUTBOUND_VERSION_MSG = 1 checked_inbound_version_msg = 0 checked_outbound_version_msg = 0 def check_p2p_message(event, inbound): nonlocal checked_inbound_version_msg, checked_outbound_version_msg if event.msg_type.decode("utf-8") == "version": self.log.info( f"check_p2p_message(): {'inbound' if inbound else 'outbound'} {event}" ) peer = self.nodes[0].getpeerinfo()[0] msg = msg_version() msg.deserialize(BytesIO(bytes(event.msg[:event.msg_size]))) assert_equal(peer["id"], event.peer_id, peer["id"]) assert_equal(peer["addr"], event.peer_addr.decode("utf-8")) assert_equal(peer["connection_type"], event.peer_conn_type.decode("utf-8")) if inbound: checked_inbound_version_msg += 1 else: checked_outbound_version_msg += 1 def handle_inbound(_, data, __): event = ctypes.cast(data, ctypes.POINTER(P2PMessage)).contents check_p2p_message(event, True) def handle_outbound(_, data, __): event = ctypes.cast(data, ctypes.POINTER(P2PMessage)).contents check_p2p_message(event, False) bpf["inbound_messages"].open_perf_buffer(handle_inbound) bpf["outbound_messages"].open_perf_buffer(handle_outbound) self.log.info("connect a P2P test node to our bitcoind node") test_node = P2PInterface() self.nodes[0].add_p2p_connection(test_node) bpf.perf_buffer_poll(timeout=200) self.log.info( "check that we got both an inbound and outbound version message") assert_equal(EXPECTED_INOUTBOUND_VERSION_MSG, checked_inbound_version_msg) assert_equal(EXPECTED_INOUTBOUND_VERSION_MSG, checked_outbound_version_msg) bpf.cleanup()
class Probe(): "Parse, load and attach BPF probes" def __init__(self, probe_spec, kernel_stack, user_stack, use_regex=False, pid=None, per_pid=False, cpu=None): self.kernel_stack = kernel_stack self.user_stack = user_stack if DEBUG: print(probe_spec) self._parse_spec(probe_spec) if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = str(libpath, 'ascii') self.pid = pid # FIXME: don't hardcode this here. self.per_pid = True self.cpu = cpu self.matched = 0 self.bpf = None self.usdt = None def _parse_spec(self, spec): parts = spec.split(":") # Two special cases: 'func' means 'p::func', 'lib:func' means # 'p:lib:func'. Other combinations need to provide an empty # value between delimiters, e.g. 'r::func' for a kretprobe on # the function func. if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] if len(parts[0]) == 0: self.type = "p" elif parts[0] in ["p", "r", "t", "u"]: self.type = parts[0] else: raise Exception("probe type must be '', 'p', 't', 'r', " + "or 'u', but got '%s'" % parts[0]) if self.type == "u": # u:<library>[:<provider>]:<probe> where :<provider> is optional self.library = parts[1] self.pattern = ":".join(parts[2:]) else: self.library = ':'.join(parts[1:-1]) self.pattern = parts[-1] def load(self): ctx_name = "ctx" stack_trace = "" if self.user_stack: stack_trace += """ key.user_stack_id = stack_traces.get_stackid( %s, BPF_F_USER_STACK );""" % (ctx_name) else: stack_trace += "key.user_stack_id = -1;" if self.kernel_stack: stack_trace += """ key.kernel_stack_id = stack_traces.get_stackid( %s, 0 );""" % (ctx_name) else: stack_trace += "key.kernel_stack_id = -1;" trace_count_text = """ int trace_count(void *ctx) { FILTER struct key_t key = {}; key.tgid = GET_TGID; STORE_COMM %s counts.increment(key); return 0; } """ trace_count_text = trace_count_text % (stack_trace) bpf_text = """#include <uapi/linux/ptrace.h> #include <linux/sched.h> struct key_t { // no pid (thread ID) so that we do not needlessly split this key u32 tgid; int kernel_stack_id; int user_stack_id; char name[TASK_COMM_LEN]; }; BPF_HASH(counts, struct key_t); BPF_STACK_TRACE(stack_traces, 1024); """ filter_text = [] trace_count_text = trace_count_text.replace('FILTER', '\n '.join(filter_text)) # Do per-pid statistics iff -P is provided if self.per_pid: trace_count_text = trace_count_text.replace( 'GET_TGID', 'bpf_get_current_pid_tgid() >> 32') trace_count_text = trace_count_text.replace( 'STORE_COMM', 'bpf_get_current_comm(&key.name, sizeof(key.name));') else: # skip splitting on PID so these aggregate # together, and don't store the process name. trace_count_text = trace_count_text.replace( 'GET_TGID', '0xffffffff') trace_count_text = trace_count_text.replace('STORE_COMM', '') if self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) for probe in self.usdt.enumerate_probes(): if not self.pid and (str(probe.bin_path, 'ascii') != self.library): continue parts = self.pattern.split(":") if len(parts) == 1: provider_name = None usdt_name = parts[0].encode("ascii") else: provider_name = parts[0] usdt_name = parts[1] if (str(probe.name, 'ascii') == usdt_name and str(probe.provider, 'ascii') == provider_name): # This hack is required because the bpf_usdt_readarg # functions generated need different function names for # each attached probe. If we just stick to trace_count, # we'd get multiple bpf_usdt_readarg helpers with the same # name when enabling more than one USDT probe. new_func = "trace_count_%d" % self.matched bpf_text += trace_count_text.replace( "trace_count", new_func) self.usdt.enable_probe(str(probe.name, 'ascii'), new_func) self.matched += 1 if DEBUG: print(self.usdt.get_text()) else: bpf_text += trace_count_text if DEBUG: print(bpf_text) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else []) def attach(self): if self.type == "p": if self.library: self.bpf.attach_uprobe(name=self.library, sym_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_uprobes() else: self.bpf.attach_kprobe(event_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_kprobes() elif self.type == "t": self.bpf.attach_tracepoint(tp_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_tracepoints() elif self.type == "u": pass # Nothing to do -- attach already happened in `load` if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern)
info->total_ns += bpf_ktime_get_ns() - e->timestamp; sysentry.delete(&pid); return 0; } #endif // LATENCY #endif // SYSCALLS """.replace("READ_CLASS", read_class) \ .replace("READ_METHOD", read_method) \ .replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \ .replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \ .replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \ .replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "") if args.language: usdt = USDT(pid=args.pid) usdt.enable_probe(entry_probe, "trace_entry") if args.latency: usdt.enable_probe(return_probe, "trace_return") else: usdt = None if args.verbose: if usdt: print(usdt.get_text()) print(program) bpf = BPF(text=program, usdt_contexts=[usdt] if usdt else []) if args.syscalls: syscall_regex = "^[Ss]y[Ss]_.*" bpf.attach_kprobe(event_re=syscall_regex, fn_name="syscall_entry") if args.latency:
class Probe(object): def __init__(self, pattern, use_regex=False, pid=None): """Init a new probe. Init the probe from the pattern provided by the user. The supported patterns mimic the 'trace' and 'argdist' tools, but are simpler because we don't have to distinguish between probes and retprobes. func -- probe a kernel function lib:func -- probe a user-space function in the library 'lib' /path:func -- probe a user-space function in binary '/path' p::func -- same thing as 'func' p:lib:func -- same thing as 'lib:func' t:cat:event -- probe a kernel tracepoint u:lib:probe -- probe a USDT tracepoint """ parts = pattern.split(':') if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] elif len(parts) == 3: if parts[0] == "t": parts = ["t", "", "%s:%s" % tuple(parts[1:])] if parts[0] not in ["p", "t", "u"]: raise Exception("Type must be 'p', 't', or 'u', but got %s" % parts[0]) else: raise Exception("Too many ':'-separated components in pattern %s" % pattern) (self.type, self.library, self.pattern) = parts if not use_regex: self.pattern = self.pattern.replace('*', '.*') self.pattern = '^' + self.pattern + '$' if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = libpath self.pid = pid self.matched = 0 self.trace_functions = {} # map location number to function name def is_kernel_probe(self): return self.type == "t" or (self.type == "p" and self.library == "") def attach(self): if self.type == "p" and not self.library: for index, function in self.trace_functions.items(): self.bpf.attach_kprobe( event=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "p" and self.library: for index, function in self.trace_functions.items(): self.bpf.attach_uprobe( name=self.library, sym=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "t": for index, function in self.trace_functions.items(): self.bpf.attach_tracepoint( tp=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "u": pass # Nothing to do -- attach already happened in `load` def _add_function(self, template, probe_name): new_func = "trace_count_%d" % self.matched text = template.replace("PROBE_FUNCTION", new_func) text = text.replace("LOCATION", str(self.matched)) self.trace_functions[self.matched] = probe_name self.matched += 1 return text def _generate_functions(self, template): self.usdt = None text = "" if self.type == "p" and not self.library: functions = BPF.get_kprobe_functions(self.pattern) verify_limit(len(functions)) for function in functions: text += self._add_function(template, function) elif self.type == "p" and self.library: # uprobes are tricky because the same function may have multiple # addresses, and the same address may be mapped to multiple # functions. We aren't allowed to create more than one uprobe # per address, so track unique addresses and ignore functions that # map to an address that we've already seen. Also ignore functions # that may repeat multiple times with different addresses. addresses, functions = (set(), set()) functions_and_addresses = BPF.get_user_functions_and_addresses( self.library, self.pattern) verify_limit(len(functions_and_addresses)) for function, address in functions_and_addresses: if address in addresses or function in functions: continue addresses.add(address) functions.add(function) text += self._add_function(template, function) elif self.type == "t": tracepoints = BPF.get_tracepoints(self.pattern) verify_limit(len(tracepoints)) for tracepoint in tracepoints: text += self._add_function(template, tracepoint) elif self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) matches = [] for probe in self.usdt.enumerate_probes(): if not self.pid and (probe.bin_path != self.library): continue if re.match(self.pattern, probe.name): matches.append(probe.name) verify_limit(len(matches)) for match in matches: new_func = "trace_count_%d" % self.matched text += self._add_function(template, match) self.usdt.enable_probe(match, new_func) if debug: print(self.usdt.get_text()) return text def load(self): trace_count_text = """ int PROBE_FUNCTION(void *ctx) { FILTER int loc = LOCATION; u64 *val = counts.lookup(&loc); if (!val) { return 0; // Should never happen, # of locations is known } (*val)++; return 0; } """ bpf_text = """#include <uapi/linux/ptrace.h> BPF_ARRAY(counts, u64, NUMLOCATIONS); """ # We really mean the tgid from the kernel's perspective, which is in # the top 32 bits of bpf_get_current_pid_tgid(). if self.pid: trace_count_text = trace_count_text.replace('FILTER', """u32 pid = bpf_get_current_pid_tgid() >> 32; if (pid != %d) { return 0; }""" % self.pid) else: trace_count_text = trace_count_text.replace('FILTER', '') bpf_text += self._generate_functions(trace_count_text) bpf_text = bpf_text.replace("NUMLOCATIONS", str(len(self.trace_functions))) if debug: print(bpf_text) if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else []) self.clear() # Initialize all array items to zero def counts(self): return self.bpf["counts"] def clear(self): counts = self.bpf["counts"] for location, _ in list(self.trace_functions.items()): counts[counts.Key(location)] = counts.Leaf()
char query[128]; /* * Read the first argument from the query-start probe, which is the query. * The format of this probe is: * query-start(query, connectionid, database, user, host) * see: https://dev.mysql.com/doc/refman/5.7/en/dba-dtrace-ref-query.html */ bpf_usdt_readarg(1, ctx, &addr); bpf_trace_printk("%s\\n", addr); return 0; }; """ # enable USDT probe from given PID u = USDT(pid=int(pid)) u.enable_probe(probe="query__start", fn_name="do_trace") if debug: print(u.get_text()) print(bpf_text) # initialize BPF b = BPF(text=bpf_text, usdt_contexts=[u]) # header print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "QUERY")) # format output while 1: try: (task, pid, cpu, flags, ts, msg) = b.trace_fields() except ValueError:
def test_attach1(self): # Enable USDT probe from given PID and verifier generated BPF programs. u = USDT(pid=int(self.app.pid)) u.enable_probe(probe="probe_point_1", fn_name="do_trace1") u.enable_probe(probe="probe_point_2", fn_name="do_trace2") u2 = USDT(pid=int(self.app2.pid)) u2.enable_probe(probe="probe_point_2", fn_name="do_trace2") u2.enable_probe(probe="probe_point_3", fn_name="do_trace3") self.bpf_text = self.bpf_text.replace("FILTER", "pid == %d" % self.app.pid) b = BPF(text=self.bpf_text, usdt_contexts=[u, u2]) # Event states for each event: # 0 - probe not caught, 1 - probe caught with correct value, # 2 - probe caught with incorrect value self.evt_st_1 = 0 self.evt_st_2 = 0 self.evt_st_3 = 0 self.evt_st_4 = 0 self.evt_st_5 = 0 self.evt_st_6 = 0 def check_event_val(data, event_state, expected_val): result = ct.cast(data, ct.POINTER(ct.c_int)).contents if result.value == expected_val: if (event_state == 0 or event_state == 1): return 1 return 2 def print_event1(cpu, data, size): self.evt_st_1 = check_event_val(data, self.evt_st_1, 1) def print_event2(cpu, data, size): self.evt_st_2 = check_event_val(data, self.evt_st_2, 2) def print_event3(cpu, data, size): self.evt_st_3 = check_event_val(data, self.evt_st_3, 3) def print_event4(cpu, data, size): self.evt_st_4 = check_event_val(data, self.evt_st_4, 11) def print_event5(cpu, data, size): self.evt_st_5 = check_event_val(data, self.evt_st_5, 12) def print_event6(cpu, data, size): self.evt_st_6 = check_event_val(data, self.evt_st_6, 13) # loop with callback to print_event b["event1"].open_perf_buffer(print_event1) b["event2"].open_perf_buffer(print_event2) b["event3"].open_perf_buffer(print_event3) b["event4"].open_perf_buffer(print_event4) b["event5"].open_perf_buffer(print_event5) b["event6"].open_perf_buffer(print_event6) # three iterations to make sure we get some probes and have time to process them for i in range(3): b.perf_buffer_poll() # note that event1 and event4 do not really fire, so their state should be 0 # use separate asserts so that if test fails we know which one is the culprit self.assertTrue(self.evt_st_1 == 1) self.assertTrue(self.evt_st_2 == 1) self.assertTrue(self.evt_st_3 == 0) self.assertTrue(self.evt_st_4 == 0) self.assertTrue(self.evt_st_5 == 1) self.assertTrue(self.evt_st_6 == 1)
struct endQueryOutput out = {}; void* summaryPtr = NULL; out.opCtx = NULL; bpf_usdt_readarg(1, ctx, &out.opCtx); if(!out.opCtx) return 0; bpf_usdt_readarg(2, ctx, &summaryPtr); if(!summaryPtr) return 0; bpf_probe_read(&out.summaryStats, sizeof(out.summaryStats), summaryPtr); bpf_usdt_readarg(3, ctx, &out.numResults); endQuery.perf_submit(ctx, &out, sizeof(out)); return 0; } """ probe = USDT(int(sys.argv[1])) probe.enable_probe(probe="findCmdPlan", fn_name="findCmdPlan") toAgg = USDT(int(sys.argv[1])) toAgg.enable_probe(probe="findToAgg", fn_name="findCmdToAgg") # beginQuery = USDT(int(sys.argv[1])) probe.enable_probe(probe="beginQueryOp", fn_name="beginQueryOp") probe.enable_probe(probe="endQueryOp", fn_name="endQueryOp") class BeginQuery(ct.Structure): _fields_ = [("nss", ct.c_char * 50), ("bson", ct.c_byte * 100), ("queryObjSz", ct.c_uint), ("nreturn", ct.c_longlong), ("nskip", ct.c_longlong)]
class Probe(object): next_probe_index = 0 streq_index = 0 aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"} def _substitute_aliases(self, expr): if expr is None: return expr for alias, subst in Probe.aliases.items(): expr = expr.replace(alias, subst) return expr def _parse_signature(self): params = map(str.strip, self.signature.split(',')) self.param_types = {} for param in params: # If the type is a pointer, the * can be next to the # param name. Other complex types like arrays are not # supported right now. index = param.rfind('*') index = index if index != -1 else param.rfind(' ') param_type = param[0:index + 1].strip() param_name = param[index + 1:].strip() self.param_types[param_name] = param_type def _generate_entry(self): self.entry_probe_func = self.probe_func_name + "_entry" text = """ int PROBENAME(struct pt_regs *ctx SIGNATURE) { u64 __pid_tgid = bpf_get_current_pid_tgid(); u32 __pid = __pid_tgid; // lower 32 bits u32 __tgid = __pid_tgid >> 32; // upper 32 bits PID_FILTER COLLECT return 0; } """ text = text.replace("PROBENAME", self.entry_probe_func) text = text.replace("SIGNATURE", "" if len(self.signature) == 0 else ", " + self.signature) text = text.replace("PID_FILTER", self._generate_pid_filter()) collect = "" for pname in self.args_to_probe: param_hash = self.hashname_prefix + pname if pname == "__latency": collect += """ u64 __time = bpf_ktime_get_ns(); %s.update(&__pid, &__time); """ % param_hash else: collect += "%s.update(&__pid, &%s);\n" % \ (param_hash, pname) text = text.replace("COLLECT", collect) return text def _generate_entry_probe(self): # Any $entry(name) expressions result in saving that argument # when entering the function. self.args_to_probe = set() regex = r"\$entry\((\w+)\)" for expr in self.exprs: for arg in re.finditer(regex, expr): self.args_to_probe.add(arg.group(1)) for arg in re.finditer(regex, self.filter): self.args_to_probe.add(arg.group(1)) if any(map(lambda expr: "$latency" in expr, self.exprs)) or \ "$latency" in self.filter: self.args_to_probe.add("__latency") self.param_types["__latency"] = "u64" # nanoseconds for pname in self.args_to_probe: if pname not in self.param_types: raise ValueError("$entry(%s): no such param" % arg) self.hashname_prefix = "%s_param_" % self.probe_hash_name text = "" for pname in self.args_to_probe: # Each argument is stored in a separate hash that is # keyed by pid. text += "BPF_HASH(%s, u32, %s);\n" % \ (self.hashname_prefix + pname, self.param_types[pname]) text += self._generate_entry() return text def _generate_retprobe_prefix(self): # After we're done here, there are __%s_val variables for each # argument we needed to probe using $entry(name), and they all # have values (which isn't necessarily the case if we missed # the method entry probe). text = "" self.param_val_names = {} for pname in self.args_to_probe: val_name = "__%s_val" % pname text += "%s *%s = %s.lookup(&__pid);\n" % \ (self.param_types[pname], val_name, self.hashname_prefix + pname) text += "if (%s == 0) { return 0 ; }\n" % val_name self.param_val_names[pname] = val_name return text def _replace_entry_exprs(self): for pname, vname in self.param_val_names.items(): if pname == "__latency": entry_expr = "$latency" val_expr = "(bpf_ktime_get_ns() - *%s)" % vname else: entry_expr = "$entry(%s)" % pname val_expr = "(*%s)" % vname for i in range(0, len(self.exprs)): self.exprs[i] = self.exprs[i].replace( entry_expr, val_expr) self.filter = self.filter.replace(entry_expr, val_expr) def _attach_entry_probe(self): if self.is_user: self.bpf.attach_uprobe(name=self.library, sym=self.function, fn_name=self.entry_probe_func, pid=self.pid or -1) else: self.bpf.attach_kprobe(event=self.function, fn_name=self.entry_probe_func) def _bail(self, error): raise ValueError("error parsing probe '%s': %s" % (self.raw_spec, error)) def _validate_specifier(self): # Everything after '#' is the probe label, ignore it spec = self.raw_spec.split('#')[0] parts = spec.strip().split(':') if len(parts) < 3: self._bail("at least the probe type, library, and " + "function signature must be specified") if len(parts) > 6: self._bail("extraneous ':'-separated parts detected") if parts[0] not in ["r", "p", "t", "u"]: self._bail("probe type must be 'p', 'r', 't', or 'u'" + " but got '%s'" % parts[0]) if re.match(r"\S+\(.*\)", parts[2]) is None: self._bail(("function signature '%s' has an invalid " + "format") % parts[2]) def _parse_expr_types(self, expr_types): if len(expr_types) == 0: self._bail("no expr types specified") self.expr_types = expr_types.split(',') def _parse_exprs(self, exprs): if len(exprs) == 0: self._bail("no exprs specified") self.exprs = exprs.split(',') def _make_valid_identifier(self, ident): return re.sub(r'[^A-Za-z0-9_]', '_', ident) def __init__(self, tool, type, specifier): self.usdt_ctx = None self.streq_functions = "" self.pid = tool.args.pid self.cumulative = tool.args.cumulative or False self.raw_spec = specifier self._validate_specifier() spec_and_label = specifier.split('#') self.label = spec_and_label[1] \ if len(spec_and_label) == 2 else None parts = spec_and_label[0].strip().split(':') self.type = type # hist or freq self.probe_type = parts[0] fparts = parts[2].split('(') self.function = fparts[0].strip() if self.probe_type == "t": self.library = "" # kernel self.tp_category = parts[1] self.tp_event = self.function elif self.probe_type == "u": self.library = parts[1] self.probe_func_name = self._make_valid_identifier( "%s_probe%d" % (self.function, Probe.next_probe_index)) self._enable_usdt_probe() else: self.library = parts[1] self.is_user = len(self.library) > 0 self.signature = fparts[1].strip()[:-1] self._parse_signature() # If the user didn't specify an expression to probe, we probe # the retval in a ret probe, or simply the value "1" otherwise. self.is_default_expr = len(parts) < 5 if not self.is_default_expr: self._parse_expr_types(parts[3]) self._parse_exprs(parts[4]) if len(self.exprs) != len(self.expr_types): self._bail("mismatched # of exprs and types") if self.type == "hist" and len(self.expr_types) > 1: self._bail("histograms can only have 1 expr") else: if not self.probe_type == "r" and self.type == "hist": self._bail("histograms must have expr") self.expr_types = \ ["u64" if not self.probe_type == "r" else "int"] self.exprs = \ ["1" if not self.probe_type == "r" else "$retval"] self.filter = "" if len(parts) != 6 else parts[5] self._substitute_exprs() # Do we need to attach an entry probe so that we can collect an # argument that is required for an exit (return) probe? def check(expr): keywords = ["$entry", "$latency"] return any(map(lambda kw: kw in expr, keywords)) self.entry_probe_required = self.probe_type == "r" and \ (any(map(check, self.exprs)) or check(self.filter)) self.probe_func_name = self._make_valid_identifier( "%s_probe%d" % (self.function, Probe.next_probe_index)) self.probe_hash_name = self._make_valid_identifier( "%s_hash%d" % (self.function, Probe.next_probe_index)) Probe.next_probe_index += 1 def _enable_usdt_probe(self): self.usdt_ctx = USDT(path=self.library, pid=self.pid) self.usdt_ctx.enable_probe( self.function, self.probe_func_name) def _generate_streq_function(self, string): fname = "streq_%d" % Probe.streq_index Probe.streq_index += 1 self.streq_functions += """ static inline bool %s(char const *ignored, char const *str) { char needle[] = %s; char haystack[sizeof(needle)]; bpf_probe_read(&haystack, sizeof(haystack), (void *)str); for (int i = 0; i < sizeof(needle) - 1; ++i) { if (needle[i] != haystack[i]) { return false; } } return true; } """ % (fname, string) return fname def _substitute_exprs(self): def repl(expr): expr = self._substitute_aliases(expr) matches = re.finditer('STRCMP\\(("[^"]+\\")', expr) for match in matches: string = match.group(1) fname = self._generate_streq_function(string) expr = expr.replace("STRCMP", fname, 1) return expr.replace("$retval", "PT_REGS_RC(ctx)") for i in range(0, len(self.exprs)): self.exprs[i] = repl(self.exprs[i]) self.filter = repl(self.filter) def _is_string(self, expr_type): return expr_type == "char*" or expr_type == "char *" def _generate_hash_field(self, i): if self._is_string(self.expr_types[i]): return "struct __string_t v%d;\n" % i else: return "%s v%d;\n" % (self.expr_types[i], i) def _generate_usdt_arg_assignment(self, i): expr = self.exprs[i] if self.probe_type == "u" and expr[0:3] == "arg": arg_index = int(expr[3]) arg_ctype = self.usdt_ctx.get_probe_arg_ctype( self.function, arg_index - 1) return (" %s %s = 0;\n" + " bpf_usdt_readarg(%s, ctx, &%s);\n") \ % (arg_ctype, expr, expr[3], expr) else: return "" def _generate_field_assignment(self, i): text = self._generate_usdt_arg_assignment(i) if self._is_string(self.expr_types[i]): return (text + " bpf_probe_read(&__key.v%d.s," + " sizeof(__key.v%d.s), (void *)%s);\n") % \ (i, i, self.exprs[i]) else: return text + " __key.v%d = %s;\n" % \ (i, self.exprs[i]) def _generate_hash_decl(self): if self.type == "hist": return "BPF_HISTOGRAM(%s, %s);" % \ (self.probe_hash_name, self.expr_types[0]) else: text = "struct %s_key_t {\n" % self.probe_hash_name for i in range(0, len(self.expr_types)): text += self._generate_hash_field(i) text += "};\n" text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \ (self.probe_hash_name, self.probe_hash_name) return text def _generate_key_assignment(self): if self.type == "hist": return self._generate_usdt_arg_assignment(0) + \ ("%s __key = %s;\n" % (self.expr_types[0], self.exprs[0])) else: text = "struct %s_key_t __key = {};\n" % \ self.probe_hash_name for i in range(0, len(self.exprs)): text += self._generate_field_assignment(i) return text def _generate_hash_update(self): if self.type == "hist": return "%s.increment(bpf_log2l(__key));" % \ self.probe_hash_name else: return "%s.increment(__key);" % self.probe_hash_name def _generate_pid_filter(self): # Kernel probes need to explicitly filter pid, because the # attach interface doesn't support pid filtering if self.pid is not None and not self.is_user: return "if (__tgid != %d) { return 0; }" % self.pid else: return "" def generate_text(self): program = "" probe_text = """ DATA_DECL """ + ( "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category, self.tp_event) if self.probe_type == "t" else "int PROBENAME(struct pt_regs *ctx SIGNATURE)") + """ { u64 __pid_tgid = bpf_get_current_pid_tgid(); u32 __pid = __pid_tgid; // lower 32 bits u32 __tgid = __pid_tgid >> 32; // upper 32 bits PID_FILTER PREFIX if (!(FILTER)) return 0; KEY_EXPR COLLECT return 0; } """ prefix = "" signature = "" # If any entry arguments are probed in a ret probe, we need # to generate an entry probe to collect them if self.entry_probe_required: program += self._generate_entry_probe() prefix += self._generate_retprobe_prefix() # Replace $entry(paramname) with a reference to the # value we collected when entering the function: self._replace_entry_exprs() if self.probe_type == "p" and len(self.signature) > 0: # Only entry uprobes/kprobes can have user-specified # signatures. Other probes force it to (). signature = ", " + self.signature program += probe_text.replace("PROBENAME", self.probe_func_name) program = program.replace("SIGNATURE", signature) program = program.replace("PID_FILTER", self._generate_pid_filter()) decl = self._generate_hash_decl() key_expr = self._generate_key_assignment() collect = self._generate_hash_update() program = program.replace("DATA_DECL", decl) program = program.replace("KEY_EXPR", key_expr) program = program.replace("FILTER", "1" if len(self.filter) == 0 else self.filter) program = program.replace("COLLECT", collect) program = program.replace("PREFIX", prefix) return self.streq_functions + program def _attach_u(self): libpath = BPF.find_library(self.library) if libpath is None: libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: self._bail("unable to find library %s" % self.library) if self.probe_type == "r": self.bpf.attach_uretprobe(name=libpath, sym=self.function, fn_name=self.probe_func_name, pid=self.pid or -1) else: self.bpf.attach_uprobe(name=libpath, sym=self.function, fn_name=self.probe_func_name, pid=self.pid or -1) def _attach_k(self): if self.probe_type == "t": pass # Nothing to do for tracepoints elif self.probe_type == "r": self.bpf.attach_kretprobe(event=self.function, fn_name=self.probe_func_name) else: self.bpf.attach_kprobe(event=self.function, fn_name=self.probe_func_name) def attach(self, bpf): self.bpf = bpf if self.probe_type == "u": return if self.is_user: self._attach_u() else: self._attach_k() if self.entry_probe_required: self._attach_entry_probe() def _v2s(self, v): # Most fields can be converted with plain str(), but strings # are wrapped in a __string_t which has an .s field if "__string_t" in type(v).__name__: return str(v.s) return str(v) def _display_expr(self, i): # Replace ugly latency calculation with $latency expr = self.exprs[i].replace( "(bpf_ktime_get_ns() - *____latency_val)", "$latency") # Replace alias values back with the alias name for alias, subst in Probe.aliases.items(): expr = expr.replace(subst, alias) # Replace retval expression with $retval expr = expr.replace("PT_REGS_RC(ctx)", "$retval") # Replace ugly (*__param_val) expressions with param name return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr) def _display_key(self, key): if self.is_default_expr: if not self.probe_type == "r": return "total calls" else: return "retval = %s" % str(key.v0) else: # The key object has v0, ..., vk fields containing # the values of the expressions from self.exprs def str_i(i): key_i = self._v2s(getattr(key, "v%d" % i)) return "%s = %s" % \ (self._display_expr(i), key_i) return ", ".join(map(str_i, range(0, len(self.exprs)))) def display(self, top): data = self.bpf.get_table(self.probe_hash_name) if self.type == "freq": print(self.label or self.raw_spec) print("\t%-10s %s" % ("COUNT", "EVENT")) sdata = sorted(data.items(), key=lambda p: p[1].value) if top is not None: sdata = sdata[-top:] for key, value in sdata: # Print some nice values if the user didn't # specify an expression to probe if self.is_default_expr: if not self.probe_type == "r": key_str = "total calls" else: key_str = "retval = %s" % \ self._v2s(key.v0) else: key_str = self._display_key(key) print("\t%-10s %s" % (str(value.value), key_str)) elif self.type == "hist": label = self.label or (self._display_expr(0) if not self.is_default_expr else "retval") data.print_log2_hist(val_type=label) if not self.cumulative: data.clear() def __str__(self): return self.label or self.raw_spec
def test_attach1(self): # Enable USDT probe from given PID and verifier generated BPF programs. u = USDT(pid=int(self.app.pid)) u.enable_probe(probe="probe_point_1", fn_name="do_trace1") u.enable_probe(probe="probe_point_2", fn_name="do_trace2") u2 = USDT(pid=int(self.app2.pid)) u2.enable_probe(probe="probe_point_2", fn_name="do_trace2") u2.enable_probe(probe="probe_point_3", fn_name="do_trace3") self.bpf_text = self.bpf_text.replace("FILTER", "pid == %d" % self.app.pid) b = BPF(text=self.bpf_text, usdt_contexts=[u, u2]) # Event states for each event: # 0 - probe not caught, 1 - probe caught with correct value, # 2 - probe caught with incorrect value self.evt_st_1 = 0 self.evt_st_2 = 0 self.evt_st_3 = 0 self.evt_st_4 = 0 self.evt_st_5 = 0 self.evt_st_6 = 0 def check_event_val(data, event_state, expected_val): result = ct.cast(data, ct.POINTER(ct.c_int)).contents if result.value == expected_val: if (event_state == 0 or event_state == 1): return 1 return 2 def print_event1(cpu, data, size): self.evt_st_1 = check_event_val(data, self.evt_st_1, 1) def print_event2(cpu, data, size): self.evt_st_2 = check_event_val(data, self.evt_st_2, 2) def print_event3(cpu, data, size): self.evt_st_3 = check_event_val(data, self.evt_st_3, 3) def print_event4(cpu, data, size): self.evt_st_4 = check_event_val(data, self.evt_st_4, 11) def print_event5(cpu, data, size): self.evt_st_5 = check_event_val(data, self.evt_st_5, 12) def print_event6(cpu, data, size): self.evt_st_6 = check_event_val(data, self.evt_st_6, 13) # loop with callback to print_event b["event1"].open_perf_buffer(print_event1) b["event2"].open_perf_buffer(print_event2) b["event3"].open_perf_buffer(print_event3) b["event4"].open_perf_buffer(print_event4) b["event5"].open_perf_buffer(print_event5) b["event6"].open_perf_buffer(print_event6) # three iterations to make sure we get some probes and have time to process them for i in range(5): b.perf_buffer_poll() # note that event1 and event4 do not really fire, so their state should be 0 # use separate asserts so that if test fails we know which one is the culprit self.assertTrue(self.evt_st_1 == 1) self.assertTrue(self.evt_st_2 == 1) self.assertTrue(self.evt_st_3 == 0) self.assertTrue(self.evt_st_4 == 0) self.assertTrue(self.evt_st_5 == 1) self.assertTrue(self.evt_st_6 == 1)
#!/usr/bin/env python from bcc import BPF, USDT import argparse parser = argparse.ArgumentParser( description="Trace JVM monitor contention events and dump a summary", formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("pid", type=int, help="the Java process id") args = parser.parse_args() usdt = USDT(pid=args.pid) usdt.enable_probe("monitor__contended__enter", "trace_enter") usdt.enable_probe("monitor__contended__entered", "trace_entered") bpf = BPF(text=""" int trace_enter(struct pt_regs *ctx) { // TODO return 0; } int trace_entered(struct pt_regs *ctx) { // TODO return 0; } """, usdt_contexts=[usdt]) # TODO
if (!timestampp) return 0; u64 delta = bpf_ktime_get_ns() - *timestampp; if (delta/1000000 < %d) return 0; delta /= %d; latency.increment(bpf_log2l(delta)); temp.delete(&pid); return 0; } """ % (args.threshold, 1000000 if args.milliseconds else 1000) usdt = USDT(pid=int(dbpid)) usdt.enable_probe("query__start", "probe_start") usdt.enable_probe("query__done", "probe_end") bpf = BPF(text=program, usdt_contexts=[usdt]) if args.verbose: print(usdt.get_text()) print(program) print( "Tracing database queries slower than %dms for PID %d... Ctrl+C to quit." % (args.threshold, dbpid)) try: sleep(999999999999) except KeyboardInterrupt: pass
dist.increment(dist_key); return 0; } """ bpf_text = bpf_text.replace("FILTER_STRING", this_filter) if this_filter: bpf_text = bpf_text.replace( "FILTER", "if (!filter(start_data.input)) { return 0; }") else: bpf_text = bpf_text.replace("FILTER", "") # Create USDT context print("Attaching probes to pid %d" % this_pid) usdt_ctx = USDT(pid=this_pid) usdt_ctx.enable_probe(probe="operation_start", fn_name="trace_operation_start") usdt_ctx.enable_probe(probe="operation_end", fn_name="trace_operation_end") # Create BPF context, load BPF program bpf_ctx = BPF(text=bpf_text, usdt_contexts=[usdt_ctx], debug=debugLevel) start = 0 dist = bpf_ctx.get_table("dist") while (1): try: sleep(this_interval) except KeyboardInterrupt: exit() print("[%s]" % strftime("%H:%M:%S")) dist.print_log2_hist("latency (us)")
def test_attach1(self): # enable USDT probe from given PID and verifier generated BPF programs u = USDT(pid=int(self.app.pid)) u.enable_probe(probe="probe_point_1", fn_name="do_trace1") u.enable_probe(probe="probe_point_2", fn_name="do_trace2") u.enable_probe(probe="probe_point_3", fn_name="do_trace3") u.enable_probe(probe="probe_point_4", fn_name="do_trace4") u.enable_probe(probe="probe_point_5", fn_name="do_trace5") b = BPF(text=self.bpf_text, usdt_contexts=[u], debug=4) # Event states for each event: # 0 - probe not caught, 1 - probe caught with correct value, # 2 - probe caught with incorrect value self.evt_st_1 = 0 self.evt_st_2 = 0 self.evt_st_3 = 0 # define output data structure in Python class Data1(ct.Structure): _fields_ = [("v1", ct.c_char), ("v2", ct.c_int)] class Data2(ct.Structure): _fields_ = [("v1", ct.c_int), ("v2", ct.c_char)] class Data3(ct.Structure): _fields_ = [("v1", ct.c_int), ("v2", ct.c_int)] class Data4(ct.Structure): _fields_ = [("v1", ct.c_ulonglong), ("v2", ct.c_char * 64)] class Data5(ct.Structure): _fields_ = [("v1", ct.c_char * 64), ("v2", ct.c_ulonglong)] def check_event_val(event, event_state, v1, v2, v3, v4): if ((event.v1 == v1 and event.v2 == v2) or (event.v1 == v3 and event.v2 == v4)): if (event_state == 0 or event_state == 1): return 1 return 2 def print_event1(cpu, data, size): event = ct.cast(data, ct.POINTER(Data1)).contents self.evt_st_1 = check_event_val(event, self.evt_st_1, b'\x0d', 40, b'\x08', 200) def print_event2(cpu, data, size): event = ct.cast(data, ct.POINTER(Data2)).contents # pretend we have two identical probe points to simplify the code self.evt_st_2 = check_event_val(event, self.evt_st_2, 5, b'\x04', 5, b'\x04') def print_event3(cpu, data, size): event = ct.cast(data, ct.POINTER(Data3)).contents self.evt_st_3 = check_event_val(event, self.evt_st_3, 200, 40, 8, 13) def print_event4(cpu, data, size): event = ct.cast(data, ct.POINTER(Data4)).contents print("%s" % event.v2) def print_event5(cpu, data, size): event = ct.cast(data, ct.POINTER(Data5)).contents print("%s" % event.v1) # loop with callback to print_event b["event1"].open_perf_buffer(print_event1) b["event2"].open_perf_buffer(print_event2) b["event3"].open_perf_buffer(print_event3) b["event4"].open_perf_buffer(print_event4) b["event5"].open_perf_buffer(print_event5) # three iterations to make sure we get some probes and have time to process them for i in range(3): b.perf_buffer_poll() self.assertTrue(self.evt_st_1 == 1 and self.evt_st_2 == 1 and self.evt_st_3 == 1)
class Probe(object): def __init__(self, pattern, use_regex=False, pid=None): """Init a new probe. Init the probe from the pattern provided by the user. The supported patterns mimic the 'trace' and 'argdist' tools, but are simpler because we don't have to distinguish between probes and retprobes. func -- probe a kernel function lib:func -- probe a user-space function in the library 'lib' p::func -- same thing as 'func' p:lib:func -- same thing as 'lib:func' t:cat:event -- probe a kernel tracepoint u:lib:probe -- probe a USDT tracepoint """ parts = pattern.split(':') if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] elif len(parts) == 3: if parts[0] == "t": parts = ["t", "", "%s:%s" % tuple(parts[1:])] if parts[0] not in ["p", "t", "u"]: raise Exception("Type must be 'p', 't', or 'u', but got %s" % parts[0]) else: raise Exception("Too many ':'-separated components in pattern %s" % pattern) (self.type, self.library, self.pattern) = parts if not use_regex: self.pattern = self.pattern.replace('*', '.*') self.pattern = '^' + self.pattern + '$' if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = libpath self.pid = pid self.matched = 0 self.trace_functions = {} # map location number to function name def is_kernel_probe(self): return self.type == "t" or (self.type == "p" and self.library == "") def attach(self): if self.type == "p" and not self.library: for index, function in self.trace_functions.items(): self.bpf.attach_kprobe(event=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "p" and self.library: for index, function in self.trace_functions.items(): self.bpf.attach_uprobe(name=self.library, sym=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "t": for index, function in self.trace_functions.items(): self.bpf.attach_tracepoint(tp=function, fn_name="trace_count_%d" % index, pid=self.pid or -1) elif self.type == "u": pass # Nothing to do -- attach already happened in `load` if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern) def _add_function(self, template, probe_name): new_func = "trace_count_%d" % self.matched text = template.replace("PROBE_FUNCTION", new_func) text = text.replace("LOCATION", str(self.matched)) self.trace_functions[self.matched] = probe_name self.matched += 1 return text def _generate_functions(self, template): self.usdt = None text = "" if self.type == "p" and not self.library: functions = BPF.get_kprobe_functions(self.pattern) verify_limit(len(functions)) for function in functions: text += self._add_function(template, function) elif self.type == "p" and self.library: # uprobes are tricky because the same function may have multiple # addresses, and the same address may be mapped to multiple # functions. We aren't allowed to create more than one uprobe # per address, so track unique addresses and ignore functions that # map to an address that we've already seen. Also ignore functions # that may repeat multiple times with different addresses. addresses, functions = (set(), set()) functions_and_addresses = BPF.get_user_functions_and_addresses( self.library, self.pattern) verify_limit(len(functions_and_addresses)) for function, address in functions_and_addresses: if address in addresses or function in functions: continue addresses.add(address) functions.add(function) text += self._add_function(template, function) elif self.type == "t": tracepoints = BPF.get_tracepoints(self.pattern) verify_limit(len(tracepoints)) for tracepoint in tracepoints: text += self._add_function(template, tracepoint) elif self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) matches = [] for probe in self.usdt.enumerate_probes(): if not self.pid and (probe.bin_path != self.library): continue if re.match(self.pattern, probe.name): matches.append(probe.name) verify_limit(len(matches)) for match in matches: new_func = "trace_count_%d" % self.matched text += self._add_function(template, match) self.usdt.enable_probe(match, new_func) if debug: print(self.usdt.get_text()) return text def load(self): trace_count_text = """ int PROBE_FUNCTION(void *ctx) { FILTER int loc = LOCATION; u64 *val = counts.lookup(&loc); if (!val) { return 0; // Should never happen, # of locations is known } (*val)++; return 0; } """ bpf_text = """#include <uapi/linux/ptrace.h> BPF_TABLE("array", int, u64, counts, NUMLOCATIONS); """ # We really mean the tgid from the kernel's perspective, which is in # the top 32 bits of bpf_get_current_pid_tgid(). if self.pid: trace_count_text = trace_count_text.replace( 'FILTER', """u32 pid = bpf_get_current_pid_tgid() >> 32; if (pid != %d) { return 0; }""" % self.pid) else: trace_count_text = trace_count_text.replace('FILTER', '') bpf_text += self._generate_functions(trace_count_text) bpf_text = bpf_text.replace("NUMLOCATIONS", str(len(self.trace_functions))) if debug: print(bpf_text) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else []) self.clear() # Initialize all array items to zero def counts(self): return self.bpf["counts"] def clear(self): counts = self.bpf["counts"] for location, _ in list(self.trace_functions.items()): counts[counts.Key(location)] = counts.Leaf()
class Probe(object): next_probe_index = 0 streq_index = 0 aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"} def _substitute_aliases(self, expr): if expr is None: return expr for alias, subst in Probe.aliases.items(): expr = expr.replace(alias, subst) return expr def _parse_signature(self): params = map(str.strip, self.signature.split(',')) self.param_types = {} for param in params: # If the type is a pointer, the * can be next to the # param name. Other complex types like arrays are not # supported right now. index = param.rfind('*') index = index if index != -1 else param.rfind(' ') param_type = param[0:index + 1].strip() param_name = param[index + 1:].strip() self.param_types[param_name] = param_type # Maintain list of user params. Then later decide to # switch to bpf_probe_read_kernel or bpf_probe_read_user. if "__user" in param_type.split(): self.probe_user_list.add(param_name) def _generate_entry(self): self.entry_probe_func = self.probe_func_name + "_entry" text = """ int PROBENAME(struct pt_regs *ctx SIGNATURE) { u64 __pid_tgid = bpf_get_current_pid_tgid(); u32 __pid = __pid_tgid; // lower 32 bits u32 __tgid = __pid_tgid >> 32; // upper 32 bits PID_FILTER COLLECT return 0; } """ text = text.replace("PROBENAME", self.entry_probe_func) text = text.replace("SIGNATURE", "" if len(self.signature) == 0 else ", " + self.signature) text = text.replace("PID_FILTER", self._generate_pid_filter()) collect = "" for pname in self.args_to_probe: param_hash = self.hashname_prefix + pname if pname == "__latency": collect += """ u64 __time = bpf_ktime_get_ns(); %s.update(&__pid, &__time); """ % param_hash else: collect += "%s.update(&__pid, &%s);\n" % \ (param_hash, pname) text = text.replace("COLLECT", collect) return text def _generate_entry_probe(self): # Any $entry(name) expressions result in saving that argument # when entering the function. self.args_to_probe = set() regex = r"\$entry\((\w+)\)" for expr in self.exprs: for arg in re.finditer(regex, expr): self.args_to_probe.add(arg.group(1)) for arg in re.finditer(regex, self.filter): self.args_to_probe.add(arg.group(1)) if any(map(lambda expr: "$latency" in expr, self.exprs)) or \ "$latency" in self.filter: self.args_to_probe.add("__latency") self.param_types["__latency"] = "u64" # nanoseconds for pname in self.args_to_probe: if pname not in self.param_types: raise ValueError("$entry(%s): no such param" % arg) self.hashname_prefix = "%s_param_" % self.probe_hash_name text = "" for pname in self.args_to_probe: # Each argument is stored in a separate hash that is # keyed by pid. text += "BPF_HASH(%s, u32, %s);\n" % \ (self.hashname_prefix + pname, self.param_types[pname]) text += self._generate_entry() return text def _generate_retprobe_prefix(self): # After we're done here, there are __%s_val variables for each # argument we needed to probe using $entry(name), and they all # have values (which isn't necessarily the case if we missed # the method entry probe). text = "" self.param_val_names = {} for pname in self.args_to_probe: val_name = "__%s_val" % pname text += "%s *%s = %s.lookup(&__pid);\n" % \ (self.param_types[pname], val_name, self.hashname_prefix + pname) text += "if (%s == 0) { return 0 ; }\n" % val_name self.param_val_names[pname] = val_name return text def _replace_entry_exprs(self): for pname, vname in self.param_val_names.items(): if pname == "__latency": entry_expr = "$latency" val_expr = "(bpf_ktime_get_ns() - *%s)" % vname else: entry_expr = "$entry(%s)" % pname val_expr = "(*%s)" % vname for i in range(0, len(self.exprs)): self.exprs[i] = self.exprs[i].replace( entry_expr, val_expr) self.filter = self.filter.replace(entry_expr, val_expr) def _attach_entry_probe(self): if self.is_user: self.bpf.attach_uprobe(name=self.library, sym=self.function, fn_name=self.entry_probe_func, pid=self.pid or -1) else: self.bpf.attach_kprobe(event=self.function, fn_name=self.entry_probe_func) def _bail(self, error): raise ValueError("error parsing probe '%s': %s" % (self.raw_spec, error)) def _validate_specifier(self): # Everything after '#' is the probe label, ignore it spec = self.raw_spec.split('#')[0] parts = spec.strip().split(':') if len(parts) < 3: self._bail("at least the probe type, library, and " + "function signature must be specified") if len(parts) > 6: self._bail("extraneous ':'-separated parts detected") if parts[0] not in ["r", "p", "t", "u"]: self._bail("probe type must be 'p', 'r', 't', or 'u'" + " but got '%s'" % parts[0]) if re.match(r"\S+\(.*\)", parts[2]) is None: self._bail(("function signature '%s' has an invalid " + "format") % parts[2]) def _parse_expr_types(self, expr_types): if len(expr_types) == 0: self._bail("no expr types specified") self.expr_types = expr_types.split(',') def _parse_exprs(self, exprs): if len(exprs) == 0: self._bail("no exprs specified") self.exprs = exprs.split(',') def _make_valid_identifier(self, ident): return re.sub(r'[^A-Za-z0-9_]', '_', ident) def __init__(self, tool, type, specifier): self.usdt_ctx = None self.streq_functions = "" self.pid = tool.args.pid self.cumulative = tool.args.cumulative or False self.raw_spec = specifier self.probe_user_list = set() self.bin_cmp = False self._validate_specifier() spec_and_label = specifier.split('#') self.label = spec_and_label[1] \ if len(spec_and_label) == 2 else None parts = spec_and_label[0].strip().split(':') self.type = type # hist or freq self.probe_type = parts[0] fparts = parts[2].split('(') self.function = fparts[0].strip() if self.probe_type == "t": self.library = "" # kernel self.tp_category = parts[1] self.tp_event = self.function elif self.probe_type == "u": self.library = parts[1] self.probe_func_name = self._make_valid_identifier( "%s_probe%d" % (self.function, Probe.next_probe_index)) self._enable_usdt_probe() else: self.library = parts[1] self.is_user = len(self.library) > 0 self.signature = fparts[1].strip()[:-1] self._parse_signature() # If the user didn't specify an expression to probe, we probe # the retval in a ret probe, or simply the value "1" otherwise. self.is_default_expr = len(parts) < 5 if not self.is_default_expr: self._parse_expr_types(parts[3]) self._parse_exprs(parts[4]) if len(self.exprs) != len(self.expr_types): self._bail("mismatched # of exprs and types") if self.type == "hist" and len(self.expr_types) > 1: self._bail("histograms can only have 1 expr") else: if not self.probe_type == "r" and self.type == "hist": self._bail("histograms must have expr") self.expr_types = \ ["u64" if not self.probe_type == "r" else "int"] self.exprs = \ ["1" if not self.probe_type == "r" else "$retval"] self.filter = "" if len(parts) != 6 else parts[5] self._substitute_exprs() # Do we need to attach an entry probe so that we can collect an # argument that is required for an exit (return) probe? def check(expr): keywords = ["$entry", "$latency"] return any(map(lambda kw: kw in expr, keywords)) self.entry_probe_required = self.probe_type == "r" and \ (any(map(check, self.exprs)) or check(self.filter)) self.probe_func_name = self._make_valid_identifier( "%s_probe%d" % (self.function, Probe.next_probe_index)) self.probe_hash_name = self._make_valid_identifier( "%s_hash%d" % (self.function, Probe.next_probe_index)) Probe.next_probe_index += 1 def _enable_usdt_probe(self): self.usdt_ctx = USDT(path=self.library, pid=self.pid) self.usdt_ctx.enable_probe( self.function, self.probe_func_name) def _substitute_exprs(self): def repl(expr): expr = self._substitute_aliases(expr) rdict = StrcmpRewrite.rewrite_expr(expr, self.bin_cmp, self.library, self.probe_user_list, self.streq_functions, Probe.streq_index) expr = rdict["expr"] self.streq_functions = rdict["streq_functions"] Probe.streq_index = rdict["probeid"] return expr.replace("$retval", "PT_REGS_RC(ctx)") for i in range(0, len(self.exprs)): self.exprs[i] = repl(self.exprs[i]) self.filter = repl(self.filter) def _is_string(self, expr_type): return expr_type == "char*" or expr_type == "char *" def _generate_hash_field(self, i): if self._is_string(self.expr_types[i]): return "struct __string_t v%d;\n" % i else: return "%s v%d;\n" % (self.expr_types[i], i) def _generate_usdt_arg_assignment(self, i): expr = self.exprs[i] if self.probe_type == "u" and expr[0:3] == "arg": arg_index = int(expr[3]) arg_ctype = self.usdt_ctx.get_probe_arg_ctype( self.function, arg_index - 1) return (" %s %s = 0;\n" + " bpf_usdt_readarg(%s, ctx, &%s);\n") \ % (arg_ctype, expr, expr[3], expr) else: return "" def _generate_field_assignment(self, i): text = self._generate_usdt_arg_assignment(i) if self._is_string(self.expr_types[i]): if self.is_user or \ self.exprs[i] in self.probe_user_list: probe_readfunc = "bpf_probe_read_user" else: probe_readfunc = "bpf_probe_read_kernel" return (text + " %s(&__key.v%d.s," + " sizeof(__key.v%d.s), (void *)%s);\n") % \ (probe_readfunc, i, i, self.exprs[i]) else: return text + " __key.v%d = %s;\n" % \ (i, self.exprs[i]) def _generate_hash_decl(self): if self.type == "hist": return "BPF_HISTOGRAM(%s, %s);" % \ (self.probe_hash_name, self.expr_types[0]) else: text = "struct %s_key_t {\n" % self.probe_hash_name for i in range(0, len(self.expr_types)): text += self._generate_hash_field(i) text += "};\n" text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \ (self.probe_hash_name, self.probe_hash_name) return text def _generate_key_assignment(self): if self.type == "hist": return self._generate_usdt_arg_assignment(0) + \ ("%s __key = %s;\n" % (self.expr_types[0], self.exprs[0])) else: text = "struct %s_key_t __key = {};\n" % \ self.probe_hash_name for i in range(0, len(self.exprs)): text += self._generate_field_assignment(i) return text def _generate_hash_update(self): if self.type == "hist": return "%s.increment(bpf_log2l(__key));" % \ self.probe_hash_name else: return "%s.increment(__key);" % self.probe_hash_name def _generate_pid_filter(self): # Kernel probes need to explicitly filter pid, because the # attach interface doesn't support pid filtering if self.pid is not None and not self.is_user: return "if (__tgid != %d) { return 0; }" % self.pid else: return "" def generate_text(self): program = "" probe_text = """ DATA_DECL """ + ( "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category, self.tp_event) if self.probe_type == "t" else "int PROBENAME(struct pt_regs *ctx SIGNATURE)") + """ { u64 __pid_tgid = bpf_get_current_pid_tgid(); u32 __pid = __pid_tgid; // lower 32 bits u32 __tgid = __pid_tgid >> 32; // upper 32 bits PID_FILTER PREFIX KEY_EXPR if (!(FILTER)) return 0; COLLECT return 0; } """ prefix = "" signature = "" # If any entry arguments are probed in a ret probe, we need # to generate an entry probe to collect them if self.entry_probe_required: program += self._generate_entry_probe() prefix += self._generate_retprobe_prefix() # Replace $entry(paramname) with a reference to the # value we collected when entering the function: self._replace_entry_exprs() if self.probe_type == "p" and len(self.signature) > 0: # Only entry uprobes/kprobes can have user-specified # signatures. Other probes force it to (). signature = ", " + self.signature program += probe_text.replace("PROBENAME", self.probe_func_name) program = program.replace("SIGNATURE", signature) program = program.replace("PID_FILTER", self._generate_pid_filter()) decl = self._generate_hash_decl() key_expr = self._generate_key_assignment() collect = self._generate_hash_update() program = program.replace("DATA_DECL", decl) program = program.replace("KEY_EXPR", key_expr) program = program.replace("FILTER", "1" if len(self.filter) == 0 else self.filter) program = program.replace("COLLECT", collect) program = program.replace("PREFIX", prefix) return self.streq_functions + program def _attach_u(self): libpath = BPF.find_library(self.library) if libpath is None: libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: self._bail("unable to find library %s" % self.library) if self.probe_type == "r": self.bpf.attach_uretprobe(name=libpath, sym=self.function, fn_name=self.probe_func_name, pid=self.pid or -1) else: self.bpf.attach_uprobe(name=libpath, sym=self.function, fn_name=self.probe_func_name, pid=self.pid or -1) def _attach_k(self): if self.probe_type == "t": pass # Nothing to do for tracepoints elif self.probe_type == "r": self.bpf.attach_kretprobe(event=self.function, fn_name=self.probe_func_name) else: self.bpf.attach_kprobe(event=self.function, fn_name=self.probe_func_name) def attach(self, bpf): self.bpf = bpf if self.probe_type == "u": return if self.is_user: self._attach_u() else: self._attach_k() if self.entry_probe_required: self._attach_entry_probe() def _v2s(self, v): # Most fields can be converted with plain str(), but strings # are wrapped in a __string_t which has an .s field if "__string_t" in type(v).__name__: return str(v.s) return str(v) def _display_expr(self, i): # Replace ugly latency calculation with $latency expr = self.exprs[i].replace( "(bpf_ktime_get_ns() - *____latency_val)", "$latency") # Replace alias values back with the alias name for alias, subst in Probe.aliases.items(): expr = expr.replace(subst, alias) # Replace retval expression with $retval expr = expr.replace("PT_REGS_RC(ctx)", "$retval") # Replace ugly (*__param_val) expressions with param name return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr) def _display_key(self, key): if self.is_default_expr: if not self.probe_type == "r": return "total calls" else: return "retval = %s" % str(key.v0) else: # The key object has v0, ..., vk fields containing # the values of the expressions from self.exprs def str_i(i): key_i = self._v2s(getattr(key, "v%d" % i)) return "%s = %s" % \ (self._display_expr(i), key_i) return ", ".join(map(str_i, range(0, len(self.exprs)))) def display(self, top): data = self.bpf.get_table(self.probe_hash_name) if self.type == "freq": print(self.label or self.raw_spec) print("\t%-10s %s" % ("COUNT", "EVENT")) sdata = sorted(data.items(), key=lambda p: p[1].value) if top is not None: sdata = sdata[-top:] for key, value in sdata: # Print some nice values if the user didn't # specify an expression to probe if self.is_default_expr: if not self.probe_type == "r": key_str = "total calls" else: key_str = "retval = %s" % \ self._v2s(key.v0) else: key_str = self._display_key(key) print("\t%-10s %s" % (str(value.value), key_str)) elif self.type == "hist": label = self.label or (self._display_expr(0) if not self.is_default_expr else "retval") data.print_log2_hist(val_type=label) if not self.cumulative: data.clear() def __str__(self): return self.label or self.raw_spec
class Probe(object): def __init__(self, pattern, use_regex=False, pid=None, per_pid=False): """Init a new probe. Init the probe from the pattern provided by the user. The supported patterns mimic the 'trace' and 'argdist' tools, but are simpler because we don't have to distinguish between probes and retprobes. func -- probe a kernel function lib:func -- probe a user-space function in the library 'lib' p::func -- same thing as 'func' p:lib:func -- same thing as 'lib:func' t:cat:event -- probe a kernel tracepoint u:lib:probe -- probe a USDT tracepoint """ parts = pattern.split(':') if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] elif len(parts) == 3: if parts[0] == "t": parts = ["t", "", "%s:%s" % tuple(parts[1:])] if parts[0] not in ["p", "t", "u"]: raise Exception("Type must be 'p', 't', or 'u', but got %s" % parts[0]) else: raise Exception("Too many ':'-separated components in pattern %s" % pattern) (self.type, self.library, self.pattern) = parts if not use_regex: self.pattern = self.pattern.replace('*', '.*') self.pattern = '^' + self.pattern + '$' if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = libpath self.pid = pid self.per_pid = per_pid self.matched = 0 def is_kernel_probe(self): return self.type == "t" or (self.type == "p" and self.library == "") def attach(self): if self.type == "p": if self.library: self.bpf.attach_uprobe(name=self.library, sym_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_uprobes() else: self.bpf.attach_kprobe(event_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_kprobes() elif self.type == "t": self.bpf.attach_tracepoint(tp_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_tracepoints() elif self.type == "u": pass # Nothing to do -- attach already happened in `load` if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern) def load(self): trace_count_text = """ int trace_count(void *ctx) { FILTER struct key_t key = {}; key.pid = GET_PID; key.stackid = stack_traces.get_stackid(ctx, STACK_FLAGS); u64 zero = 0; u64 *val = counts.lookup_or_init(&key, &zero); (*val)++; return 0; } """ bpf_text = """#include <uapi/linux/ptrace.h> struct key_t { u32 pid; int stackid; }; BPF_HASH(counts, struct key_t); BPF_STACK_TRACE(stack_traces, 1024); """ # We really mean the tgid from the kernel's perspective, which is in # the top 32 bits of bpf_get_current_pid_tgid(). if self.is_kernel_probe() and self.pid: trace_count_text = trace_count_text.replace( 'FILTER', ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' + 'if (pid != %d) { return 0; }') % (self.pid)) else: trace_count_text = trace_count_text.replace('FILTER', '') # We need per-pid statistics when tracing a user-space process, because # the meaning of the symbols depends on the pid. We also need them if # per-pid statistics were requested with -P. if self.per_pid or not self.is_kernel_probe(): trace_count_text = trace_count_text.replace( 'GET_PID', 'bpf_get_current_pid_tgid() >> 32') else: trace_count_text = trace_count_text.replace( 'GET_PID', '0xffffffff') stack_flags = 'BPF_F_REUSE_STACKID' if not self.is_kernel_probe(): stack_flags += '| BPF_F_USER_STACK' # can't do both U *and* K trace_count_text = trace_count_text.replace('STACK_FLAGS', stack_flags) self.usdt = None if self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) for probe in self.usdt.enumerate_probes(): if not self.pid and (probe.bin_path != self.library): continue if re.match(self.pattern, probe.name): # This hack is required because the bpf_usdt_readarg # functions generated need different function names for # each attached probe. If we just stick to trace_count, # we'd get multiple bpf_usdt_readarg helpers with the same # name when enabling more than one USDT probe. new_func = "trace_count_%d" % self.matched bpf_text += trace_count_text.replace( "trace_count", new_func) self.usdt.enable_probe(probe.name, new_func) self.matched += 1 if debug: print(self.usdt.get_text()) else: bpf_text += trace_count_text if debug: print(bpf_text) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else [])
class Probe(object): def __init__(self, pattern, kernel_stack, user_stack, use_regex=False, pid=None, per_pid=False, cpu=None): """Init a new probe. Init the probe from the pattern provided by the user. The supported patterns mimic the 'trace' and 'argdist' tools, but are simpler because we don't have to distinguish between probes and retprobes. func -- probe a kernel function lib:func -- probe a user-space function in the library 'lib' p::func -- same thing as 'func' p:lib:func -- same thing as 'lib:func' t:cat:event -- probe a kernel tracepoint u:lib:probe -- probe a USDT tracepoint """ self.kernel_stack = kernel_stack self.user_stack = user_stack parts = pattern.split(':') if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] elif len(parts) == 3: if parts[0] == "t": parts = ["t", "", "%s:%s" % tuple(parts[1:])] if parts[0] not in ["p", "t", "u"]: raise Exception("Type must be 'p', 't', or 'u', but got %s" % parts[0]) else: raise Exception("Too many ':'-separated components in pattern %s" % pattern) (self.type, self.library, self.pattern) = parts if not use_regex: self.pattern = self.pattern.replace('*', '.*') self.pattern = '^' + self.pattern + '$' if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = libpath self.pid = pid self.per_pid = per_pid self.cpu = cpu self.matched = 0 def is_kernel_probe(self): return self.type == "t" or (self.type == "p" and self.library == "") def attach(self): if self.type == "p": if self.library: self.bpf.attach_uprobe(name=self.library, sym_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_uprobes() else: self.bpf.attach_kprobe(event_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_kprobes() elif self.type == "t": self.bpf.attach_tracepoint(tp_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_tracepoints() elif self.type == "u": pass # Nothing to do -- attach already happened in `load` if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern) def load(self): ctx_name = "ctx" stack_trace = "" if self.user_stack: stack_trace += """ key.user_stack_id = stack_traces.get_stackid( %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK );""" % (ctx_name) else: stack_trace += "key.user_stack_id = -1;" if self.kernel_stack: stack_trace += """ key.kernel_stack_id = stack_traces.get_stackid( %s, BPF_F_REUSE_STACKID );""" % (ctx_name) else: stack_trace += "key.kernel_stack_id = -1;" trace_count_text = """ int trace_count(void *ctx) { FILTER struct key_t key = {}; key.tgid = GET_TGID; STORE_COMM %s counts.increment(key); return 0; } """ trace_count_text = trace_count_text % (stack_trace) bpf_text = """#include <uapi/linux/ptrace.h> #include <linux/sched.h> struct key_t { // no pid (thread ID) so that we do not needlessly split this key u32 tgid; int kernel_stack_id; int user_stack_id; char name[TASK_COMM_LEN]; }; BPF_HASH(counts, struct key_t); BPF_STACK_TRACE(stack_traces, 1024); """ filter_text = [] # We really mean the tgid from the kernel's perspective, which is in # the top 32 bits of bpf_get_current_pid_tgid(). if self.is_kernel_probe() and self.pid: filter_text.append( 'u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' + 'if (pid != %d) { return 0; }' % self.pid) if self.is_kernel_probe() and self.cpu: filter_text.append( 'struct task_struct *task; task = (struct task_struct*)bpf_get_current_task(); ' + 'if (task->cpu != %d) { return 0; }' % self.cpu) trace_count_text = trace_count_text.replace('FILTER', '\n '.join(filter_text)) # We need per-pid statistics when tracing a user-space process, because # the meaning of the symbols depends on the pid. We also need them if # per-pid statistics were requested with -P, or for user stacks. if self.per_pid or not self.is_kernel_probe() or self.user_stack: trace_count_text = trace_count_text.replace( 'GET_TGID', 'bpf_get_current_pid_tgid() >> 32') trace_count_text = trace_count_text.replace( 'STORE_COMM', 'bpf_get_current_comm(&key.name, sizeof(key.name));') else: # kernel stacks only. skip splitting on PID so these aggregate # together, and don't store the process name. trace_count_text = trace_count_text.replace( 'GET_TGID', '0xffffffff') trace_count_text = trace_count_text.replace('STORE_COMM', '') self.usdt = None if self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) for probe in self.usdt.enumerate_probes(): if not self.pid and (probe.bin_path != self.library): continue if re.match(self.pattern, probe.name): # This hack is required because the bpf_usdt_readarg # functions generated need different function names for # each attached probe. If we just stick to trace_count, # we'd get multiple bpf_usdt_readarg helpers with the same # name when enabling more than one USDT probe. new_func = "trace_count_%d" % self.matched bpf_text += trace_count_text.replace( "trace_count", new_func) self.usdt.enable_probe(probe.name, new_func) self.matched += 1 if debug: print(self.usdt.get_text()) else: bpf_text += trace_count_text if debug: print(bpf_text) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else [])
""" if not args.server: text += probe.replace("PROBE", "client").replace("TYPE", "CLIENT") if not args.client: text += probe.replace("PROBE", "server").replace("TYPE", "SERVER") CLIENT = 0 SERVER = 1 if args.stack: text = "#define NEED_STACK\n" + text usdt = USDT(pid=args.pid) if not args.server: usdt.enable_probe("http__client__request", "client_start") usdt.enable_probe("http__client__response", "client_end") if not args.client: usdt.enable_probe("http__server__request", "server_start") usdt.enable_probe("http__server__response", "server_end") bpf = BPF(text=text, usdt_contexts=[usdt], debug=DEBUG_PREPROCESSOR if args.debug else 0) class Data(ct.Structure): _fields_ = [ ("start_ns", ct.c_ulong), ("duration_ns", ct.c_ulong), ("port", ct.c_int), ("type", ct.c_int), ("stackid", ct.c_int), ("method", ct.c_char * 16),
parser.add_argument('-d', type=int, dest='duration_seconds', help='Recording duration in seconds', default=10) return parser args = get_arg_parser().parse_args() print(prog % (next_power_of_two(args.sampling_frequency) - 1)) pid = args.pid usdt = USDT(path=args.lib_jvm_path, pid=args.pid) usdt.enable_probe(probe="object__alloc", fn_name="trace_alloc") bpf = BPF(text=prog % (next_power_of_two(args.sampling_frequency) - 1), usdt_contexts=[usdt]) time.sleep(float(str(args.duration_seconds))) if len(bpf["tids"]) == 0: print("No data found - are DTrace probes enabled in running process?") stack_traces = bpf["stack_traces"] all_stacks = [] stack_counts = dict() for k, v in bpf["counts"].iteritems(): stack = [] for addr in stack_traces.walk(k.user_stack_id):
info->total_ns += bpf_ktime_get_ns() - e->timestamp; sysentry.delete(&pid); return 0; } #endif // LATENCY #endif // SYSCALLS """.replace("READ_CLASS", read_class) \ .replace("READ_METHOD", read_method) \ .replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \ .replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \ .replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \ .replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "") if args.language: usdt = USDT(pid=args.pid) usdt.enable_probe(entry_probe, "trace_entry") if args.latency: usdt.enable_probe(return_probe, "trace_return") else: usdt = None if args.verbose: if usdt: print(usdt.get_text()) print(program) bpf = BPF(text=program, usdt_contexts=[usdt] if usdt else []) if args.syscalls: syscall_regex = "^[Ss]y[Ss]_.*" bpf.attach_kprobe(event_re=syscall_regex, fn_name="syscall_entry") if args.latency:
def test_uncache(self): """ Tests the utxocache:uncache tracepoint API. https://github.com/bitcoin/bitcoin/blob/master/doc/tracing.md#tracepoint-utxocacheuncache """ # To trigger an UTXO uncache from the cache, we create an invalid transaction # spending a not-cached, but existing UTXO. During transaction validation, this # the UTXO is added to the utxo cache, but as the transaction is invalid, it's # uncached again. self.log.info("testing the utxocache:uncache tracepoint API") # Retrieve the txid for the UTXO created in the first block. This UTXO is not # in our UTXO cache. EARLY_BLOCK_HEIGHT = 1 block_1_hash = self.nodes[0].getblockhash(EARLY_BLOCK_HEIGHT) block_1 = self.nodes[0].getblock(block_1_hash) block_1_coinbase_txid = block_1["tx"][0] # Create a transaction and invalidate it by changing the txid of the previous # output to the coinbase txid of the block at height 1. invalid_tx = self.wallet.create_self_transfer( from_node=self.nodes[0])["tx"] invalid_tx.vin[0].prevout.hash = int(block_1_coinbase_txid, 16) self.log.info("hooking into the utxocache:uncache tracepoint") ctx = USDT(path=str(self.options.bitcoind)) ctx.enable_probe(probe="utxocache:uncache", fn_name="trace_utxocache_uncache") bpf = BPF(text=utxocache_changes_program, usdt_contexts=[ctx], debug=0) # The handle_* function is a ctypes callback function called from C. When # we assert in the handle_* function, the AssertError doesn't propagate # back to Python. The exception is ignored. We manually count and assert # that the handle_* functions succeeded. EXPECTED_HANDLE_UNCACHE_SUCCESS = 1 handle_uncache_succeeds = 0 def handle_utxocache_uncache(_, data, __): nonlocal handle_uncache_succeeds event = ctypes.cast(data, ctypes.POINTER(UTXOCacheChange)).contents self.log.info(f"handle_utxocache_uncache(): {event}") assert_equal(block_1_coinbase_txid, bytes(event.txid[::-1]).hex()) assert_equal(0, event.index) # prevout index assert_equal(EARLY_BLOCK_HEIGHT, event.height) assert_equal(50 * COIN, event.value) assert_equal(True, event.is_coinbase) handle_uncache_succeeds += 1 bpf["utxocache_uncache"].open_perf_buffer(handle_utxocache_uncache) self.log.info( "testmempoolaccept the invalid transaction to trigger an UTXO-cache uncache") result = self.nodes[0].testmempoolaccept( [invalid_tx.serialize().hex()])[0] assert_equal(result["allowed"], False) bpf.perf_buffer_poll(timeout=100) bpf.cleanup() self.log.info( f"check that we successfully traced {EXPECTED_HANDLE_UNCACHE_SUCCESS} uncaches") assert_equal(EXPECTED_HANDLE_UNCACHE_SUCCESS, handle_uncache_succeeds)
def do_gc_probes(): # load BPF program bpf_text = """ #include <uapi/linux/ptrace.h> #include <linux/sched.h> struct val_t { u32 pid; char comm[TASK_COMM_LEN]; u64 ts; }; struct data_t { u32 pid; u64 ts; u64 delta; char comm[TASK_COMM_LEN]; }; BPF_HASH(start, u32, struct val_t); BPF_PERF_OUTPUT(events); int do_entry(struct pt_regs *ctx) { if (!PT_REGS_PARM1(ctx)) return 0; struct val_t val = {}; u32 pid = bpf_get_current_pid_tgid(); if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) { val.pid = bpf_get_current_pid_tgid(); val.ts = bpf_ktime_get_ns(); start.update(&pid, &val); } return 0; } int do_return(struct pt_regs *ctx) { struct val_t *valp; struct data_t data = {}; u64 delta; u32 pid = bpf_get_current_pid_tgid(); u64 tsp = bpf_ktime_get_ns(); valp = start.lookup(&pid); if (valp == 0) return 0; // missed start bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm); data.pid = valp->pid; data.delta = tsp - valp->ts; data.ts = tsp / 1000; if (data.delta > 0) events.perf_submit(ctx, &data, sizeof(data)); start.delete(&pid); return 0; } """ u = USDT(pid=args.pid) u.enable_probe(probe="gc__mark__begin", fn_name="do_entry") u.enable_probe(probe="gc__mark__end", fn_name="do_return") b = BPF(text=bpf_text ,usdt=u) # b.attach_uprobe(name="ruby", sym="gc_start_internal", fn_name="do_entry", pid=args.pid) # b.attach_uretprobe(name="ruby", sym="gc_start_internal", fn_name="do_return", pid=args.pid) print("%-9s %-6s %-16s %10s" % ("TIME", "PID", "COMM", "LATms")) b["events"].open_perf_buffer(print_gc_event) try: while 1: b.kprobe_poll() except: pass
def test_add_spent(self): """ Tests the utxocache:add utxocache:spent tracepoint API See https://github.com/bitcoin/bitcoin/blob/master/doc/tracing.md#tracepoint-utxocacheadd and https://github.com/bitcoin/bitcoin/blob/master/doc/tracing.md#tracepoint-utxocachespent """ self.log.info( "test the utxocache:add and utxocache:spent tracepoint API") self.log.info("create an unconfirmed transaction") self.wallet.send_self_transfer(from_node=self.nodes[0]) # We mine a block to trace changes (add/spent) to the active in-memory cache # of the UTXO set (see CoinsTip() of CCoinsViewCache). However, in some cases # temporary clones of the active cache are made. For example, during mining with # the generate RPC call, the block is first tested in TestBlockValidity(). There, # a clone of the active cache is modified during a test ConnectBlock() call. # These are implementation details we don't want to test here. Thus, after # mining, we invalidate the block, start the tracing, and then trace the cache # changes to the active utxo cache. self.log.info("mine and invalidate a block that is later reconsidered") block_hash = self.generate(self.wallet, 1)[0] self.nodes[0].invalidateblock(block_hash) self.log.info( "hook into the utxocache:add and utxocache:spent tracepoints") ctx = USDT(path=str(self.options.bitcoind)) ctx.enable_probe(probe="utxocache:add", fn_name="trace_utxocache_add") ctx.enable_probe(probe="utxocache:spent", fn_name="trace_utxocache_spent") bpf = BPF(text=utxocache_changes_program, usdt_contexts=[ctx], debug=0) # The handle_* function is a ctypes callback function called from C. When # we assert in the handle_* function, the AssertError doesn't propagate # back to Python. The exception is ignored. We manually count and assert # that the handle_* functions succeeded. EXPECTED_HANDLE_ADD_SUCCESS = 2 EXPECTED_HANDLE_SPENT_SUCCESS = 1 handle_add_succeeds = 0 handle_spent_succeeds = 0 expected_utxocache_spents = [] expected_utxocache_adds = [] def handle_utxocache_add(_, data, __): nonlocal handle_add_succeeds event = ctypes.cast(data, ctypes.POINTER(UTXOCacheChange)).contents self.log.info(f"handle_utxocache_add(): {event}") add = expected_utxocache_adds.pop(0) assert_equal(add["txid"], bytes(event.txid[::-1]).hex()) assert_equal(add["index"], event.index) assert_equal(add["height"], event.height) assert_equal(add["value"], event.value) assert_equal(add["is_coinbase"], event.is_coinbase) handle_add_succeeds += 1 def handle_utxocache_spent(_, data, __): nonlocal handle_spent_succeeds event = ctypes.cast(data, ctypes.POINTER(UTXOCacheChange)).contents self.log.info(f"handle_utxocache_spent(): {event}") spent = expected_utxocache_spents.pop(0) assert_equal(spent["txid"], bytes(event.txid[::-1]).hex()) assert_equal(spent["index"], event.index) assert_equal(spent["height"], event.height) assert_equal(spent["value"], event.value) assert_equal(spent["is_coinbase"], event.is_coinbase) handle_spent_succeeds += 1 bpf["utxocache_add"].open_perf_buffer(handle_utxocache_add) bpf["utxocache_spent"].open_perf_buffer(handle_utxocache_spent) # We trigger a block re-connection. This causes changes (add/spent) # to the UTXO-cache which in turn triggers the tracepoints. self.log.info("reconsider the previously invalidated block") self.nodes[0].reconsiderblock(block_hash) block = self.nodes[0].getblock(block_hash, 2) for (block_index, tx) in enumerate(block["tx"]): for vin in tx["vin"]: if "coinbase" not in vin: prevout_tx = self.nodes[0].getrawtransaction( vin["txid"], True) prevout_tx_block = self.nodes[0].getblockheader( prevout_tx["blockhash"]) spends_coinbase = "coinbase" in prevout_tx["vin"][0] expected_utxocache_spents.append({ "txid": vin["txid"], "index": vin["vout"], "height": prevout_tx_block["height"], "value": int(prevout_tx["vout"][vin["vout"]]["value"] * COIN), "is_coinbase": spends_coinbase, }) for (i, vout) in enumerate(tx["vout"]): if vout["scriptPubKey"]["type"] != "nulldata": expected_utxocache_adds.append({ "txid": tx["txid"], "index": i, "height": block["height"], "value": int(vout["value"] * COIN), "is_coinbase": block_index == 0, }) assert_equal(EXPECTED_HANDLE_ADD_SUCCESS, len(expected_utxocache_adds)) assert_equal(EXPECTED_HANDLE_SPENT_SUCCESS, len(expected_utxocache_spents)) bpf.perf_buffer_poll(timeout=200) bpf.cleanup() self.log.info( f"check that we successfully traced {EXPECTED_HANDLE_ADD_SUCCESS} adds and {EXPECTED_HANDLE_SPENT_SUCCESS} spent") assert_equal(0, len(expected_utxocache_adds)) assert_equal(0, len(expected_utxocache_spents)) assert_equal(EXPECTED_HANDLE_ADD_SUCCESS, handle_add_succeeds) assert_equal(EXPECTED_HANDLE_SPENT_SUCCESS, handle_spent_succeeds)
if args.language == "java": program += """ int alloc_entry(struct pt_regs *ctx) { struct key_t key = {}; struct val_t *valp, zero = {}; u64 classptr = 0, size = 0; bpf_usdt_readarg(2, ctx, &classptr); bpf_usdt_readarg(4, ctx, &size); bpf_probe_read(&key.name, sizeof(key.name), (void *)classptr); valp = allocs.lookup_or_init(&key, &zero); valp->total_size += size; valp->num_allocs += 1; return 0; } """ usdt.enable_probe("object__alloc", "alloc_entry") # # Ruby # elif args.language == "ruby": create_template = """ int THETHING_alloc_entry(struct pt_regs *ctx) { struct key_t key = { .name = "THETHING" }; struct val_t *valp, zero = {}; u64 size = 0; bpf_usdt_readarg(1, ctx, &size); valp = allocs.lookup_or_init(&key, &zero); valp->total_size += size; valp->num_allocs += 1; return 0; }
def test_flush(self): """ Tests the utxocache:flush tracepoint API. See https://github.com/bitcoin/bitcoin/blob/master/doc/tracing.md#tracepoint-utxocacheflush""" self.log.info("test the utxocache:flush tracepoint API") self.log.info("hook into the utxocache:flush tracepoint") ctx = USDT(path=str(self.options.bitcoind)) ctx.enable_probe(probe="utxocache:flush", fn_name="trace_utxocache_flush") bpf = BPF(text=utxocache_flushes_program, usdt_contexts=[ctx], debug=0) # The handle_* function is a ctypes callback function called from C. When # we assert in the handle_* function, the AssertError doesn't propagate # back to Python. The exception is ignored. We manually count and assert # that the handle_* functions succeeded. EXPECTED_HANDLE_FLUSH_SUCCESS = 3 handle_flush_succeeds = 0 possible_cache_sizes = set() expected_flushes = [] def handle_utxocache_flush(_, data, __): nonlocal handle_flush_succeeds event = ctypes.cast(data, ctypes.POINTER(UTXOCacheFlush)).contents self.log.info(f"handle_utxocache_flush(): {event}") expected = expected_flushes.pop(0) assert_equal(expected["mode"], FLUSHMODE_NAME[event.mode]) possible_cache_sizes.remove(event.size) # fails if size not in set # sanity checks only assert(event.memory > 0) assert(event.duration > 0) handle_flush_succeeds += 1 bpf["utxocache_flush"].open_perf_buffer(handle_utxocache_flush) self.log.info("stop the node to flush the UTXO cache") UTXOS_IN_CACHE = 104 # might need to be changed if the eariler tests are modified # A node shutdown causes two flushes. One that flushes UTXOS_IN_CACHE # UTXOs and one that flushes 0 UTXOs. Normally the 0-UTXO-flush is the # second flush, however it can happen that the order changes. possible_cache_sizes = {UTXOS_IN_CACHE, 0} flush_for_shutdown = {"mode": "ALWAYS", "for_prune": False} expected_flushes.extend([flush_for_shutdown, flush_for_shutdown]) self.stop_node(0) bpf.perf_buffer_poll(timeout=200) self.log.info("check that we don't expect additional flushes") assert_equal(0, len(expected_flushes)) assert_equal(0, len(possible_cache_sizes)) self.log.info("restart the node with -prune") self.start_node(0, ["-fastprune=1", "-prune=1"]) BLOCKS_TO_MINE = 350 self.log.info(f"mine {BLOCKS_TO_MINE} blocks to be able to prune") self.generate(self.wallet, BLOCKS_TO_MINE) # we added BLOCKS_TO_MINE coinbase UTXOs to the cache possible_cache_sizes = {BLOCKS_TO_MINE} expected_flushes.append( {"mode": "NONE", "for_prune": True, "size_fn": lambda x: x == BLOCKS_TO_MINE}) self.log.info(f"prune blockchain to trigger a flush for pruning") self.nodes[0].pruneblockchain(315) bpf.perf_buffer_poll(timeout=500) bpf.cleanup() self.log.info( f"check that we don't expect additional flushes and that the handle_* function succeeded") assert_equal(0, len(expected_flushes)) assert_equal(0, len(possible_cache_sizes)) assert_equal(EXPECTED_HANDLE_FLUSH_SUCCESS, handle_flush_succeeds)
def test_attach1(self): # enable USDT probe from given PID and verifier generated BPF programs u = USDT(pid=int(self.app.pid)) u.enable_probe(probe="probe_point_1", fn_name="do_trace1") u.enable_probe(probe="probe_point_2", fn_name="do_trace2") u.enable_probe(probe="probe_point_3", fn_name="do_trace3") u.enable_probe(probe="probe_point_4", fn_name="do_trace4") u.enable_probe(probe="probe_point_5", fn_name="do_trace5") b = BPF(text=self.bpf_text, usdt_contexts=[u], debug=4) # Event states for each event: # 0 - probe not caught, 1 - probe caught with correct value, # 2 - probe caught with incorrect value self.evt_st_1 = 0 self.evt_st_2 = 0 self.evt_st_3 = 0 # define output data structure in Python class Data1(ct.Structure): _fields_ = [("v1", ct.c_char), ("v2", ct.c_int)] class Data2(ct.Structure): _fields_ = [("v1", ct.c_int), ("v2", ct.c_char)] class Data3(ct.Structure): _fields_ = [("v1", ct.c_int), ("v2", ct.c_int)] class Data4(ct.Structure): _fields_ = [("v1", ct.c_ulonglong), ("v2", ct.c_char * 64)] class Data5(ct.Structure): _fields_ = [("v1", ct.c_char * 64), ("v2", ct.c_ulonglong)] def check_event_val(event, event_state, v1, v2, v3, v4): if ((event.v1 == v1 and event.v2 == v2) or (event.v1 == v3 and event.v2 == v4)): if (event_state == 0 or event_state == 1): return 1 return 2 def print_event1(cpu, data, size): event = ct.cast(data, ct.POINTER(Data1)).contents self.evt_st_1 = check_event_val(event, self.evt_st_1, b'\x0d', 40, b'\x08', 200) def print_event2(cpu, data, size): event = ct.cast(data, ct.POINTER(Data2)).contents # pretend we have two identical probe points to simplify the code self.evt_st_2 = check_event_val(event, self.evt_st_2, 5, b'\x04', 5, b'\x04') def print_event3(cpu, data, size): event = ct.cast(data, ct.POINTER(Data3)).contents self.evt_st_3 = check_event_val(event, self.evt_st_3, 200, 40, 8, 13) def print_event4(cpu, data, size): event = ct.cast(data, ct.POINTER(Data4)).contents print("%s" % event.v2) def print_event5(cpu, data, size): event = ct.cast(data, ct.POINTER(Data5)).contents print("%s" % event.v1) # loop with callback to print_event b["event1"].open_perf_buffer(print_event1) b["event2"].open_perf_buffer(print_event2) b["event3"].open_perf_buffer(print_event3) b["event4"].open_perf_buffer(print_event4) b["event5"].open_perf_buffer(print_event5) # three iterations to make sure we get some probes and have time to process them for i in range(3): b.kprobe_poll() self.assertTrue(self.evt_st_1 == 1 and self.evt_st_2 == 1 and self.evt_st_3 == 1)
"-DHHVM_TRACING_FILE_NAME_MAX={}".format(128), "-DHHVM_TRACING_CLASS_NAME_MAX={}".format(128), "-DHHVM_TRACING_FUNCTION_MAX={}".format(128), ] bpf_text = "" with open("../strobelight_hhvm_structs.h", "r") as structs_file: bpf_text += structs_file.read() + "\n" with open("./strobelight_hhvm_signal_latency.c", "r") as latency_file: bpf_text += latency_file.read() usdts = [] for pid in args.pids: usdt = USDT(pid=pid) usdt.enable_probe("hhvm_surprise", "on_hhvm_signal_handler") print("Enabled tracing on {}\n".format(pid)) usdts.append(usdt) bpf = BPF(text=bpf_text, usdt_contexts=usdts, cflags=cflags) hhvm_pids = bpf.get_table("hhvm_pids") for pid in args.pids: hhvm_pids[c_int(pid)] = c_int(1) # A generic perf event for driving the signal bpf.attach_perf_event( ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CPU_CYCLES, fn_name="on_event", sample_period=10000000,
if (!timestampp) return 0; u64 delta = bpf_ktime_get_ns() - *timestampp; if (delta/1000000 < %d) return 0; delta /= %d; latency.increment(bpf_log2l(delta)); temp.delete(&pid); return 0; } """ % (args.threshold, 1000000 if args.milliseconds else 1000) usdt = USDT(pid=int(dbpid)) usdt.enable_probe("query__start", "probe_start") usdt.enable_probe("query__done", "probe_end") bpf = BPF(text=program, usdt_contexts=[usdt]) if args.verbose: print(usdt.get_text()) print(program) print("Tracing database queries slower than %dms for PID %d... Ctrl+C to quit." % (args.threshold, dbpid)) try: sleep(999999999999) except KeyboardInterrupt: pass
def main(): # # Don't like these globals, but ctx passing does not seem to work with the # existing open_ring_buffer() API :( # global b global options # # Argument parsing # parser = argparse.ArgumentParser() parser.add_argument("--buffer-page-count", help="Number of BPF ring buffer pages, default 1024", type=int, default=1024, metavar="NUMBER") parser.add_argument("-D", "--debug", help="Enable eBPF debugging", type=int, const=0x3f, default=0, nargs='?') parser.add_argument('-d', '--packet-decode', help='Display packet content in selected mode, ' 'default none', choices=['none', 'hex', 'decode'], default='none') parser.add_argument("-f", "--flow-key-size", help="Set maximum flow key size to capture, " "default 64", type=buffer_size_type, default=64, metavar="[64-2048]") parser.add_argument('-k', '--flow-key-decode', help='Display flow-key content in selected mode, ' 'default none', choices=['none', 'hex', 'nlraw'], default='none') parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID", help="ovs-vswitch's PID", type=int, default=None) parser.add_argument("-s", "--packet-size", help="Set maximum packet size to capture, " "default 64", type=buffer_size_type, default=64, metavar="[64-2048]") parser.add_argument("-w", "--pcap", metavar="PCAP_FILE", help="Write upcall packets to specified pcap file.", type=str, default=None) options = parser.parse_args() # # Find the PID of the ovs-vswitchd daemon if not specified. # if options.pid is None: for proc in psutil.process_iter(): if 'ovs-vswitchd' in proc.name(): if options.pid is not None: print("ERROR: Multiple ovs-vswitchd daemons running, " "use the -p option!") sys.exit(-1) options.pid = proc.pid # # Error checking on input parameters # if options.pid is None: print("ERROR: Failed to find ovs-vswitchd's PID!") sys.exit(-1) if options.pcap is not None: if exists(options.pcap): print("ERROR: Destination capture file \"{}\" already exists!". format(options.pcap)) sys.exit(-1) options.buffer_page_count = next_power_of_two(options.buffer_page_count) # # Attach the usdt probe # u = USDT(pid=int(options.pid)) try: u.enable_probe(probe="recv_upcall", fn_name="do_trace") except USDTException as e: print("ERROR: {}" "ovs-vswitchd!".format( (re.sub('^', ' ' * 7, str(e), flags=re.MULTILINE)).strip().replace( "--with-dtrace or --enable-dtrace", "--enable-usdt-probes"))) sys.exit(-1) # # Uncomment to see how arguments are decoded. # print(u.get_text()) # # # Attach probe to running process # source = ebpf_source.replace("<MAX_PACKET_VAL>", str(options.packet_size)) source = source.replace("<MAX_KEY_VAL>", str(options.flow_key_size)) source = source.replace("<BUFFER_PAGE_CNT>", str(options.buffer_page_count)) b = BPF(text=source, usdt_contexts=[u], debug=options.debug) # # Print header # print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<10}".format( "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN", "FLOW_KEY_LEN")) # # Dump out all events # b['events'].open_ring_buffer(print_event) while 1: try: b.ring_buffer_poll() time.sleep(0.5) except KeyboardInterrupt: break dropcnt = b.get_table("dropcnt") for k in dropcnt.keys(): count = dropcnt.sum(k).value if k.value == 0 and count > 0: print("\nWARNING: Not all upcalls were captured, {} were dropped!" "\n Increase the BPF ring buffer size with the " "--buffer-page-count option.".format(count))
class Tool(object): examples = """ Probe specifier syntax: {p,r,t,u}:{[library],category}:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label] Where: p,r,t,u -- probe at function entry, function exit, kernel tracepoint, or USDT probe in exit probes: can use $retval, $entry(param), $latency library -- the library that contains the function (leave empty for kernel functions) category -- the category of the kernel tracepoint (e.g. net, sched) function -- the function name to trace (or tracepoint name) signature -- the function's parameters, as in the C header type -- the type of the expression to collect (supports multiple) expr -- the expression to collect (supports multiple) filter -- the filter that is applied to collected values label -- the label for this probe in the resulting output EXAMPLES: argdist -H 'p::__kmalloc(u64 size):u64:size' Print a histogram of allocation sizes passed to kmalloc argdist -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16' Print a frequency count of how many times process 1005 called malloc with an allocation size of 16 bytes argdist -C 'r:c:gets():char*:(char*)$retval#snooped strings' Snoop on all strings returned by gets() argdist -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte' Print a histogram of nanoseconds per byte from kmalloc allocations argdist -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC' Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC argdist -p 1005 -C 'p:c:write(int fd):int:fd' -T 5 Print frequency counts of how many times writes were issued to a particular file descriptor number, in process 1005, but only show the top 5 busiest fds argdist -p 1005 -H 'r:c:read()' Print a histogram of results (sizes) returned by read() in process 1005 argdist -C 'r::__vfs_read():u32:$PID:$latency > 100000' Print frequency of reads by process where the latency was >0.1ms argdist -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' Print a histogram of read sizes that were longer than 1ms argdist -H \\ 'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1' Print a histogram of buffer sizes passed to write() across all processes, where the file descriptor was 1 (STDOUT) argdist -C 'p:c:fork()#fork calls' Count fork() calls in libc across all processes Can also use funccount.py, which is easier and more flexible argdist -H 't:block:block_rq_complete():u32:tp.nr_sector' Print histogram of number of sectors in completing block I/O requests argdist -C 't:irq:irq_handler_entry():int:tp.irq' Aggregate interrupts by interrupt request (IRQ) argdist -C 'u:pthread:pthread_start():u64:arg2' -p 1337 Print frequency of function addresses used as a pthread start function, relying on the USDT pthread_start probe in process 1337 argdist -H \\ 'p:c:sleep(u32 seconds):u32:seconds' \\ 'p:c:nanosleep(struct timespec *req):long:req->tv_nsec' Print histograms of sleep() and nanosleep() parameter values argdist -p 2780 -z 120 \\ -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1' Spy on writes to STDOUT performed by process 2780, up to a string size of 120 characters """ def __init__(self): parser = argparse.ArgumentParser(description="Trace a " + "function and display a summary of its parameter values.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=Tool.examples) parser.add_argument("-p", "--pid", type=int, help="id of the process to trace (optional)") parser.add_argument("-z", "--string-size", default=80, type=int, help="maximum string size to read from char* arguments") parser.add_argument("-i", "--interval", default=1, type=int, help="output interval, in seconds") parser.add_argument("-n", "--number", type=int, dest="count", help="number of outputs") parser.add_argument("-v", "--verbose", action="store_true", help="print resulting BPF program code before executing") parser.add_argument("-T", "--top", type=int, help="number of top results to show (not applicable to " + "histograms)") parser.add_argument("-H", "--histogram", nargs="*", dest="histspecifier", metavar="specifier", help="probe specifier to capture histogram of " + "(see examples below)") parser.add_argument("-C", "--count", nargs="*", dest="countspecifier", metavar="specifier", help="probe specifier to capture count of " + "(see examples below)") parser.add_argument("-I", "--include", nargs="*", metavar="header", help="additional header files to include in the BPF program") self.args = parser.parse_args() self.usdt_ctx = None def _create_probes(self): self.probes = [] for specifier in (self.args.countspecifier or []): self.probes.append(Probe(self, "freq", specifier)) for histspecifier in (self.args.histspecifier or []): self.probes.append(Probe(self, "hist", histspecifier)) if len(self.probes) == 0: print("at least one specifier is required") exit() def enable_usdt_probe(self, probe_name, fn_name): if not self.usdt_ctx: self.usdt_ctx = USDT(pid=self.args.pid) self.usdt_ctx.enable_probe(probe_name, fn_name) def _generate_program(self): bpf_source = """ struct __string_t { char s[%d]; }; #include <uapi/linux/ptrace.h> """ % self.args.string_size for include in (self.args.include or []): bpf_source += "#include <%s>\n" % include bpf_source += BPF.generate_auto_includes( map(lambda p: p.raw_spec, self.probes)) bpf_source += Tracepoint.generate_decl() bpf_source += Tracepoint.generate_entry_probe() for probe in self.probes: bpf_source += probe.generate_text() if self.args.verbose: if self.usdt_ctx: print(self.usdt_ctx.get_text()) print(bpf_source) self.bpf = BPF(text=bpf_source, usdt=self.usdt_ctx) def _attach(self): Tracepoint.attach(self.bpf) for probe in self.probes: probe.attach(self.bpf) if self.args.verbose: print("open uprobes: %s" % self.bpf.open_uprobes) print("open kprobes: %s" % self.bpf.open_kprobes) def _main_loop(self): count_so_far = 0 while True: try: sleep(self.args.interval) except KeyboardInterrupt: exit() print("[%s]" % strftime("%H:%M:%S")) for probe in self.probes: probe.display(self.args.top) count_so_far += 1 if self.args.count is not None and \ count_so_far >= self.args.count: exit() def _close_probes(self): for probe in self.probes: probe.close() if self.args.verbose: print("closed probe: " + str(probe)) def run(self): try: self._create_probes() self._generate_program() self._attach() self._main_loop() except: if self.args.verbose: traceback.print_exc() elif sys.exc_info()[0] is not SystemExit: print(sys.exc_info()[1]) self._close_probes()
int command_failed(struct pt_regs *ctx) {{ struct failed_out out = {{}}; int err_code = 0; bpf_usdt_readarg(6, ctx, &err_code); error_hist.increment(err_code); const char* addr = NULL; bpf_usdt_readarg(2, ctx, &addr); bpf_probe_read_str(out.name, sizeof(out.name), addr); out.error_code = err_code; failed.perf_submit(ctx, &out, sizeof(out)); return 0; }} """.format(NUM_ERR_CODES=len(ERROR_CODES)) command_failed = USDT(pid=int(sys.argv[1])) command_failed.enable_probe(probe="commandFail", fn_name="command_failed") b = BPF(text = text, usdt_contexts=[command_failed]) command_to_errors = dict() def print_event(cpu, data, size): event = b["failed"].event(data) event_name = str(event.name, 'utf-8') if event_name not in command_to_errors: command_to_errors[event_name] = dict() if event.error_code not in command_to_errors[event_name]: command_to_errors[event_name][event.error_code] = 0 command_to_errors[event_name][event.error_code] += 1 b["failed"].open_perf_buffer(print_event)
// populate and emit data struct struct data_t data = {.pid = pid, .ts = sp->ts, .delta = delta}; bpf_probe_read(&data.query, sizeof(data.query), (void *)sp->query); events.perf_submit(ctx, &data, sizeof(data)); } start_tmp.delete(&pid); return 0; }; """ # enable USDT probe from given PID u = USDT(pid=pid) u.enable_probe(probe="query__start", fn_name="do_start") u.enable_probe(probe="query__done", fn_name="do_done") if debug: print(u.get_text()) print(bpf_text) # initialize BPF b = BPF(text=bpf_text, usdt_contexts=[u]) # header print("Tracing MySQL server queries for PID %d slower than %s ms..." % (pid, min_ms_text)) print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY")) class Data(ct.Structure): _fields_ = [
from bcc import BPF, USDT bpf_source = """ #include <uapi/linux/ptrace.h> int trace_binary_exec(struct pt_regs *ctx) { u64 pid = bpf_get_current_pid_tgid(); bpf_trace_printk("New hello_usdt process running with PID: %d", pid); } """ usdt = USDT(path="./hello_usdt") usdt.enable_probe(probe="probe-main", fn_name="trace_binary_exec") bpf = BPF(text=bpf_source, usdt=usdt) bpf.trace_print()
class Probe(object): def __init__(self, pattern, kernel_stack, user_stack, use_regex=False, pid=None, per_pid=False): """Init a new probe. Init the probe from the pattern provided by the user. The supported patterns mimic the 'trace' and 'argdist' tools, but are simpler because we don't have to distinguish between probes and retprobes. func -- probe a kernel function lib:func -- probe a user-space function in the library 'lib' p::func -- same thing as 'func' p:lib:func -- same thing as 'lib:func' t:cat:event -- probe a kernel tracepoint u:lib:probe -- probe a USDT tracepoint """ self.kernel_stack = kernel_stack self.user_stack = user_stack parts = pattern.split(':') if len(parts) == 1: parts = ["p", "", parts[0]] elif len(parts) == 2: parts = ["p", parts[0], parts[1]] elif len(parts) == 3: if parts[0] == "t": parts = ["t", "", "%s:%s" % tuple(parts[1:])] if parts[0] not in ["p", "t", "u"]: raise Exception("Type must be 'p', 't', or 'u', but got %s" % parts[0]) else: raise Exception("Too many ':'-separated components in pattern %s" % pattern) (self.type, self.library, self.pattern) = parts if not use_regex: self.pattern = self.pattern.replace('*', '.*') self.pattern = '^' + self.pattern + '$' if (self.type == "p" and self.library) or self.type == "u": libpath = BPF.find_library(self.library) if libpath is None: # This might be an executable (e.g. 'bash') libpath = BPF.find_exe(self.library) if libpath is None or len(libpath) == 0: raise Exception("unable to find library %s" % self.library) self.library = libpath self.pid = pid self.per_pid = per_pid self.matched = 0 def is_kernel_probe(self): return self.type == "t" or (self.type == "p" and self.library == "") def attach(self): if self.type == "p": if self.library: self.bpf.attach_uprobe(name=self.library, sym_re=self.pattern, fn_name="trace_count", pid=self.pid or -1) self.matched = self.bpf.num_open_uprobes() else: self.bpf.attach_kprobe(event_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_kprobes() elif self.type == "t": self.bpf.attach_tracepoint(tp_re=self.pattern, fn_name="trace_count") self.matched = self.bpf.num_open_tracepoints() elif self.type == "u": pass # Nothing to do -- attach already happened in `load` if self.matched == 0: raise Exception("No functions matched by pattern %s" % self.pattern) def load(self): ctx_name = "ctx" stack_trace = "" if self.user_stack: stack_trace += """ key.user_stack_id = stack_traces.get_stackid( %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK );""" % (ctx_name) else: stack_trace += "key.user_stack_id = -1;" if self.kernel_stack: stack_trace += """ key.kernel_stack_id = stack_traces.get_stackid( %s, BPF_F_REUSE_STACKID );""" % (ctx_name) else: stack_trace += "key.kernel_stack_id = -1;" trace_count_text = """ int trace_count(void *ctx) { FILTER struct key_t key = {}; key.tgid = GET_TGID; STORE_COMM %s counts.increment(key); return 0; } """ trace_count_text = trace_count_text % (stack_trace) bpf_text = """#include <uapi/linux/ptrace.h> #include <linux/sched.h> struct key_t { // no pid (thread ID) so that we do not needlessly split this key u32 tgid; int kernel_stack_id; int user_stack_id; char name[TASK_COMM_LEN]; }; BPF_HASH(counts, struct key_t); BPF_STACK_TRACE(stack_traces, 1024); """ # We really mean the tgid from the kernel's perspective, which is in # the top 32 bits of bpf_get_current_pid_tgid(). if self.is_kernel_probe() and self.pid: trace_count_text = trace_count_text.replace('FILTER', ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' + 'if (pid != %d) { return 0; }') % (self.pid)) else: trace_count_text = trace_count_text.replace('FILTER', '') # We need per-pid statistics when tracing a user-space process, because # the meaning of the symbols depends on the pid. We also need them if # per-pid statistics were requested with -P, or for user stacks. if self.per_pid or not self.is_kernel_probe() or self.user_stack: trace_count_text = trace_count_text.replace('GET_TGID', 'bpf_get_current_pid_tgid() >> 32') trace_count_text = trace_count_text.replace('STORE_COMM', 'bpf_get_current_comm(&key.name, sizeof(key.name));') else: # kernel stacks only. skip splitting on PID so these aggregate # together, and don't store the process name. trace_count_text = trace_count_text.replace( 'GET_TGID', '0xffffffff') trace_count_text = trace_count_text.replace('STORE_COMM', '') self.usdt = None if self.type == "u": self.usdt = USDT(path=self.library, pid=self.pid) for probe in self.usdt.enumerate_probes(): if not self.pid and (probe.bin_path != self.library): continue if re.match(self.pattern, probe.name): # This hack is required because the bpf_usdt_readarg # functions generated need different function names for # each attached probe. If we just stick to trace_count, # we'd get multiple bpf_usdt_readarg helpers with the same # name when enabling more than one USDT probe. new_func = "trace_count_%d" % self.matched bpf_text += trace_count_text.replace( "trace_count", new_func) self.usdt.enable_probe(probe.name, new_func) self.matched += 1 if debug: print(self.usdt.get_text()) else: bpf_text += trace_count_text if debug: print(bpf_text) self.bpf = BPF(text=bpf_text, usdt_contexts=[self.usdt] if self.usdt else [])
class Tool(object): examples = """ Probe specifier syntax: {p,r,t,u}:{[library],category}:function(signature)[:type[,type...]:expr[,expr...][:filter]][#label] Where: p,r,t,u -- probe at function entry, function exit, kernel tracepoint, or USDT probe in exit probes: can use $retval, $entry(param), $latency library -- the library that contains the function (leave empty for kernel functions) category -- the category of the kernel tracepoint (e.g. net, sched) function -- the function name to trace (or tracepoint name) signature -- the function's parameters, as in the C header type -- the type of the expression to collect (supports multiple) expr -- the expression to collect (supports multiple) filter -- the filter that is applied to collected values label -- the label for this probe in the resulting output EXAMPLES: argdist -H 'p::__kmalloc(u64 size):u64:size' Print a histogram of allocation sizes passed to kmalloc argdist -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16' Print a frequency count of how many times process 1005 called malloc with an allocation size of 16 bytes argdist -C 'r:c:gets():char*:(char*)$retval#snooped strings' Snoop on all strings returned by gets() argdist -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte' Print a histogram of nanoseconds per byte from kmalloc allocations argdist -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC' Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC argdist -p 1005 -C 'p:c:write(int fd):int:fd' -T 5 Print frequency counts of how many times writes were issued to a particular file descriptor number, in process 1005, but only show the top 5 busiest fds argdist -p 1005 -H 'r:c:read()' Print a histogram of results (sizes) returned by read() in process 1005 argdist -C 'r::__vfs_read():u32:$PID:$latency > 100000' Print frequency of reads by process where the latency was >0.1ms argdist -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' Print a histogram of read sizes that were longer than 1ms argdist -H \\ 'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1' Print a histogram of buffer sizes passed to write() across all processes, where the file descriptor was 1 (STDOUT) argdist -C 'p:c:fork()#fork calls' Count fork() calls in libc across all processes Can also use funccount.py, which is easier and more flexible argdist -H 't:block:block_rq_complete():u32:tp.nr_sector' Print histogram of number of sectors in completing block I/O requests argdist -C 't:irq:irq_handler_entry():int:tp.irq' Aggregate interrupts by interrupt request (IRQ) argdist -C 'u:pthread:pthread_start():u64:arg2' -p 1337 Print frequency of function addresses used as a pthread start function, relying on the USDT pthread_start probe in process 1337 argdist -H \\ 'p:c:sleep(u32 seconds):u32:seconds' \\ 'p:c:nanosleep(struct timespec *req):long:req->tv_nsec' Print histograms of sleep() and nanosleep() parameter values argdist -p 2780 -z 120 \\ -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1' Spy on writes to STDOUT performed by process 2780, up to a string size of 120 characters """ def __init__(self): parser = argparse.ArgumentParser( description="Trace a " + "function and display a summary of its parameter values.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=Tool.examples) parser.add_argument("-p", "--pid", type=int, help="id of the process to trace (optional)") parser.add_argument( "-z", "--string-size", default=80, type=int, help="maximum string size to read from char* arguments") parser.add_argument("-i", "--interval", default=1, type=int, help="output interval, in seconds") parser.add_argument("-n", "--number", type=int, dest="count", help="number of outputs") parser.add_argument( "-v", "--verbose", action="store_true", help="print resulting BPF program code before executing") parser.add_argument( "-T", "--top", type=int, help="number of top results to show (not applicable to " + "histograms)") parser.add_argument("-H", "--histogram", nargs="*", dest="histspecifier", metavar="specifier", help="probe specifier to capture histogram of " + "(see examples below)") parser.add_argument("-C", "--count", nargs="*", dest="countspecifier", metavar="specifier", help="probe specifier to capture count of " + "(see examples below)") parser.add_argument( "-I", "--include", nargs="*", metavar="header", help="additional header files to include in the BPF program") self.args = parser.parse_args() self.usdt_ctx = None def _create_probes(self): self.probes = [] for specifier in (self.args.countspecifier or []): self.probes.append(Probe(self, "freq", specifier)) for histspecifier in (self.args.histspecifier or []): self.probes.append(Probe(self, "hist", histspecifier)) if len(self.probes) == 0: print("at least one specifier is required") exit() def enable_usdt_probe(self, probe_name, fn_name): if not self.usdt_ctx: self.usdt_ctx = USDT(pid=self.args.pid) self.usdt_ctx.enable_probe(probe_name, fn_name) def _generate_program(self): bpf_source = """ struct __string_t { char s[%d]; }; #include <uapi/linux/ptrace.h> """ % self.args.string_size for include in (self.args.include or []): bpf_source += "#include <%s>\n" % include bpf_source += BPF.generate_auto_includes( map(lambda p: p.raw_spec, self.probes)) bpf_source += Tracepoint.generate_decl() bpf_source += Tracepoint.generate_entry_probe() for probe in self.probes: bpf_source += probe.generate_text() if self.args.verbose: if self.usdt_ctx: print(self.usdt_ctx.get_text()) print(bpf_source) self.bpf = BPF(text=bpf_source, usdt=self.usdt_ctx) def _attach(self): Tracepoint.attach(self.bpf) for probe in self.probes: probe.attach(self.bpf) if self.args.verbose: print("open uprobes: %s" % self.bpf.open_uprobes) print("open kprobes: %s" % self.bpf.open_kprobes) def _main_loop(self): count_so_far = 0 while True: try: sleep(self.args.interval) except KeyboardInterrupt: exit() print("[%s]" % strftime("%H:%M:%S")) for probe in self.probes: probe.display(self.args.top) count_so_far += 1 if self.args.count is not None and \ count_so_far >= self.args.count: exit() def _close_probes(self): for probe in self.probes: probe.close() if self.args.verbose: print("closed probe: " + str(probe)) def run(self): try: self._create_probes() self._generate_program() self._attach() self._main_loop() except: if self.args.verbose: traceback.print_exc() elif sys.exc_info()[0] is not SystemExit: print(sys.exc_info()[1]) self._close_probes()