def print_usdt(pid, lib):
    """Print the details of each distinct USDT probe found in a target.

    A ``USDT`` reader is created for the given library path and/or process
    id and its probes are enumerated. A probe is reported via
    ``print_usdt_details`` when no ``args.filter`` is set or when its short
    name matches ``args.filter`` as an ``fnmatch`` glob. Each short name is
    reported at most once, even if the reader yields it several times.

    :param pid: process id handed to ``USDT(pid=...)``.
    :param lib: library or binary path handed to ``USDT(path=...)``.
    """
    reader = USDT(path=lib, pid=pid)
    # A set gives O(1) duplicate detection; assumes short_name() returns a
    # hashable string-like value (it is also passed to fnmatch below).
    probes_seen = set()
    for probe in reader.enumerate_probes():
        probe_name = probe.short_name()
        if args.filter and not fnmatch.fnmatch(probe_name, args.filter):
            continue  # rejected by the user-supplied glob
        if probe_name in probes_seen:
            continue  # already reported under this short name
        probes_seen.add(probe_name)
        print_usdt_details(probe)
def print_usdt(pid, lib):
    """Print one line (or the full description) per distinct USDT probe.

    A ``USDT`` reader is created for the given library path and/or process
    id and its probes are enumerated. A probe is printed when no
    ``args.filter`` is set or when its short name matches ``args.filter``
    as an ``fnmatch`` glob. Each short name is printed at most once.

    With ``args.variables`` set the probe object itself is printed;
    otherwise a compact ``<bin_path> <provider>:<name>`` line is emitted.

    :param pid: process id handed to ``USDT(pid=...)``.
    :param lib: library or binary path handed to ``USDT(path=...)``.
    """
    reader = USDT(path=lib, pid=pid)
    # A set gives O(1) duplicate detection; assumes short_name() returns a
    # hashable string-like value (it is also passed to fnmatch below).
    probes_seen = set()
    for probe in reader.enumerate_probes():
        probe_name = probe.short_name()
        if args.filter and not fnmatch.fnmatch(probe_name, args.filter):
            continue  # rejected by the user-supplied glob
        if probe_name in probes_seen:
            continue  # already printed under this short name
        probes_seen.add(probe_name)
        if args.variables:
            print(probe)
        else:
            print("%s %s:%s" % (probe.bin_path,
                                probe.provider, probe.name))
class Probe(object):
    """One user-specified trace probe.

    A probe is parsed from a textual specification of the form
    ``[type:][library:]function[(signature)] [(predicate)] ["format", args]``
    and knows how to generate the BPF C program fragment for itself, attach
    it, and print the events it receives from the perf buffer.

    Class-level attributes hold settings shared by all probes; they are
    filled in by :meth:`configure`.
    """

    probe_count = 0         # number of Probe objects created so far
    streq_index = 0         # counter used to name generated streq_N helpers
    max_events = None       # stop after this many events (None: unlimited)
    event_count = 0         # events printed so far, across all probes
    first_ts = 0            # BPF.monotonic_time() captured by configure()
    print_time = False
    use_localtime = True
    time_field = False      # whether the event struct carries timestamp_ns
    print_cpu = False
    print_address = False
    tgid = -1
    pid = -1
    page_cnt = None
    build_id_enabled = False
    # Default so that probes can be parsed before configure() is called;
    # every other setting above already has one.
    bin_cmp = False

    @classmethod
    def configure(cls, args):
        """Copy the relevant command-line options into class-level settings.

        :param args: parsed options object; the attributes read are
            ``max_events``, ``timestamp``, ``time``, ``print_cpu``,
            ``address``, ``tgid``, ``pid``, ``buffer_pages``, ``bin_cmp``
            and ``sym_file_list``.
        """
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.use_localtime = not args.timestamp
        cls.time_field = cls.print_time and (not cls.use_localtime)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and derive the name of its BPF function.

        :param probe: raw probe specification text.
        :param string_size: size in bytes of each ``%s`` field buffer.
        :param kernel_stack: truthy to record a kernel stack id per event.
        :param user_stack: truthy to record a user stack id per event.
        :raises ValueError: if the specification cannot be parsed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)
        # The name is used as a C identifier, so sanitize it
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                 self.probe_name)

        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
                self.library, self._display_function(), self.filter,
                self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no explicit format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ``ValueError`` describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw text into spec, signature, predicate and action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                   probespec   opt. signature   rest
        #                   ---------   --------------   ----
        parsed = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if parsed is None:
            # e.g. an empty string, or text starting with a space or '('
            self._bail("missing probe specifier")
        (spec, sig, rest) = parsed.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Decode the ``type:library:function`` part of the probe."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = ':'.join(parts[1:-1])
            self.usdt_name = parts[-1]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Create the USDT context and check the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name.encode('ascii'):
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with aliases expanded."""
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers of ``fmt`` and build the Python
        format string used to render events."""
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        # Python's % operator has no length modifiers: collapse them
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        # %K / %U are rendered as already-symbolized strings
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, ...`` part of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        # Split on commas that do not directly follow a double quote
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # argN for ordinary probes: read straight from the probe context
    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # argN for __x64_sys_ kprobes: the first parameter is itself a
    # struct pt_regs pointer from which the argument is read
    aliases_indarg = {
        "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task" : "((struct task_struct *)bpf_get_current_task())"
    }

    def _generate_streq_function(self, string):
        """Append a C helper comparing a pointer against ``string`` and
        return the helper's name."""
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Expand aliases (argN, retval, $pid, ...) and STRCMP calls in a
        user expression and return the resulting C expression."""
        if self.is_syscall_kprobe:
            for alias, replacement in Probe.aliases_indarg.items():
                expr = expr.replace(alias, replacement)
        else:
            for alias, replacement in Probe.aliases_arg.items():
                # For USDT probes, we replace argN values with the
                # actual arguments for that probe obtained using
                # bpf_readarg_N macros emitted at BPF construction.
                if self.probe_type == "u":
                    continue
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        if self.bin_cmp:
            STRCMP_RE = 'STRCMP\\(\"([^"]+)\\"'
        else:
            STRCMP_RE = 'STRCMP\\(("[^"]+\\")'
        matches = re.finditer(STRCMP_RE, expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            # Each STRCMP occurrence gets its own helper, in order
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # ctypes type used on the Python side for each format specifier
    p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
              "ld": ct.c_long,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
              "c": ct.c_ubyte,
              "K": ct.c_ulonglong, "U": ct.c_ulonglong}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field ``v<idx>`` for value ``idx`` to
        ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build and return the ctypes Structure mirroring the C event
        struct emitted by :meth:`_generate_data_decl`."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the BPF event struct for each format specifier
    c_type = {"u": "unsigned int", "d": "int",
              "lu": "unsigned long", "ld": "long",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "lx": "unsigned long",
              "llx": "unsigned long long",
              "c": "char", "K": "unsigned long long",
              "U": "unsigned long long"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for value ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C declarations (event struct, perf output and
        optional stack table) for this probe."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
            else "BPF_STACK_TRACE_BUILDID"
        stack_table = "%s(%s, 1024);" % (stack_type,self.stacks_name) \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += " " + \
                self._generate_field_decl(i)
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = " int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = " int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements storing value ``idx`` into the event."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are fetched into a local named argN first
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = (" %s %s = 0;\n" +
                    " bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, idx, idx)
        if field_type in Probe.fmt_types:
            return text + " __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT arguments used by the filter.

        Side effect: every ``argN`` found in ``self.filter`` is renamed to
        ``argN_filter`` so the predicate refers to the locals declared by
        the returned code.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg in Probe.aliases_arg:
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C source for this probe.

        :param include_self: when falsy (and no pid/tgid filter applies),
            events from this very process are filtered out.
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
            """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
            """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
            """ % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i in range(len(self.values)):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # NOTE: _generate_usdt_filter_read() rewrites self.filter, so it
        # must be evaluated before self.filter is read (tuple elements
        # are evaluated left to right).
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds elapsed since ``first_ts``."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the symbolized frames of ``stack_id``; a negative id is
        printed as-is."""
        if stack_id < 0:
            print(" %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print(" ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" % (bpf.sym(addr, tgid,
                                  show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Render the user format string with ``values``.

        ``values`` must be a mutable sequence: entries for %K / %U are
        replaced in place by their symbolized form.
        """
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types)
                               if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types)
                             if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up], tgid,
                                 show_module=True, show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it.

        Exits the process once ``Probe.max_events`` events were printed.
        """
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        # Must be a real list: _format_message assigns into it, which a
        # Python 3 map() iterator does not support.
        values = [getattr(event, "v%d" % i)
                  for i in range(len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        if Probe.print_time:
            time = strftime("%H:%M:%S") if Probe.use_localtime else \
                Probe._time_off_str(event.timestamp_ns)
            print("%-8s " % time[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid,
               event.comm.decode('utf-8', 'replace'),
               self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach a kernel probe (kprobe or kretprobe)."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe (uprobe or uretprobe)."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """A pattern-based probe that counts hits per matched location."""

    def __init__(self, pattern, use_regex=False, pid=None):
        """Build a probe from a user-supplied pattern.

        The accepted syntax follows the 'trace' and 'argdist' tools but is
        simpler, since probes and retprobes need not be told apart.

        func            -- probe a kernel function
        lib:func        -- probe a user-space function in the library 'lib'
        /path:func      -- probe a user-space function in binary '/path'
        p::func         -- same thing as 'func'
        p:lib:func      -- same thing as 'lib:func'
        t:cat:event     -- probe a kernel tracepoint
        u:lib:probe     -- probe a USDT tracepoint
        """
        fields = pattern.split(':')
        count = len(fields)
        if count == 1:
            fields = ["p", "", fields[0]]
        elif count == 2:
            fields = ["p", fields[0], fields[1]]
        elif count == 3:
            if fields[0] == "t":
                # Tracepoints keep 'category:event' as the pattern
                fields = ["t", "", "%s:%s" % tuple(fields[1:])]
            if fields[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        self.type, self.library, self.pattern = fields
        if not use_regex:
            # Turn the shell-style wildcard into an anchored regex
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        needs_binary = self.type == "u" or \
            (self.type == "p" and self.library)
        if needs_binary:
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.matched = 0
        self.trace_functions = {}   # map location number to function name

    def is_kernel_probe(self):
        """Tell whether this is a tracepoint or a kprobe (no library)."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach every generated function to its matched location."""
        if self.type == "u":
            return  # Nothing to do -- attach already happened in `load`
        if self.type == "p" and not self.library:
            for index, function in self.trace_functions.items():
                self.bpf.attach_kprobe(
                    event=function,
                    fn_name="trace_count_%d" % index)
        elif self.type == "p" and self.library:
            for index, function in self.trace_functions.items():
                self.bpf.attach_uprobe(
                    name=self.library,
                    sym=function,
                    fn_name="trace_count_%d" % index,
                    pid=self.pid or -1)
        elif self.type == "t":
            for index, function in self.trace_functions.items():
                self.bpf.attach_tracepoint(
                    tp=function,
                    fn_name="trace_count_%d" % index)

    def _add_function(self, template, probe_name):
        """Instantiate ``template`` for the next location number and
        remember which probe name that location stands for."""
        location = self.matched
        body = template.replace("PROBE_FUNCTION",
                                "trace_count_%d" % location)
        body = body.replace("LOCATION", str(location))
        self.trace_functions[location] = probe_name
        self.matched = location + 1
        return body

    def _generate_functions(self, template):
        """Return the C text of one counting function per match."""
        self.usdt = None
        pieces = []
        if self.type == "p" and not self.library:
            kernel_functions = BPF.get_kprobe_functions(self.pattern)
            verify_limit(len(kernel_functions))
            for name in kernel_functions:
                pieces.append(self._add_function(template, name))
        elif self.type == "p" and self.library:
            # uprobes are tricky because the same function may have multiple
            # addresses, and the same address may be mapped to multiple
            # functions. We aren't allowed to create more than one uprobe
            # per address, so track unique addresses and ignore functions that
            # map to an address that we've already seen. Also ignore functions
            # that may repeat multiple times with different addresses.
            seen_addresses = set()
            seen_functions = set()
            pairs = BPF.get_user_functions_and_addresses(
                self.library, self.pattern)
            verify_limit(len(pairs))
            for name, address in pairs:
                if address in seen_addresses or name in seen_functions:
                    continue
                seen_addresses.add(address)
                seen_functions.add(name)
                pieces.append(self._add_function(template, name))
        elif self.type == "t":
            tracepoints = BPF.get_tracepoints(self.pattern)
            verify_limit(len(tracepoints))
            for name in tracepoints:
                pieces.append(self._add_function(template, name))
        elif self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            selected = []
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    selected.append(probe.name)
            verify_limit(len(selected))
            for name in selected:
                # Must be computed before _add_function bumps the counter
                handler = "trace_count_%d" % self.matched
                pieces.append(self._add_function(template, name))
                self.usdt.enable_probe(name, handler)
            if debug:
                print(self.usdt.get_text())
        return "".join(pieces)

    def load(self):
        """Generate the BPF program, compile it and zero the counters."""
        trace_count_text = """
int PROBE_FUNCTION(void *ctx) {
    FILTER
    int loc = LOCATION;
    u64 *val = counts.lookup(&loc);
    if (!val) {
        return 0;   // Should never happen, # of locations is known
    }
    (*val)++;
    return 0;
}
        """
        bpf_text = """#include <uapi/linux/ptrace.h>

BPF_ARRAY(counts, u64, NUMLOCATIONS);
        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.pid:
            pid_check = """u32 pid = bpf_get_current_pid_tgid() >> 32;
                   if (pid != %d) { return 0; }""" % self.pid
        else:
            pid_check = ''
        trace_count_text = trace_count_text.replace('FILTER', pid_check)

        bpf_text += self._generate_functions(trace_count_text)
        location_total = str(len(self.trace_functions))
        bpf_text = bpf_text.replace("NUMLOCATIONS", location_total)
        if debug:
            print(bpf_text)

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

        contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=bpf_text, usdt_contexts=contexts)
        self.clear()    # Initialize all array items to zero

    def counts(self):
        """Return the BPF table holding the per-location counters."""
        return self.bpf["counts"]

    def clear(self):
        """Reset the counter of every known location."""
        table = self.bpf["counts"]
        for location in list(self.trace_functions):
            table[table.Key(location)] = table.Leaf()
class Probe(object):
    """A pattern-based probe that counts events keyed by stack trace."""

    def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False):
        """Build a probe from a user-supplied pattern.

        The accepted syntax follows the 'trace' and 'argdist' tools but is
        simpler, since probes and retprobes need not be told apart.

        func            -- probe a kernel function
        lib:func        -- probe a user-space function in the library 'lib'
        p::func         -- same thing as 'func'
        p:lib:func      -- same thing as 'lib:func'
        t:cat:event     -- probe a kernel tracepoint
        u:lib:probe     -- probe a USDT tracepoint
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack

        fields = pattern.split(':')
        count = len(fields)
        if count == 1:
            fields = ["p", "", fields[0]]
        elif count == 2:
            fields = ["p", fields[0], fields[1]]
        elif count == 3:
            if fields[0] == "t":
                # Tracepoints keep 'category:event' as the pattern
                fields = ["t", "", "%s:%s" % tuple(fields[1:])]
            if fields[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        self.type, self.library, self.pattern = fields
        if not use_regex:
            # Turn the shell-style wildcard into an anchored regex
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        needs_binary = self.type == "u" or \
            (self.type == "p" and self.library)
        if needs_binary:
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.per_pid = per_pid
        self.matched = 0

    def is_kernel_probe(self):
        """Tell whether this is a tracepoint or a kprobe (no library)."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach the counting function; fail if nothing matched."""
        if self.type == "p" and self.library:
            self.bpf.attach_uprobe(name=self.library,
                                   sym_re=self.pattern,
                                   fn_name="trace_count",
                                   pid=self.pid or -1)
            self.matched = self.bpf.num_open_uprobes()
        elif self.type == "p":
            self.bpf.attach_kprobe(event_re=self.pattern,
                                   fn_name="trace_count")
            self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        # For type "u" there is nothing to do -- attach already
        # happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF program for this probe and compile it."""
        ctx_name = "ctx"
        if self.user_stack:
            user_part = """
                    key.user_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
                    );""" % (ctx_name)
        else:
            user_part = "key.user_stack_id = -1;"
        if self.kernel_stack:
            kernel_part = """
                    key.kernel_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_REUSE_STACKID
                    );""" % (ctx_name)
        else:
            kernel_part = "key.kernel_stack_id = -1;"
        stack_trace = user_part + kernel_part

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
        """
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.is_kernel_probe() and self.pid:
            pid_check = ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                         'if (pid != %d) { return 0; }') % (self.pid)
        else:
            pid_check = ''
        trace_count_text = trace_count_text.replace('FILTER', pid_check)

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P, or for user stacks.
        split_by_pid = self.per_pid or not self.is_kernel_probe() or \
            self.user_stack
        if split_by_pid:
            tgid_expr = 'bpf_get_current_pid_tgid() >> 32'
            comm_stmt = 'bpf_get_current_comm(&key.name, sizeof(key.name));'
        else:
            # kernel stacks only. skip splitting on PID so these aggregate
            # together, and don't store the process name.
            tgid_expr = '0xffffffff'
            comm_stmt = ''
        trace_count_text = trace_count_text.replace('GET_TGID', tgid_expr)
        trace_count_text = trace_count_text.replace('STORE_COMM', comm_stmt)

        self.usdt = None
        if self.type != "u":
            bpf_text += trace_count_text
        else:
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if not re.match(self.pattern, probe.name):
                    continue
                # This hack is required because the bpf_usdt_readarg
                # functions generated need different function names for
                # each attached probe. If we just stick to trace_count,
                # we'd get multiple bpf_usdt_readarg helpers with the same
                # name when enabling more than one USDT probe.
                handler = "trace_count_%d" % self.matched
                bpf_text += trace_count_text.replace("trace_count", handler)
                self.usdt.enable_probe(probe.name, handler)
                self.matched += 1
            if debug:
                print(self.usdt.get_text())

        if debug:
            print(bpf_text)
        contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=bpf_text, usdt_contexts=contexts)
class Probe(object):
    """A single trace probe parsed from a textual probe specification.

    A probe is described as ``[type:][library:]function[(signature)]
    [(predicate)] ["format string"[, value, ...]]``.  The instance keeps
    the parsed pieces (probe type, library, function / tracepoint / USDT
    name, filter expression, format types and value expressions) and can
    generate the BPF C program text plus the matching ctypes structure
    used to decode events on the Python side.
    """

    # Class-wide state shared by all probes of one tool invocation.
    probe_count = 0         # number of Probe objects created so far
    streq_index = 0         # counter used to name generated streq_N helpers
    max_events = None       # stop after this many events (None: unlimited)
    event_count = 0         # number of events printed so far
    first_ts = 0            # reference timestamp for relative time output
    print_time = False      # default so print_event works before configure()
    use_localtime = True
    tgid = -1
    pid = -1
    page_cnt = None

    @classmethod
    def configure(cls, args):
        """Copy the relevant command-line options onto the class."""
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.use_localtime = not args.timestamp
        cls.first_ts = BPF.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and derive a C-safe name for its BPF function.

        Raises ValueError (via ``_bail``) when the specification cannot
        be parsed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)
        # The name is used as a C identifier, so replace anything else.
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                 self.probe_name)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe text into spec, signature, filter, action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                     opt. signature
        #                          probespec       |      rest
        #                          ---------  ----------   --
        match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if match is None:
            # Empty text, or text starting with whitespace or '(':
            # report it like every other parse error.
            self._bail("expected a probe specifier")
        (spec, sig, rest) = match.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Parse the ``type:library:function`` part of the probe."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = ':'.join(parts[1:-1])
            self.usdt_name = parts[-1]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

    def _find_usdt_probe(self):
        """Create the USDT context and check the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers and build the Python format.

        C-only length modifiers are folded to ``%d`` / ``%x`` and the
        symbol specifiers ``%K`` / ``%U`` become ``%s``.
        """
        for match in re.finditer(
                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse ``"format", value, ...`` into types and value exprs."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Textual aliases replaced inside filter and value expressions.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()"
    }

    def _generate_streq_function(self, string):
        """Emit a C helper comparing a pointer against ``string``.

        The helper text is appended to ``self.streq_functions``; the
        generated function name is returned.
        """
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
        """ % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Expand aliases and STRCMP("...") calls inside ``expr``."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # ctypes type used on the Python side for each format specifier.
    p_type = {"u": ct.c_uint, "d": ct.c_int,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte,
              "K": ct.c_ulonglong, "U": ct.c_ulonglong}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes.Structure subclass mirroring the C event."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the generated struct for each format specifier.
    c_type = {"u": "unsigned int", "d": "int",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "llx": "unsigned long long",
              "c": "char", "K": "unsigned long long",
              "U": "unsigned long long"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for value ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C declarations for the event struct and tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + \
                self._generate_field_decl(i)
        kernel_stack_str = "       int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements that fill ``__data.v<idx>``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are read into a local named after the alias.
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """ % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT args used by the filter.

        As a side effect each ``argN`` in ``self.filter`` is renamed to
        ``argN_filter``, the local variable the returned code declares.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg, _ in Probe.aliases.items():
            if not (arg.startswith("arg") and
                    (arg in self.filter)):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C text for this probe.

        ``include_self`` False adds a filter that drops events raised
        by the tracing process itself (only when no pid/tgid filter
        applies).
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
        """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
        """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
        """ % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() rewrites self.filter, so it has
        # to be evaluated before self.filter in this tuple.
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds since ``first_ts``."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the function, USDT or tracepoint name to display."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the symbolized frames of ``stack_id``."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %s" % (bpf.sym(addr, tgid,
                                          show_module=True,
                                          show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Return the formatted message for one event.

        ``values`` may be any iterable; it is copied into a list so the
        %K / %U slots can be replaced by index (a ``map`` object does
        not support item assignment on Python 3).
        """
        values = list(values)
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types)
                               if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types)
                             if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up], tgid,
                                 show_module=True, show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode and print one event."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        values = [getattr(event, "v%d" % i)
                  for i in range(0, len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        if not Probe.print_time:
            print("%-6d %-6d %-12s %-16s %s" %
                  (event.tgid, event.pid, event.comm.decode(),
                   self._display_function(), msg))
        else:
            time = strftime("%H:%M:%S") if Probe.use_localtime else \
                Probe._time_off_str(event.timestamp_ns)
            print("%-8s %-6d %-6d %-12s %-16s %s" %
                  (time[:8], event.tgid, event.pid,
                   event.comm.decode(), self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach a kernel probe (kprobe or kretprobe)."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe (uprobe or uretprobe)."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """A single trace probe parsed from a textual probe specification.

    A probe is described as ``[type:][library:]function[(signature)]
    [(predicate)] ["format string"[, value, ...]]``.  The instance keeps
    the parsed pieces (probe type, library, function / tracepoint / USDT
    name, filter expression, format types and value expressions) and can
    generate the BPF C program text plus the matching ctypes structure
    used to decode events on the Python side.
    """

    # Class-wide state shared by all probes of one tool invocation.
    probe_count = 0         # number of Probe objects created so far
    streq_index = 0         # counter used to name generated streq helpers
    max_events = None       # stop after this many events (None: unlimited)
    event_count = 0         # number of events printed so far
    first_ts = 0            # monotonic reference for relative timestamps
    first_ts_real = None    # wall-clock time taken alongside first_ts
    print_time = False
    print_unix_timestamp = False
    use_localtime = True
    time_field = False      # whether the event struct has timestamp_ns
    print_cpu = False
    print_address = False
    tgid = -1
    pid = -1
    page_cnt = None
    build_id_enabled = False

    @classmethod
    def configure(cls, args):
        """Copy the relevant command-line options onto the class."""
        cls.max_events = args.max_events
        cls.print_time = args.timestamp or args.time
        cls.print_unix_timestamp = args.unix_timestamp
        cls.use_localtime = not args.timestamp
        cls.time_field = cls.print_time and (not cls.use_localtime)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.first_ts_real = time.time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack,
                 cgroup_map_name, name, msg_filter):
        """Parse ``probe`` and derive a C-safe name for its BPF function.

        Raises ValueError (via ``_bail``) when the specification cannot
        be parsed.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        # argN aliases that were marked with @user in an expression
        self.probe_user_list = set()
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)
        # The name is used as a C identifier, so replace anything else.
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                 self.probe_name)
        self.cgroup_map_name = cgroup_map_name
        self.name = name
        self.msg_filter = msg_filter
        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe text into spec, signature, filter, action."""
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                     opt. signature
        #                          probespec       |      rest
        #                          ---------  ----------   --
        match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
        if match is None:
            # Empty text, or text starting with whitespace or '(':
            # report it like every other parse error.
            self._bail("expected a probe specifier")
        (spec, sig, rest) = match.groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Parse the ``type:library:function`` part of the probe."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.usdt_name = ":".join(parts[2:])
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Create the USDT context and check the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 \
            else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)

        parts = self.usdt_name.split(":")
        if len(parts) == 1:
            provider_name = None
            usdt_name = parts[0].encode("ascii")
        else:
            provider_name = parts[0].encode("ascii")
            usdt_name = parts[1].encode("ascii")
        for probe in self.usdt.enumerate_probes():
            if ((not provider_name or
                 probe.provider == provider_name) and
                    probe.name == usdt_name):
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers and build the Python format.

        C-only length modifiers are folded to ``%d`` / ``%x`` and the
        symbol specifiers ``%K`` / ``%U`` become ``%s``.
        """
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)',
                fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse ``"format", value, ...`` into types and value exprs."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # argN aliases for ordinary probes.
    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # argN aliases used when is_syscall_kprobe is set: the values are
    # read through the pt_regs pointer found in the first parameter.
    aliases_indarg = {
        "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    # Aliases valid for every probe type.
    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task": "((struct task_struct *)bpf_get_current_task())"
    }

    def _rewrite_expr(self, expr):
        """Expand @user markers, string compares and aliases in ``expr``."""
        # Find the occurrences of any arg[1-6]@user. Use it later to
        # identify bpf_probe_read_user
        for matches in re.finditer(r'(arg[1-6])(@user)', expr):
            if matches.group(1).strip() not in self.probe_user_list:
                self.probe_user_list.add(matches.group(1).strip())
        # Remove @user occurrences from arg before resolving to its
        # corresponding aliases.
        expr = re.sub(r'(arg[1-6])@user', r'\1', expr)
        rdict = StrcmpRewrite.rewrite_expr(
            expr, self.bin_cmp, self.library,
            self.probe_user_list, self.streq_functions,
            Probe.streq_index)
        expr = rdict["expr"]
        self.streq_functions = rdict["streq_functions"]
        Probe.streq_index = rdict["probeid"]
        alias_to_check = Probe.aliases_indarg \
            if self.is_syscall_kprobe \
            else Probe.aliases_arg
        # For USDT probes, we replace argN values with the
        # actual arguments for that probe obtained using
        # bpf_readarg_N macros emitted at BPF construction.
        if not self.probe_type == "u":
            for alias, replacement in alias_to_check.items():
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        return expr

    # ctypes type used on the Python side for each format specifier.
    p_type = {
        "u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
        "ld": ct.c_long,
        "llu": ct.c_ulonglong, "lld": ct.c_longlong,
        "hu": ct.c_ushort, "hd": ct.c_short,
        "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong, "U": ct.c_ulonglong
    }

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value ``idx`` to ``fields``."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes.Structure subclass mirroring the C event."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure, ),
                    dict(_fields_=fields))

    # C type used in the generated struct for each format specifier.
    c_type = {
        "u": "unsigned int", "d": "int",
        "lu": "unsigned long", "ld": "long",
        "llu": "unsigned long long", "lld": "long long",
        "hu": "unsigned short", "hd": "short",
        "x": "unsigned int", "lx": "unsigned long",
        "llx": "unsigned long long",
        "c": "char", "K": "unsigned long long",
        "U": "unsigned long long"
    }
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for value ``idx``."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C declarations for the event struct and tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
            else "BPF_STACK_TRACE_BUILDID"
        stack_table = "%s(%s, 1024);" % (stack_type, self.stacks_name) \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + \
                self._generate_field_decl(i)
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = "       int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str,
                       data_fields, kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements that fill ``__data.v<idx>``."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are read into a local named after the alias.
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                % (arg_ctype, expr, expr[3], expr)
        probe_read_func = "bpf_probe_read"
        if field_type == "s":
            if self.library:
                probe_read_func = "bpf_probe_read_user"
            else:
                # Kernel probe: use the user-space reader only for
                # arguments that were tagged with @user.
                alias_to_check = Probe.aliases_indarg \
                    if self.is_syscall_kprobe \
                    else Probe.aliases_arg
                for arg, alias in alias_to_check.items():
                    if alias == expr and arg in self.probe_user_list:
                        probe_read_func = "bpf_probe_read_user"
                        break
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                %s(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, probe_read_func, idx, idx)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT args used by the filter.

        As a side effect each ``argN`` in ``self.filter`` is renamed to
        ``argN_filter``, the local variable the returned code declares.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg, _ in Probe.aliases_arg.items():
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(
                self.usdt_name, arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(
                arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C text for this probe.

        ``include_self`` False adds a filter that drops events raised
        by the tracing process itself (only when no pid/tgid filter
        applies).
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
        """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
        """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
        """ % os.getpid()
        else:
            pid_filter = ""

        if self.cgroup_map_name is not None:
            cgroup_filter = """
        if (%s.check_current_task(0) <= 0) { return 0; }
        """ % self.cgroup_map_name
        else:
            cgroup_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" \
            if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" \
            if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, 0
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # _generate_usdt_filter_read() rewrites self.filter, so it has
        # to be evaluated before self.filter in this tuple.
        text = text % (pid_filter, cgroup_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format ``timestamp_ns`` as seconds relative to ``first_ts``.

        With ``print_unix_timestamp`` set, the wall-clock time recorded
        in ``first_ts_real`` is added to the offset.
        """
        offset = 1e-9 * (timestamp_ns - cls.first_ts)
        if cls.print_unix_timestamp:
            return "%.6f" % (offset + cls.first_ts_real)
        else:
            return "%.6f" % offset

    def _display_function(self):
        """Return the function, USDT or tracepoint name to display."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the symbolized frames of ``stack_id``."""
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" % (bpf.sym(addr, tgid,
                                  show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        """Return the formatted message for one event.

        ``values`` may be any iterable; it is copied into a list so the
        %K / %U slots can be replaced by index (a ``map`` object does
        not support item assignment on Python 3).
        """
        values = list(values)
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types)
                               if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types)
                             if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up], tgid,
                                 show_module=True, show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode, filter and print one event."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents

        comm_filter = self.name
        if comm_filter:
            # event.comm is a byte string; bytes(text) without an
            # encoding fails on Python 3, so encode text explicitly.
            if not isinstance(comm_filter, bytes):
                comm_filter = comm_filter.encode('utf-8')
            if comm_filter not in event.comm:
                return

        values = [getattr(event, "v%d" % i)
                  for i in range(0, len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)

        msg_filter = self.msg_filter
        if msg_filter:
            # The message is text; a bytes filter has to be decoded
            # before a substring test is possible on Python 3.
            if isinstance(msg_filter, bytes) and \
               not isinstance(msg, bytes):
                msg_filter = msg_filter.decode('utf-8', 'replace')
            if msg_filter not in msg:
                return

        if Probe.print_time:
            timestamp = strftime("%H:%M:%S") if Probe.use_localtime \
                else Probe._time_off_str(event.timestamp_ns)
            if Probe.print_unix_timestamp:
                print("%-17s " % timestamp[:17], end="")
            else:
                print("%-8s " % timestamp[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid,
               event.comm.decode('utf-8', 'replace'),
               self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()
        sys.stdout.flush()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer on ``bpf``."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        """Attach a kernel probe (kprobe or kretprobe)."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe (uprobe or uretprobe)."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.tgid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.tgid)
class Probe(object):
    """One stack-counting probe.

    Parses the user-supplied pattern, generates the BPF program that counts
    events keyed by stack ids (and optionally by process), and attaches that
    program to every matching kprobe, uprobe, tracepoint or USDT probe.
    """

    def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False, cpu=None):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Raises Exception when the pattern is malformed or the library named
        in it cannot be located.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                # Tracepoints are matched as a single "category:event" name.
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            # Turn the shell-style wildcard into an anchored regex.
            self.pattern = self.pattern.replace('*', '.*')
            self.pattern = '^' + self.pattern + '$'

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if libpath is None or len(libpath) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        self.cpu = cpu
        self.matched = 0

    def is_kernel_probe(self):
        """Return True for tracepoints and for kprobes (no library given)."""
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        """Attach the loaded program to everything the pattern matches.

        Must be called after load(). Raises Exception when nothing matched.
        """
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass    # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def _filter_statements(self):
        """Return the C statements that discard events we should not count.

        Filters are only generated for kernel probes. The result is a list of
        single-line C snippets, possibly empty.
        """
        statements = []
        if not self.is_kernel_probe():
            return statements

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.pid:
            statements.append(
                'u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                'if (pid != %d) { return 0; }' % self.pid)

        # CPU 0 is a valid CPU number, so compare against None instead of
        # relying on truthiness (which silently dropped a filter on CPU 0).
        if self.cpu is not None:
            statements.append(
                'struct task_struct *task; task = (struct task_struct*)bpf_get_current_task(); ' +
                'if (task->cpu != %d) { return 0; }' % self.cpu)
        return statements

    def load(self):
        """Generate the BPF C source for this probe and compile it.

        Stores the compiled program in self.bpf. For USDT probes the matching
        probes are also enabled here and self.matched is updated.
        """
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
    key.user_stack_id = stack_traces.get_stackid(
        %s, BPF_F_USER_STACK
    );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
    key.kernel_stack_id = stack_traces.get_stackid(
        %s, 0
    );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
"""
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
"""

        trace_count_text = trace_count_text.replace(
            'FILTER', '\n '.join(self._filter_statements()))

        # Do per-pid statistics iff -P is provided
        if self.per_pid:
            trace_count_text = trace_count_text.replace(
                'GET_TGID', 'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace(
                'STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])
class Probe(object):
    """A counting probe: one generated BPF function per matched location."""

    def __init__(self, pattern, use_regex=False, pid=None):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            /path:func      -- probe a user-space function in binary '/path'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint
        """
        fields = pattern.split(':')
        field_count = len(fields)
        if field_count > 3:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)
        if field_count == 1:
            fields = ["p", "", fields[0]]
        elif field_count == 2:
            fields = ["p", fields[0], fields[1]]
        else:
            if fields[0] == "t":
                # category and event are matched together as "cat:event"
                fields = ["t", "", "%s:%s" % tuple(fields[1:])]
            if fields[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])

        self.type, self.library, self.pattern = fields
        if not use_regex:
            # Glob-style '*' becomes a fully anchored regular expression.
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        needs_library = self.type == "u" or \
            (self.type == "p" and self.library)
        if needs_library:
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.matched = 0
        self.trace_functions = {}   # map location number to function name

    def is_kernel_probe(self):
        """Tell whether this probe targets the kernel (kprobe/tracepoint)."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach every generated trace_count_N function to its target."""
        if self.type == "u":
            return  # Nothing to do -- attach already happened in `load`

        target_pid = self.pid or -1
        for index, function in self.trace_functions.items():
            handler = "trace_count_%d" % index
            if self.type == "p" and not self.library:
                self.bpf.attach_kprobe(
                    event=function, fn_name=handler, pid=target_pid)
            elif self.type == "p" and self.library:
                self.bpf.attach_uprobe(
                    name=self.library, sym=function,
                    fn_name=handler, pid=target_pid)
            elif self.type == "t":
                self.bpf.attach_tracepoint(
                    tp=function, fn_name=handler, pid=target_pid)

    def _add_function(self, template, probe_name):
        """Instantiate the template for the next location and record it."""
        location = self.matched
        generated = template.replace(
            "PROBE_FUNCTION", "trace_count_%d" % location)
        generated = generated.replace("LOCATION", str(location))
        self.trace_functions[location] = probe_name
        self.matched = location + 1
        return generated

    def _generate_functions(self, template):
        """Return the C text of one counting function per matched target."""
        self.usdt = None
        chunks = ""
        user_probe = self.type == "p" and self.library
        if self.type == "p" and not self.library:
            kernel_functions = BPF.get_kprobe_functions(self.pattern)
            verify_limit(len(kernel_functions))
            for name in kernel_functions:
                chunks += self._add_function(template, name)
        elif user_probe:
            # uprobes are tricky because the same function may have multiple
            # addresses, and the same address may be mapped to multiple
            # functions. We aren't allowed to create more than one uprobe
            # per address, so track unique addresses and ignore functions that
            # map to an address that we've already seen. Also ignore functions
            # that may repeat multiple times with different addresses.
            seen_addresses = set()
            seen_names = set()
            candidates = BPF.get_user_functions_and_addresses(
                self.library, self.pattern)
            verify_limit(len(candidates))
            for name, address in candidates:
                if address in seen_addresses or name in seen_names:
                    continue
                seen_addresses.add(address)
                seen_names.add(name)
                chunks += self._add_function(template, name)
        elif self.type == "t":
            tracepoint_names = BPF.get_tracepoints(self.pattern)
            verify_limit(len(tracepoint_names))
            for name in tracepoint_names:
                chunks += self._add_function(template, name)
        elif self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            usdt_names = []
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only accept probes from our own binary.
                if self.pid or probe.bin_path == self.library:
                    if re.match(self.pattern, probe.name):
                        usdt_names.append(probe.name)
            verify_limit(len(usdt_names))
            for name in usdt_names:
                # Capture the handler name before _add_function bumps
                # the location counter.
                handler = "trace_count_%d" % self.matched
                chunks += self._add_function(template, name)
                self.usdt.enable_probe(name, handler)
            if debug:
                print(self.usdt.get_text())
        return chunks

    def load(self):
        """Build and compile the BPF program, then zero the counters.

        Raises Exception when the pattern matched nothing.
        """
        function_template = """
int PROBE_FUNCTION(void *ctx) {
    FILTER
    int loc = LOCATION;
    u64 *val = counts.lookup(&loc);
    if (!val) {
        return 0;   // Should never happen, # of locations is known
    }
    (*val)++;
    return 0;
}
"""
        program = """#include <uapi/linux/ptrace.h>

BPF_ARRAY(counts, u64, NUMLOCATIONS);
"""

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        pid_check = ''
        if self.pid:
            pid_check = """u32 pid = bpf_get_current_pid_tgid() >> 32;
    if (pid != %d) { return 0; }""" % self.pid
        function_template = function_template.replace('FILTER', pid_check)

        program += self._generate_functions(function_template)
        program = program.replace(
            "NUMLOCATIONS", str(len(self.trace_functions)))
        if debug:
            print(program)

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

        usdt_contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=program, usdt_contexts=usdt_contexts)
        self.clear()    # Initialize all array items to zero

    def counts(self):
        """Return the BPF table holding the per-location counters."""
        return self.bpf["counts"]

    def clear(self):
        """Reset the counter of every known location to its default leaf."""
        table = self.bpf["counts"]
        for location in list(self.trace_functions):
            table[table.Key(location)] = table.Leaf()
class Probe(object):
    """A single trace probe parsed from a user-supplied probe string.

    The probe string has the form "<spec> [(<predicate>)] ["<format>", args]".
    From it the class generates a BPF C function plus the matching perf-output
    struct, and a ctypes structure used to decode and print each event.
    """

    # Class-wide state shared by all probes; the option-derived values are
    # filled in by configure().
    probe_count = 0         # number of Probe objects created so far
    max_events = None       # stop after this many printed events, if set
    event_count = 0         # events printed so far, across all probes
    first_ts = 0            # monotonic time taken in configure()
    use_localtime = True    # print wall-clock time instead of an offset
    pid = -1                # process filter; -1 when no pid was given

    @classmethod
    def configure(cls, args):
        """Copy the run-wide options from the parsed arguments onto the class."""
        cls.max_events = args.max_events
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()
        cls.pid = args.pid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse the probe string and derive a unique BPF function name.

        probe        -- the raw probe string as given by the user
        string_size  -- size in bytes reserved for each %s value
        kernel_stack -- whether to capture the kernel stack for each event
        user_stack   -- whether to capture the user stack for each event

        Raises ValueError (through _bail) when the probe string is invalid.
        """
        self.usdt = None
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()

        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
            (self._display_function(), self.probe_num)

    def __str__(self):
        """Return a one-line summary of the parsed probe."""
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
            self.library, self._display_function(), self.filter,
            self.types, self.values)

    def is_default_action(self):
        """Return True when the probe string carried no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError naming the offending probe string."""
        raise ValueError("error in probe '%s': %s" %
                         (self.raw_probe, error))

    def _parse_probe(self):
        """Split the raw probe into specifier, predicate and action parts."""
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(' ')
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i+1])
                    text = text[i+1:]
                    break
            # The loop ended without ever closing the opening paren.
            if self.filter is None:
                self._bail("unmatched end of predicate")

        # No predicate given: use one that is always true.
        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Parse the '[type]:[library]:name' specifier into attributes."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # generated from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Create the USDT context and check that the named probe exists."""
        self.usdt = USDT(path=self.library, pid=Probe.pid)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return      # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with argument aliases expanded."""
        self.filter = self._replace_args(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers of fmt and build the Python format.

        Each recognized specifier is appended to self.types. In the Python
        format the integer specifiers become %d and the hex ones become %x.
        """
        for match in re.finditer(
                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the '"format", expr, ...' action into types and values."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in match.group(2).split(','):
            part = self._replace_args(part)
            if len(part) > 0:
                self.values.append(part)

    # Names usable in predicates and value expressions, mapped to the C
    # expression substituted for them.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()"
    }

    def _replace_args(self, expr):
        """Return expr with every alias replaced by its C expression."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using special
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        return expr

    # Format specifier -> ctypes type used to decode the value in Python.
    p_type = {"u": ct.c_uint, "d": ct.c_int,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for value number idx to fields."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes.Structure subclass mirroring the event struct."""
        self.python_struct_name = "%s_%d_Data" % \
            (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        # The stack id fields come last, in the same order as in the
        # struct emitted by _generate_data_decl.
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # Format specifier -> C type of the field in the generated struct.
    c_type = {"u": "unsigned int", "d": "int",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "llx": "unsigned long long",
              "c": "char"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C declaration of struct field number idx."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C text declaring the event struct and its tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
            if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += " " + \
                self._generate_field_decl(i)
        kernel_stack_str = " int kernel_stack_id;" \
            if self.kernel_stack else ""
        user_stack_str = " int user_stack_id;" \
            if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields,
                       kernel_stack_str, user_stack_str,
                       self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements that fill struct field number idx."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        # USDT arguments are first read into a local named after the
        # expression; expr[3] is the single digit following "arg".
        if self.probe_type == "u" and expr[0:3] == "arg":
            text = (" u64 %s = 0;\n" +
                    " bpf_usdt_readarg(%s, ctx, &%s);\n") % \
                    (expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
""" % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + " __data.v%d = (%s)%s;\n" % \
                (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT arguments used by the predicate.

        Also rewrites self.filter so that each argN it mentions refers to
        the generated argN_filter local. Empty for non-USDT probes.
        """
        text = ""
        if self.probe_type == "u":
            for arg, _ in Probe.aliases.items():
                if not (arg.startswith("arg") and (arg in self.filter)):
                    continue
                arg_index = int(arg.replace("arg", ""))
                arg_ctype = self.usdt.get_probe_arg_ctype(
                    self.usdt_name, arg_index)
                if not arg_ctype:
                    self._bail("Unable to determine type of {} "
                               "in the filter".format(arg))
                text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
""".format(arg_ctype, arg, arg_index, arg)
                self.filter = self.filter.replace(
                    arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete C text (declarations + function) of the probe.

        include_self -- when false and no kernel pid filter applies, events
                        from this Python process (os.getpid()) are dropped.
        """
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid != %d) { return 0; }
""" % Probe.pid
        elif not include_self:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid == %d) { return 0; }
""" % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % self.stacks_name
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID
        );""" % self.stacks_name

        # Tracepoints use the TRACEPOINT_PROBE heading and submit with
        # "args"; every other probe type is a plain function taking ctx.
        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"
        text = heading + """
{
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.pid = bpf_get_current_pid_tgid();
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        text = text % (pid_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format timestamp_ns as seconds elapsed since cls.first_ts."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, pid):
        """Print the addresses and symbols of the stack stack_id.

        A negative stack_id is printed as-is instead of being walked.
        """
        if stack_id < 0:
            print(" %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print(" %016x %s" % (addr, bpf.sym(addr, pid)))

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it.

        Exits the program once Probe.max_events events have been printed.
        """
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        values = map(lambda i: getattr(event, "v%d" % i),
                     range(0, len(self.values)))
        msg = self.python_format % tuple(values)
        time = strftime("%H:%M:%S") if Probe.use_localtime else \
            Probe._time_off_str(event.timestamp_ns)
        print("%-8s %-6d %-12s %-16s %s" % \
            (time[:8], event.pid, event.comm[:12],
             self._display_function(), msg))

        if self.user_stack:
            print(" User Stack Trace:")
            self.print_stack(bpf, event.user_stack_id, event.pid)
        if self.kernel_stack:
            print(" Kernel Stack Trace:")
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and open its perf buffer with print_event."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback)

    def _attach_k(self, bpf):
        """Attach a kernel probe ('r' -> kretprobe, 'p' -> kprobe)."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Resolve the library path and attach a user-space probe."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath,
                                 sym=self.function,
                                 fn_name=self.probe_name,
                                 pid=Probe.pid)
        else:
            bpf.attach_uprobe(name=libpath,
                              sym=self.function,
                              fn_name=self.probe_name,
                              pid=Probe.pid)
class Probe(object):
    """A stack-counting probe keyed by (pid, stack id)."""

    def __init__(self, pattern, use_regex=False, pid=None, per_pid=False):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint
        """
        pieces = pattern.split(':')
        piece_count = len(pieces)
        if piece_count > 3:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)
        if piece_count == 1:
            pieces = ["p", "", pieces[0]]
        elif piece_count == 2:
            pieces = ["p", pieces[0], pieces[1]]
        else:
            if pieces[0] == "t":
                # A tracepoint is matched on its joined "category:event".
                pieces = ["t", "", "%s:%s" % tuple(pieces[1:])]
            if pieces[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                pieces[0])

        self.type, self.library, self.pattern = pieces
        if not use_regex:
            # Convert the glob into an anchored regular expression.
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        wants_library = self.type == "u" or \
            (self.type == "p" and self.library)
        if wants_library:
            located = BPF.find_library(self.library)
            if located is None:
                # This might be an executable (e.g. 'bash')
                located = BPF.find_exe(self.library)
            if located is None or len(located) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = located

        self.pid = pid
        self.per_pid = per_pid
        self.matched = 0

    def is_kernel_probe(self):
        """Tell whether the probe fires in the kernel (kprobe/tracepoint)."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach trace_count to all matches; raise if there were none."""
        target_pid = self.pid or -1
        if self.type == "p" and self.library:
            self.bpf.attach_uprobe(name=self.library,
                                   sym_re=self.pattern,
                                   fn_name="trace_count",
                                   pid=target_pid)
            self.matched = self.bpf.num_open_uprobes()
        elif self.type == "p":
            self.bpf.attach_kprobe(event_re=self.pattern,
                                   fn_name="trace_count",
                                   pid=target_pid)
            self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=target_pid)
            self.matched = self.bpf.num_open_tracepoints()
        # For "u" there is nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF program for this probe and compile it."""
        counter_source = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.pid = GET_PID;
    key.stackid = stack_traces.get_stackid(ctx, STACK_FLAGS);
    u64 zero = 0;
    u64 *val = counts.lookup_or_init(&key, &zero);
    (*val)++;
    return 0;
}
"""
        program = """#include <uapi/linux/ptrace.h>

struct key_t {
    u32 pid;
    int stackid;
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
"""
        in_kernel = self.is_kernel_probe()

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        pid_check = ''
        if in_kernel and self.pid:
            pid_check = ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                         'if (pid != %d) { return 0; }') % (self.pid)
        counter_source = counter_source.replace('FILTER', pid_check)

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P.
        if self.per_pid or not in_kernel:
            pid_expr = 'bpf_get_current_pid_tgid() >> 32'
        else:
            pid_expr = '0xffffffff'
        counter_source = counter_source.replace('GET_PID', pid_expr)

        flags = 'BPF_F_REUSE_STACKID'
        if not in_kernel:
            flags += '| BPF_F_USER_STACK'      # can't do both U *and* K
        counter_source = counter_source.replace('STACK_FLAGS', flags)

        self.usdt = None
        if self.type != "u":
            program += counter_source
        else:
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only consider probes in our own binary.
                if self.pid or probe.bin_path == self.library:
                    if re.match(self.pattern, probe.name):
                        # This hack is required because the bpf_usdt_readarg
                        # functions generated need different function names
                        # for each attached probe. If we just stick to
                        # trace_count, we'd get multiple bpf_usdt_readarg
                        # helpers with the same name when enabling more than
                        # one USDT probe.
                        handler = "trace_count_%d" % self.matched
                        program += counter_source.replace(
                            "trace_count", handler)
                        self.usdt.enable_probe(probe.name, handler)
                        self.matched += 1
            if debug:
                print(self.usdt.get_text())

        if debug:
            print(program)
        usdt_contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=program, usdt_contexts=usdt_contexts)
class Probe(object):
    """A single trace probe.

    Parses a probe specification of the form
    ``[type:][library:]function [(predicate)] ["format", args...]``,
    generates the BPF C text for it, attaches it, and prints the events
    the BPF program submits through its perf buffer.

    ``type`` is one of ``p`` (default), ``r``, ``t`` (tracepoint, written as
    ``t:category:event``) or ``u`` (USDT, written as ``u:library:probe``).
    """

    # Number of Probe instances created so far; used to number the probes.
    probe_count = 0
    # Counter used to give each generated streq_N helper a unique name.
    streq_index = 0
    # Stop after this many printed events (None means no limit).
    max_events = None
    # Number of events printed so far, across all probes.
    event_count = 0
    # Timestamp captured in configure(); event offsets are relative to it.
    first_ts = 0
    # True: print wall-clock time; False: print offset from first_ts.
    use_localtime = True
    # Process / thread filters; -1 means "no filter".
    tgid = -1
    pid = -1

    @classmethod
    def configure(cls, args):
        """Copy global options from ``args`` onto the class.

        Reads ``args.max_events``, ``args.offset``, ``args.tgid`` and
        ``args.pid``. A falsy tgid/pid is stored as -1.
        """
        cls.max_events = args.max_events
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and assign the probe a unique name.

        probe        -- the raw probe specification string
        string_size  -- size of the buffer used for each %s field
        kernel_stack -- whether to record and print the kernel stack
        user_stack   -- whether to record and print the user stack

        Raises ValueError (through _bail) if the specification is invalid.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(),
                                           self.probe_num)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type,
            self.library,
            self._display_function(),
            self.filter,
            self.types,
            self.values,
        )

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError describing ``error`` for this probe."""
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    @staticmethod
    def _to_text(value):
        """Return ``value`` decoded to text if it is a Python 3 bytes object.

        On Python 2 ``bytes`` is ``str``, so the value is returned untouched.
        Any non-bytes value is returned unchanged as well.
        """
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    def _parse_probe(self):
        """Split the raw probe text into spec, predicate and action."""
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(" ")
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[: i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            # No predicate given: always true
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Parse the ``type:library:function`` part of the probe."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', "
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""      # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Create the USDT context and check that the named probe exists."""
        target = Probe.pid if Probe.pid and Probe.pid != -1 else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        for probe in self.usdt.enumerate_probes():
            if probe.name == self.usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        """Record the format specifiers of ``fmt`` and build python_format.

        Integer specifiers are normalized to %d / %x and %K / %U become %s,
        so the resulting string can be used with Python's % operator.
        """
        for match in re.finditer(
                r"[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)", fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r"([^%]%)(u|d|llu|lld|hu|hd)", r"\1d", fmt)
        fmt = re.sub(r"([^%]%)(x|llx)", r"\1x", fmt)
        fmt = re.sub("%K|%U", "%s", fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        """Parse the ``"format", expr, expr...`` part of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r"(\".*?\"),?(.*)", action)
        if match is None:
            self._bail('expected format string in "s')

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Shorthand names usable in predicates and arguments, and the C
    # expressions they expand to.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
    }

    def _generate_streq_function(self, string):
        """Emit a C helper comparing a probed string with ``string``.

        ``string`` is the quoted literal as written in the probe. Returns
        the name of the generated helper.
        """
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, unsigned long str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle); ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
""" % (fname, string)
        return fname

    def _rewrite_expr(self, expr):
        """Expand aliases and STRCMP(...) calls in a probe expression."""
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            # Replace one STRCMP per generated helper, left to right
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # ctypes type used on the Python side for each format specifier
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong,
    }

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for format argument ``idx`` to fields."""
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        """Build the ctypes.Structure subclass mirroring the C event struct."""
        self.python_struct_name = "%s_%d_Data" % (self._display_function(),
                                                  self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16),       # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=fields))

    # C type used in the event struct for each format specifier
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long",
    }
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C struct member declaration for format argument idx."""
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return the C declarations of the event struct and its tables."""
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        if self.kernel_stack or self.user_stack:
            stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name
        else:
            stack_table = ""
        data_fields = ""
        for i in range(len(self.types)):
            data_fields += "        " + self._generate_field_decl(i)
        kernel_stack_str = ("        int kernel_stack_id;"
                            if self.kernel_stack else "")
        user_stack_str = ("        int user_stack_id;"
                          if self.user_stack else "")

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields, kernel_stack_str,
                       user_stack_str, self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C code storing format argument ``idx`` into __data."""
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # USDT arguments are read through bpf_usdt_readarg; expr[3]
            # is the (single digit) argument number.
            text = ("        u64 %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") % (
                        expr, expr[3], expr)

        if field_type == "s":
            return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
""" % (expr, idx, idx, expr)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % (
                idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT arguments used by the predicate.

        For USDT probes, every argN mentioned in the filter is read into an
        ``argN_filter`` local, and self.filter is rewritten to use it.
        Returns an empty string for other probe types.
        """
        text = ""
        if self.probe_type == "u":
            for arg in Probe.aliases:
                if not (arg.startswith("arg") and (arg in self.filter)):
                    continue
                arg_index = int(arg.replace("arg", ""))
                arg_ctype = self.usdt.get_probe_arg_ctype(
                    self.usdt_name, arg_index)
                if not arg_ctype:
                    self._bail("Unable to determine type of {} "
                               "in the filter".format(arg))
                text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
""".format(arg_ctype, arg, arg_index, arg)
                self.filter = self.filter.replace(
                    arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C text for this probe.

        include_self -- when False (and no pid/tgid filter applies to a
                        kernel probe), events from this process are dropped.
        """
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
""" % Probe.pid
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
""" % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
""" % os.getpid()
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = ""
        for i in range(len(self.values)):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category,
                                                    self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # The USDT filter read must be generated before self.filter is
        # interpolated, because it rewrites self.filter.
        usdt_filter_read = self._generate_usdt_filter_read()
        text = text % (
            pid_filter,
            prefix,
            usdt_filter_read,
            self.filter,
            self.struct_name,
            data_fields,
            stack_trace,
            self.events_name,
            ctx_name,
        )

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format the offset of ``timestamp_ns`` from first_ts in seconds."""
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type == "p" or self.probe_type == "r":
            return self.function
        elif self.probe_type == "u":
            return self.usdt_name
        else:   # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        """Print the stack ``stack_id``, one symbolized frame per line."""
        if stack_id < 0:
            # Negative ids are printed as-is instead of being walked
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %016x %s" % (
                addr, self._to_text(bpf.sym(addr, tgid))))

    def _format_message(self, bpf, tgid, values):
        """Apply python_format to ``values`` (a list, modified in place).

        %K values are replaced by their kernel symbol, %U values by their
        user symbol in ``tgid``, and bytes %s values are decoded to text.
        """
        # enumerate() instead of xrange(): works on Python 2 and 3
        for i, field_type in enumerate(self.types):
            if field_type == "K":
                values[i] = self._to_text(bpf.ksymaddr(values[i]))
            elif field_type == "U":
                values[i] = self._to_text(bpf.symaddr(values[i], tgid))
            elif field_type == "s":
                values[i] = self._to_text(values[i])
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf buffer callback: decode one event and print it."""
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        # Must be a real list: _format_message assigns into it, which a
        # Python 3 map object does not support.
        values = [getattr(event, "v%d" % i)
                  for i in range(len(self.values))]
        msg = self._format_message(bpf, event.tgid, values)
        if Probe.use_localtime:
            timestamp = strftime("%H:%M:%S")
        else:
            timestamp = Probe._time_off_str(event.timestamp_ns)
        print("%-8s %-6d %-6d %-12s %-16s %s" % (
            timestamp[:8], event.tgid, event.pid,
            self._to_text(event.comm), self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if (Probe.max_events is not None and
                Probe.event_count >= Probe.max_events):
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe to ``bpf`` and open its perf buffer."""
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback)

    def _attach_k(self, bpf):
        """Attach a kernel probe (kprobe or kretprobe)."""
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe (uprobe or uretprobe)."""
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            pass    # Was already enabled by the BPF constructor
        elif self.probe_type == "r":
            bpf.attach_uretprobe(name=libpath, sym=self.function,
                                 fn_name=self.probe_name, pid=Probe.pid)
        else:
            bpf.attach_uprobe(name=libpath, sym=self.function,
                              fn_name=self.probe_name, pid=Probe.pid)
class Probe():
    """Parse, load and attach BPF probes.

    A probe is specified as ``[type:][library:]pattern`` where type is one
    of ``p``, ``r``, ``t`` or ``u``. For USDT probes the form is
    ``u:<library>[:<provider>]:<probe>``.
    """

    def __init__(self, probe_spec, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False, cpu=None):
        """Parse ``probe_spec`` and resolve the library path if one is used.

        probe_spec   -- probe specification string (see class docstring)
        kernel_stack -- record the kernel stack id in the count key
        user_stack   -- record the user stack id in the count key
        use_regex    -- accepted but not used by this class
        pid          -- process id passed to USDT and uprobe attachment
        per_pid      -- accepted, but currently ignored (see FIXME below)
        cpu          -- stored on the instance; not used by this class

        Raises Exception if the probe type is invalid or the library
        cannot be found.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        if DEBUG:
            print(probe_spec)
        self._parse_spec(probe_spec)
        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if libpath is None or len(libpath) == 0:
                raise Exception("unable to find library %s" % self.library)
            # Decode only when the path is bytes; a str path is kept as-is
            # instead of raising TypeError.
            self.library = self._as_text(libpath)
        self.pid = pid
        # FIXME: don't hardcode this here.
        self.per_pid = True
        self.cpu = cpu
        self.matched = 0
        self.bpf = None
        self.usdt = None

    @staticmethod
    def _as_text(value):
        """Return ``value`` as str, decoding it as ASCII if it is bytes."""
        if isinstance(value, bytes):
            return value.decode("ascii")
        return value

    @staticmethod
    def _split_usdt_pattern(pattern):
        """Split a USDT pattern into ``(provider, probe_name)``.

        ``provider`` is None when the pattern has no ``provider:`` prefix.
        """
        parts = pattern.split(":")
        if len(parts) == 1:
            return None, parts[0]
        return parts[0], parts[1]

    @classmethod
    def _usdt_matches(cls, wanted_provider, wanted_name,
                      probe_provider, probe_name):
        """Return True if a USDT probe matches the requested pattern.

        The probe name must be equal; the provider is only compared when
        ``wanted_provider`` is not None. Probe values may be bytes or str.
        """
        if cls._as_text(probe_name) != wanted_name:
            return False
        if wanted_provider is None:
            return True
        return cls._as_text(probe_provider) == wanted_provider

    def _parse_spec(self, spec):
        """Set self.type, self.library and self.pattern from ``spec``."""
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.type = parts[0]
        else:
            raise Exception("probe type must be '', 'p', 't', 'r', "
                            "or 'u', but got '%s'" % parts[0])
        if self.type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.pattern = ":".join(parts[2:])
        else:
            self.library = ':'.join(parts[1:-1])
            self.pattern = parts[-1]

    def load(self):
        """Generate the BPF program text and create self.bpf from it.

        For USDT probes this also enables every matching probe and counts
        the matches in self.matched.
        """
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
                    key.user_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_USER_STACK
                    );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
                    key.kernel_stack_id = stack_traces.get_stackid(
                      %s, 0
                    );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
"""
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
"""

        # No filter statements are generated here, so FILTER expands to
        # an empty string.
        filter_text = []
        trace_count_text = trace_count_text.replace(
            'FILTER', '\n    '.join(filter_text))

        # Do per-pid statistics iff -P is provided
        if self.per_pid:
            trace_count_text = trace_count_text.replace(
                'GET_TGID', 'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace(
                'STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        if self.type == "u":
            # The pattern does not change while iterating: parse it once.
            provider_name, usdt_name = self._split_usdt_pattern(self.pattern)
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and \
                        self._as_text(probe.bin_path) != self.library:
                    continue
                if not self._usdt_matches(provider_name, usdt_name,
                                          probe.provider, probe.name):
                    continue
                # This hack is required because the bpf_usdt_readarg
                # functions generated need different function names for
                # each attached probe. If we just stick to trace_count,
                # we'd get multiple bpf_usdt_readarg helpers with the same
                # name when enabling more than one USDT probe.
                new_func = "trace_count_%d" % self.matched
                bpf_text += trace_count_text.replace(
                    "trace_count", new_func)
                self.usdt.enable_probe(self._as_text(probe.name), new_func)
                self.matched += 1
            if DEBUG:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if DEBUG:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])

    def attach(self):
        """Attach the loaded program and record the number of matches.

        Raises Exception if nothing matched the pattern.
        """
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass    # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)