class Tool(object):
    """Command-line driver for the trace tool.

    Parses the command line, builds one Probe per probe specifier,
    concatenates the BPF program text generated by the probes, loads it
    through BPF, attaches every probe and then polls the perf buffer
    until the process exits.
    """

    DEFAULT_PERF_BUFFER_PAGES = 64
    examples = """
EXAMPLES:

trace do_sys_open
        Trace the open syscall and print a default trace message when entered
trace 'do_sys_open "%s", arg2'
        Trace the open syscall and print the filename being opened
trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
        Trace the read syscall and print a message for reads >20000 bytes
trace 'r::do_sys_open "%llx", retval'
        Trace the return from the open syscall and print the return value
trace 'c:open (arg2 == 42) "%s %d", arg1, arg2'
        Trace the open() call from libc only if the flags (arg2) argument is 42
trace 'c:malloc "size = %d", arg1'
        Trace malloc calls and print the size being allocated
trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
        Trace the write() call from libc to monitor writes to STDOUT
trace 'r::__kmalloc (retval == 0) "kmalloc failed!"'
        Trace returns from __kmalloc which returned a null pointer
trace 'r:c:malloc (retval) "allocated = %x", retval'
        Trace returns from malloc and print non-NULL allocated buffers
trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
        Trace the block_rq_complete kernel tracepoint and print # of tx sectors
trace 'u:pthread:pthread_create (arg4 != 0)'
        Trace the USDT probe pthread_create when its 4th argument is non-zero
trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
        Trace the nanosleep syscall and print the sleep duration in ns
trace -I 'linux/fs.h' \\
      'p::uprobe_register(struct inode *inode) "a_ops = %llx", inode->i_mapping->a_ops'
        Trace the uprobe_register inode mapping ops, and the symbol can be found
        in /proc/kallsyms
trace -I 'kernel/sched/sched.h' \\
      'p::__account_cfs_rq_runtime(struct cfs_rq *cfs_rq) "%d", cfs_rq->runtime_remaining'
        Trace the cfs scheduling runqueue remaining runtime. The struct cfs_rq is defined
        in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
        package.  So this command needs to run at the kernel source tree root directory
        so that the added header file can be found by the compiler.
trace -I 'net/sock.h' \\
      'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
        Trace udpv6 sendmsg calls only if socket's destination port is equal
        to 53 (DNS; 13568 in big endian order)
trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
        Trace the number of users accessing the file system of the current task
"""

    def __init__(self):
        """Parse sys.argv and store the resulting namespace in self.args.

        Exits through parser.error() when both -p and -L are given.
        """
        parser = argparse.ArgumentParser(
            description="Attach to " +
                        "functions and print trace messages.",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=Tool.examples)
        parser.add_argument(
            "-b", "--buffer-pages", type=int,
            default=Tool.DEFAULT_PERF_BUFFER_PAGES,
            help="number of pages to use for perf_events ring buffer "
                 "(default: %(default)d)")
        # we'll refer to the userspace concepts of "pid" and "tid" by
        # their kernel names -- tgid and pid -- inside the script
        parser.add_argument(
            "-p", "--pid", type=int, metavar="PID", dest="tgid",
            help="id of the process to trace (optional)")
        parser.add_argument(
            "-L", "--tid", type=int, metavar="TID", dest="pid",
            help="id of the thread to trace (optional)")
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="print resulting BPF program code before executing")
        parser.add_argument(
            "-Z", "--string-size", type=int, default=80,
            help="maximum size to read from strings")
        parser.add_argument(
            "-S", "--include-self", action="store_true",
            help="do not filter trace's own pid from the trace")
        parser.add_argument(
            "-M", "--max-events", type=int,
            help="number of events to print before quitting")
        parser.add_argument(
            "-t", "--timestamp", action="store_true",
            help="print timestamp column (offset from trace start)")
        parser.add_argument(
            "-T", "--time", action="store_true",
            help="print time column")
        parser.add_argument(
            "-C", "--print_cpu", action="store_true",
            help="print CPU id")
        parser.add_argument(
            "-B", "--bin_cmp", action="store_true",
            help="allow to use STRCMP with binary values")
        parser.add_argument(
            "-s", "--sym_file_list", type=str,
            metavar="SYM_FILE_LIST", dest="sym_file_list",
            help="comma separated list of symbol files to use "
                 "for symbol resolution")
        parser.add_argument(
            "-K", "--kernel-stack", action="store_true",
            help="output kernel stack trace")
        parser.add_argument(
            "-U", "--user-stack", action="store_true",
            help="output user stack trace")
        parser.add_argument(
            "-a", "--address", action="store_true",
            help="print virtual address in stacks")
        parser.add_argument(
            metavar="probe", dest="probes", nargs="+",
            help="probe specifier (see examples)")
        parser.add_argument(
            "-I", "--include", action="append", metavar="header",
            help="additional header files to include in the BPF program "
                 "as either full path, "
                 "or relative to current working directory, "
                 "or relative to default kernel header search path")
        parser.add_argument(
            "--ebpf", action="store_true", help=argparse.SUPPRESS)
        self.args = parser.parse_args()
        if self.args.tgid and self.args.pid:
            parser.error("only one of -p and -L may be specified")

    def _create_probes(self):
        """Configure the Probe class and build one Probe per specifier."""
        Probe.configure(self.args)
        self.probes = [
            Probe(probe_spec, self.args.string_size,
                  self.args.kernel_stack, self.args.user_stack)
            for probe_spec in self.args.probes
        ]

    @staticmethod
    def _include_directive(include):
        """Return the #include line for one -I argument.

        Arguments starting with "." or "/" are made absolute and quoted;
        anything else is emitted in angle brackets.
        """
        if include.startswith((".", "/")):
            return "#include \"%s\"\n" % os.path.abspath(include)
        return "#include <%s>\n" % include

    def _generate_program(self):
        """Assemble the BPF program text into self.program.

        Prints the program with --verbose or --ebpf, and exits right
        after printing when --ebpf was given.
        """
        self.program = """
#include <linux/ptrace.h>
#include <linux/sched.h>        /* For TASK_COMM_LEN */

"""
        for include in (self.args.include or []):
            self.program += self._include_directive(include)
        # Pass a real list rather than a lazy map object: an iterator can
        # only be consumed once, a list can be scanned repeatedly.
        self.program += BPF.generate_auto_includes(
            [probe.raw_probe for probe in self.probes])
        for probe in self.probes:
            self.program += probe.generate_program(self.args.include_self)

        if self.args.verbose or self.args.ebpf:
            print(self.program)
            if self.args.ebpf:
                sys.exit()

    def _add_symbol_files(self):
        """Register every file named by -s with the loaded BPF object."""
        if self.args.sym_file_list is None:
            return
        print("Note: Kernel bpf will report stack map with ip/build_id")
        # An explicit loop is required: on Python 3 a bare map() call is
        # lazy and would never invoke add_module().
        for sym_file in self.args.sym_file_list.split(','):
            self.bpf.add_module(sym_file)

    def _attach_probes(self):
        """Load the BPF program and attach all probes to it."""
        usdt_contexts = []
        for probe in self.probes:
            if probe.usdt:
                # USDT probes must be enabled before the BPF object
                # is initialized, because that's where the actual
                # uprobe is being attached.
                probe.usdt.enable_probe(
                    probe.usdt_name, probe.probe_name)
                if self.args.verbose:
                    print(probe.usdt.get_text())
                usdt_contexts.append(probe.usdt)
        self.bpf = BPF(text=self.program, usdt_contexts=usdt_contexts)
        self._add_symbol_files()
        for probe in self.probes:
            if self.args.verbose:
                print(probe)
            probe.attach(self.bpf, self.args.verbose)

    def _main_loop(self):
        """Print the column header, then poll the perf buffer forever."""
        all_probes_trivial = all(map(Probe.is_default_action, self.probes))

        # Print header
        if self.args.timestamp or self.args.time:
            print("%-8s " % "TIME", end="")
        if self.args.print_cpu:
            print("%-3s " % "CPU", end="")
        print("%-7s %-7s %-15s %-16s %s" %
              ("PID", "TID", "COMM", "FUNC",
               "-" if not all_probes_trivial else ""))

        while True:
            self.bpf.perf_buffer_poll()

    def run(self):
        """Run the tool end to end and terminate the process.

        Always ends by raising SystemExit: status 0 when the pipeline
        itself requested an exit, status 1 for any other exception
        (including KeyboardInterrupt).
        """
        try:
            self._create_probes()
            self._generate_program()
            self._attach_probes()
            self._main_loop()
        except BaseException as err:
            # Deliberately broad: SystemExit and KeyboardInterrupt must be
            # intercepted here too so they are mapped to an exit status.
            sys_exit = isinstance(err, SystemExit)
            if self.args.verbose:
                traceback.print_exc()
            elif not sys_exit:
                print(err)
            sys.exit(0 if sys_exit else 1)
# Sample CPU 0 at 99 Hz with the "collect_stack_traces" BPF function.
bpf.attach_perf_event(ev_type=PerfType.SOFTWARE,
                      ev_config=PerfSWConfig.CPU_CLOCK,
                      fn_name="collect_stack_traces",
                      cpu=0, sample_freq=99)

# Keep sampling until the user interrupts with Ctrl-C.
try:
    sleep(999999999)
except KeyboardInterrupt:
    signal.signal(signal.SIGINT, signal.SIG_DFL)

cache = bpf["cache"]
traces = bpf["traces"]
bpf.detach_perf_event(ev_type=PerfType.SOFTWARE,
                      ev_config=PerfSWConfig.CPU_CLOCK)
bpf.add_module("/usr/lib64/libc-2.30.so")

# Report stacks in ascending order of their accumulated counter value.
for trace, acc in sorted(cache.items(), key=lambda item: item[1].value):
    # stack_id equal to -EFAULT marks a stack that could not be walked.
    if trace.stack_id == -errno.EFAULT:
        line = ["Unknown stack"]
    else:
        line = []
        stack_trace = list(traces.walk(trace.stack_id))
        for stack_address in reversed(stack_trace):
            # One list entry per frame: append the resolved symbol as a
            # whole (extend() would splat it into its individual elements).
            line.append(bpf.sym(stack_address, int(program_pid)))
    print(line)
    #frame = b";".join(line).decode("utf-8", "replace")
    #print(f"{frame} {acc.value}")
} """ b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", sample_period=0, sample_freq=49, cpu=0) # Add the list of libraries/executables to the build sym cache for sym resolution # Change the libc path if it is different on a different machine. # libc.so and ping are added here so that any symbols pertaining to # libc or ping are resolved. More executables/libraries can be added here. b.add_module(Get_libc_path()) b.add_module("/usr/sbin/sshd") b.add_module("/bin/ping") counts = b.get_table("counts") stack_traces = b.get_table("stack_traces") duration = 2 def signal_handler(signal, frame): print() try: sleep(duration) except KeyboardInterrupt: # as cleanup can take some time, trap Ctrl-C:
class Tool(object):
    """Command-line driver for the trace tool.

    Parses the command line, builds one Probe per probe specifier,
    concatenates the BPF program text generated by the probes, loads it
    through BPF (optionally filling a cgroup array map used for
    filtering), attaches every probe and then polls the perf buffer
    until the process exits.
    """

    DEFAULT_PERF_BUFFER_PAGES = 64
    examples = """
EXAMPLES:

trace do_sys_open
        Trace the open syscall and print a default trace message when entered
trace 'do_sys_open "%s", arg2'
        Trace the open syscall and print the filename being opened
trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
        Trace the read syscall and print a message for reads >20000 bytes
trace 'r::do_sys_open "%llx", retval'
        Trace the return from the open syscall and print the return value
trace 'c:open (arg2 == 42) "%s %d", arg1, arg2'
        Trace the open() call from libc only if the flags (arg2) argument is 42
trace 'c:malloc "size = %d", arg1'
        Trace malloc calls and print the size being allocated
trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
        Trace the write() call from libc to monitor writes to STDOUT
trace 'r::__kmalloc (retval == 0) "kmalloc failed!"'
        Trace returns from __kmalloc which returned a null pointer
trace 'r:c:malloc (retval) "allocated = %x", retval'
        Trace returns from malloc and print non-NULL allocated buffers
trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
        Trace the block_rq_complete kernel tracepoint and print # of tx sectors
trace 'u:pthread:pthread_create (arg4 != 0)'
        Trace the USDT probe pthread_create when its 4th argument is non-zero
trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
        Trace the nanosleep syscall and print the sleep duration in ns
trace -c /sys/fs/cgroup/system.slice/workload.service '__x64_sys_nanosleep' '__x64_sys_clone'
        Trace nanosleep/clone syscall calls only under workload.service
        cgroup hierarchy.
trace -I 'linux/fs.h' \\
      'p::uprobe_register(struct inode *inode) "a_ops = %llx", inode->i_mapping->a_ops'
        Trace the uprobe_register inode mapping ops, and the symbol can be found
        in /proc/kallsyms
trace -I 'kernel/sched/sched.h' \\
      'p::__account_cfs_rq_runtime(struct cfs_rq *cfs_rq) "%d", cfs_rq->runtime_remaining'
        Trace the cfs scheduling runqueue remaining runtime. The struct cfs_rq is defined
        in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
        package.  So this command needs to run at the kernel source tree root directory
        so that the added header file can be found by the compiler.
trace -I 'net/sock.h' \\
      'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
        Trace udpv6 sendmsg calls only if socket's destination port is equal
        to 53 (DNS; 13568 in big endian order)
trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
        Trace the number of users accessing the file system of the current task
"""

    def __init__(self):
        """Parse sys.argv into self.args and pick the cgroup map name.

        Exits through parser.error() when both -p and -L are given.
        self.cgroup_map_name is "__cgroup" when -c was supplied and
        None otherwise.
        """
        parser = argparse.ArgumentParser(
            description="Attach to " +
                        "functions and print trace messages.",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=Tool.examples)
        parser.add_argument(
            "-b", "--buffer-pages", type=int,
            default=Tool.DEFAULT_PERF_BUFFER_PAGES,
            help="number of pages to use for perf_events ring buffer "
                 "(default: %(default)d)")
        # we'll refer to the userspace concepts of "pid" and "tid" by
        # their kernel names -- tgid and pid -- inside the script
        parser.add_argument(
            "-p", "--pid", type=int, metavar="PID", dest="tgid",
            help="id of the process to trace (optional)")
        parser.add_argument(
            "-L", "--tid", type=int, metavar="TID", dest="pid",
            help="id of the thread to trace (optional)")
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="print resulting BPF program code before executing")
        parser.add_argument(
            "-Z", "--string-size", type=int, default=80,
            help="maximum size to read from strings")
        parser.add_argument(
            "-S", "--include-self", action="store_true",
            help="do not filter trace's own pid from the trace")
        parser.add_argument(
            "-M", "--max-events", type=int,
            help="number of events to print before quitting")
        parser.add_argument(
            "-t", "--timestamp", action="store_true",
            help="print timestamp column (offset from trace start)")
        parser.add_argument(
            "-u", "--unix-timestamp", action="store_true",
            help="print UNIX timestamp instead of offset from trace start, requires -t")
        parser.add_argument(
            "-T", "--time", action="store_true",
            help="print time column")
        parser.add_argument(
            "-C", "--print_cpu", action="store_true",
            help="print CPU id")
        parser.add_argument(
            "-c", "--cgroup-path", type=str,
            metavar="CGROUP_PATH", dest="cgroup_path",
            help="cgroup path")
        parser.add_argument(
            "-B", "--bin_cmp", action="store_true",
            help="allow to use STRCMP with binary values")
        parser.add_argument(
            "-s", "--sym_file_list", type=str,
            metavar="SYM_FILE_LIST", dest="sym_file_list",
            help="comma separated list of symbol files to use "
                 "for symbol resolution")
        parser.add_argument(
            "-K", "--kernel-stack", action="store_true",
            help="output kernel stack trace")
        parser.add_argument(
            "-U", "--user-stack", action="store_true",
            help="output user stack trace")
        parser.add_argument(
            "-a", "--address", action="store_true",
            help="print virtual address in stacks")
        parser.add_argument(
            metavar="probe", dest="probes", nargs="+",
            help="probe specifier (see examples)")
        parser.add_argument(
            "-I", "--include", action="append", metavar="header",
            help="additional header files to include in the BPF program "
                 "as either full path, "
                 "or relative to current working directory, "
                 "or relative to default kernel header search path")
        parser.add_argument(
            "--ebpf", action="store_true", help=argparse.SUPPRESS)
        self.args = parser.parse_args()
        if self.args.tgid and self.args.pid:
            parser.error("only one of -p and -L may be specified")
        if self.args.cgroup_path is not None:
            self.cgroup_map_name = "__cgroup"
        else:
            self.cgroup_map_name = None

    def _create_probes(self):
        """Configure the Probe class and build one Probe per specifier."""
        Probe.configure(self.args)
        self.probes = [
            Probe(probe_spec, self.args.string_size,
                  self.args.kernel_stack, self.args.user_stack,
                  self.cgroup_map_name)
            for probe_spec in self.args.probes
        ]

    @staticmethod
    def _include_directive(include):
        """Return the #include line for one -I argument.

        Arguments starting with "." or "/" are made absolute and quoted;
        anything else is emitted in angle brackets.
        """
        if include.startswith((".", "/")):
            return "#include \"%s\"\n" % os.path.abspath(include)
        return "#include <%s>\n" % include

    def _generate_program(self):
        """Assemble the BPF program text into self.program.

        Prints the program with --verbose or --ebpf, and exits right
        after printing when --ebpf was given.
        """
        self.program = """
#include <linux/ptrace.h>
#include <linux/sched.h>        /* For TASK_COMM_LEN */

"""
        for include in (self.args.include or []):
            self.program += self._include_directive(include)
        # Pass a real list rather than a lazy map object: an iterator can
        # only be consumed once, a list can be scanned repeatedly.
        self.program += BPF.generate_auto_includes(
            [probe.raw_probe for probe in self.probes])
        if self.cgroup_map_name is not None:
            self.program += "BPF_CGROUP_ARRAY(%s, 1);\n" % \
                            self.cgroup_map_name
        for probe in self.probes:
            self.program += probe.generate_program(self.args.include_self)

        if self.args.verbose or self.args.ebpf:
            print(self.program)
            if self.args.ebpf:
                sys.exit()

    def _add_symbol_files(self):
        """Register every file named by -s with the loaded BPF object."""
        if self.args.sym_file_list is None:
            return
        print("Note: Kernel bpf will report stack map with ip/build_id")
        # An explicit loop is required: on Python 3 a bare map() call is
        # lazy and would never invoke add_module().
        for sym_file in self.args.sym_file_list.split(','):
            self.bpf.add_module(sym_file)

    def _attach_probes(self):
        """Load the BPF program, fill the cgroup map and attach probes."""
        usdt_contexts = []
        for probe in self.probes:
            if probe.usdt:
                # USDT probes must be enabled before the BPF object
                # is initialized, because that's where the actual
                # uprobe is being attached.
                probe.usdt.enable_probe(probe.usdt_name, probe.probe_name)
                if self.args.verbose:
                    print(probe.usdt.get_text())
                usdt_contexts.append(probe.usdt)
        self.bpf = BPF(text=self.program, usdt_contexts=usdt_contexts)
        self._add_symbol_files()

        # if cgroup filter is requested, update the cgroup array map
        if self.cgroup_map_name is not None:
            cgroup_array = self.bpf.get_table(self.cgroup_map_name)
            cgroup_array[0] = self.args.cgroup_path

        for probe in self.probes:
            if self.args.verbose:
                print(probe)
            probe.attach(self.bpf, self.args.verbose)

    def _main_loop(self):
        """Print the column header, then poll the perf buffer forever."""
        all_probes_trivial = all(map(Probe.is_default_action, self.probes))

        # Print header
        if self.args.timestamp or self.args.time:
            # The TIME column is wider when UNIX timestamps are requested.
            col_fmt = "%-17s " if self.args.unix_timestamp else "%-8s "
            print(col_fmt % "TIME", end="")
        if self.args.print_cpu:
            print("%-3s " % "CPU", end="")
        print("%-7s %-7s %-15s %-16s %s" %
              ("PID", "TID", "COMM", "FUNC",
               "-" if not all_probes_trivial else ""))

        while True:
            self.bpf.perf_buffer_poll()

    def run(self):
        """Run the tool end to end and terminate the process.

        Always ends by raising SystemExit: status 0 when the pipeline
        itself requested an exit, status 1 for any other exception
        (including KeyboardInterrupt).
        """
        try:
            self._create_probes()
            self._generate_program()
            self._attach_probes()
            self._main_loop()
        except BaseException as err:
            # Deliberately broad: SystemExit and KeyboardInterrupt must be
            # intercepted here too so they are mapped to an exit status.
            sys_exit = isinstance(err, SystemExit)
            if self.args.verbose:
                traceback.print_exc()
            elif not sys_exit:
                print(err)
            sys.exit(0 if sys_exit else 1)
counts.increment(key); } return 0; } """ b = BPF(text=bpf_text) b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event", sample_period=0, sample_freq=49, cpu=0) # Add the list of libraries/executables to the build sym cache for sym resolution # Change the libc path if it is different on a different machine. # libc.so and ping are added here so that any symbols pertaining to # libc or ping are resolved. More executables/libraries can be added here. b.add_module(Get_libc_path()) b.add_module("/usr/sbin/sshd") b.add_module("/bin/ping") counts = b.get_table("counts") stack_traces = b.get_table("stack_traces") duration = 2 def signal_handler(signal, frame): print() try: sleep(duration) except KeyboardInterrupt: # as cleanup can take some time, trap Ctrl-C: signal.signal(signal.SIGINT, signal_ignore)