Beispiel #1
0
def print_usdt(pid, lib):
        """Print the details of each distinct USDT probe found in a target.

        Probes are enumerated through ``USDT(path=lib, pid=pid)``. A probe is
        reported when the module-level ``args.filter`` is empty or matches
        the probe's short name as an fnmatch pattern, and every short name is
        reported at most once.

        Args:
                pid: Forwarded to ``USDT`` as ``pid``.
                lib: Forwarded to ``USDT`` as ``path``.
        """
        reader = USDT(path=lib, pid=pid)
        # A set keeps duplicate detection constant-time per probe.
        probes_seen = set()
        for probe in reader.enumerate_probes():
                probe_name = probe.short_name()
                if args.filter and not fnmatch.fnmatch(probe_name,
                                                       args.filter):
                        continue
                if probe_name in probes_seen:
                        continue
                probes_seen.add(probe_name)
                print_usdt_details(probe)
Beispiel #2
0
def print_usdt(pid, lib):
    """Print the details of each distinct USDT probe found in a target.

    Probes are enumerated through ``USDT(path=lib, pid=pid)``. A probe is
    reported when the module-level ``args.filter`` is empty or matches the
    probe's short name as an fnmatch pattern, and every short name is
    reported at most once.

    Args:
        pid: Forwarded to ``USDT`` as ``pid``.
        lib: Forwarded to ``USDT`` as ``path``.
    """
    reader = USDT(path=lib, pid=pid)
    # A set keeps duplicate detection constant-time per probe.
    probes_seen = set()
    for probe in reader.enumerate_probes():
        probe_name = probe.short_name()
        if args.filter and not fnmatch.fnmatch(probe_name, args.filter):
            continue
        if probe_name in probes_seen:
            continue
        probes_seen.add(probe_name)
        print_usdt_details(probe)
Beispiel #3
0
def print_usdt(pid, lib):
        """Print each distinct USDT probe found in a target.

        Probes are enumerated through ``USDT(path=lib, pid=pid)``. A probe is
        printed when the module-level ``args.filter`` is empty or matches the
        probe's short name as an fnmatch pattern, and every short name is
        printed at most once. With ``args.variables`` set the probe object
        itself is printed; otherwise a "<bin_path> <provider>:<name>" line.

        Args:
                pid: Forwarded to ``USDT`` as ``pid``.
                lib: Forwarded to ``USDT`` as ``path``.
        """
        reader = USDT(path=lib, pid=pid)
        # A set keeps duplicate detection constant-time per probe.
        probes_seen = set()
        for probe in reader.enumerate_probes():
                probe_name = probe.short_name()
                if args.filter and not fnmatch.fnmatch(probe_name,
                                                       args.filter):
                        continue
                if probe_name in probes_seen:
                        continue
                probes_seen.add(probe_name)
                if args.variables:
                        print(probe)
                else:
                        print("%s %s:%s" % (probe.bin_path,
                                            probe.provider, probe.name))
Beispiel #4
0
def print_usdt(pid, lib):
    """Print each distinct USDT probe found in a target.

    Probes are enumerated through ``USDT(path=lib, pid=pid)``. A probe is
    printed when the module-level ``args.filter`` is empty or matches the
    probe's short name as an fnmatch pattern, and every short name is
    printed at most once. With ``args.variables`` set the probe object
    itself is printed; otherwise a "<bin_path> <provider>:<name>" line.

    Args:
        pid: Forwarded to ``USDT`` as ``pid``.
        lib: Forwarded to ``USDT`` as ``path``.
    """
    reader = USDT(path=lib, pid=pid)
    # A set keeps duplicate detection constant-time per probe.
    probes_seen = set()
    for probe in reader.enumerate_probes():
        probe_name = probe.short_name()
        if args.filter and not fnmatch.fnmatch(probe_name, args.filter):
            continue
        if probe_name in probes_seen:
            continue
        probes_seen.add(probe_name)
        if args.variables:
            print(probe)
        else:
            print("%s %s:%s" %
                  (probe.bin_path, probe.provider, probe.name))
Beispiel #5
0
class Probe(object):
        """A single trace probe parsed from a user-supplied specification.

        An instance parses the probe text, generates the BPF C source for it,
        attaches it, and prints the events it emits. The attributes below are
        shared by all probes; most of them are assigned by configure().
        """

        # Number of Probe instances constructed so far; __init__ uses it to
        # number each probe.
        probe_count = 0
        # Suffix for the next generated streq_<n> helper function.
        streq_index = 0
        # print_event exits once event_count reaches max_events
        # (None means no limit).
        max_events = None
        event_count = 0
        # Set from BPF.monotonic_time() in configure(); base for the relative
        # timestamps produced by _time_off_str.
        first_ts = 0
        # Whether a time column is printed, and whether it is wall-clock time
        # (strftime) or an offset from first_ts.
        print_time = False
        use_localtime = True
        # True when the event struct carries a timestamp_ns field.
        time_field = False
        print_cpu = False
        print_address = False
        # -1 means "no filter" for both tgid and pid (see generate_program).
        tgid = -1
        pid = -1
        # Passed as page_cnt to open_perf_buffer in attach().
        page_cnt = None
        # Selects BPF_STACK_TRACE_BUILDID over BPF_STACK_TRACE.
        build_id_enabled = False
        # NOTE(review): configure() also assigns cls.bin_cmp, which has no
        # default here, so _rewrite_expr requires configure() to run first.

        @classmethod
        def configure(cls, args):
                """Copy the parsed command-line options onto the class.

                Args:
                        args: Options object providing max_events, timestamp,
                                time, print_cpu, address, tgid, pid,
                                buffer_pages, bin_cmp and sym_file_list.
                """
                # Time display: --timestamp selects offsets from first_ts
                # instead of wall-clock time, and only then is a timestamp
                # field needed in the event struct.
                wants_offsets = args.timestamp
                cls.print_time = wants_offsets or args.time
                cls.use_localtime = not wants_offsets
                cls.time_field = cls.print_time and (not cls.use_localtime)
                cls.first_ts = BPF.monotonic_time()

                # Output columns and limits
                cls.max_events = args.max_events
                cls.print_cpu = args.print_cpu
                cls.print_address = args.address
                cls.page_cnt = args.buffer_pages

                # Process/thread filters; a falsy value means "no filter"
                cls.tgid = args.tgid or -1
                cls.pid = args.pid or -1

                cls.bin_cmp = args.bin_cmp
                cls.build_id_enabled = args.sym_file_list is not None

        def __init__(self, probe, string_size, kernel_stack, user_stack):
                """Parse ``probe`` and derive the name of its BPF function.

                Args:
                        probe: Raw probe specification text.
                        string_size: Size used for string (%s) fields.
                        kernel_stack: Whether to record kernel stack ids.
                        user_stack: Whether to record user stack ids.

                Raises:
                        ValueError: If the probe text cannot be parsed.
                """
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
                self.user_stack = user_stack
                self.usdt = None
                self.streq_functions = ""

                # The counter advances before parsing, so a probe that fails
                # to parse still consumes a number.
                Probe.probe_count += 1
                self._parse_probe()
                self.probe_num = Probe.probe_count

                raw_name = "probe_%s_%d" % (self._display_function(),
                                            self.probe_num)
                # Anything that is not valid in a C identifier becomes '_'
                c_name = re.sub(r'[^A-Za-z0-9_]', '_', raw_name)

                # compiler can generate proper codes for function
                # signatures with "syscall__" prefix, which replaces the
                # leading "probe_"
                if self.is_syscall_kprobe:
                        c_name = "syscall__" + c_name[6:]
                self.probe_name = c_name

        def __str__(self):
                """Return a one-line summary: type, library, target, filter
                and action (types/values)."""
                summary = (self.probe_type, self.library,
                           self._display_function(), self.filter,
                           self.types, self.values)
                return "%s:%s:%s FLT=%s ACT=%s/%s" % summary

        def is_default_action(self):
                """Return True when the probe has no format string of its own
                (python_format is the empty string)."""
                fmt = self.python_format
                return fmt == ""

        def _bail(self, error):
                """Abort probe processing.

                Raises:
                        ValueError: Always; the message names the raw probe
                                text and ``error``.
                """
                message = "error in probe '%s': %s" % (self.raw_probe, error)
                raise ValueError(message)

        def _parse_probe(self):
                """Split the raw probe text into spec, signature, predicate
                and printf action, and parse each part.

                Sets ``signature`` and ``filter`` directly ("1" when there is
                no predicate) and delegates to _parse_spec, _parse_filter and
                _parse_action for the rest.

                Raises:
                        ValueError: If the text has no probe specifier, if a
                                USDT/tracepoint probe carries a signature, if
                                the predicate's parentheses are unbalanced,
                                or if a sub-parser rejects its part.
                """
                text = self.raw_probe

                # There might be a function signature preceding the actual
                # filter/print part, or not. Find the probe specifier first --
                # it ends with either a space or an open paren ( for the
                # function signature part.
                #                                          opt. signature
                #                               probespec       |      rest
                #                               ---------  ----------   --
                match = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', text)
                if match is None:
                        # Empty text, or text starting with whitespace or '('
                        self._bail("expected a probe specifier")
                (spec, sig, rest) = match.groups()

                self._parse_spec(spec)
                # Remove the parens
                self.signature = sig[1:-1] if sig else None
                if self.signature and self.probe_type in ['u', 't']:
                        self._bail("USDT and tracepoint probes can't have " +
                                   "a function signature; use arg1, arg2, " +
                                   "... instead")

                text = rest.lstrip()
                # If we now have a (, wait for the balanced closing ) and that
                # will be the predicate
                self.filter = None
                if text.startswith("("):
                        balance = 0
                        for i, char in enumerate(text):
                                if char == "(":
                                        balance += 1
                                elif char == ")":
                                        balance -= 1
                                if balance == 0:
                                        self._parse_filter(text[:i + 1])
                                        text = text[i + 1:]
                                        break
                        if self.filter is None:
                                self._bail("unmatched end of predicate")

                if self.filter is None:
                        self.filter = "1"

                # The remainder of the text is the printf action
                self._parse_action(text.lstrip())

        def _parse_spec(self, spec):
                """Decode the colon-separated probe specifier.

                Sets probe_type, library, function and is_syscall_kprobe, plus
                tp_category/tp_event for tracepoints and usdt_name for USDT
                probes (which are also looked up via _find_usdt_probe).

                Raises:
                        ValueError: If the probe type is not recognized.
                """
                parts = spec.split(":")
                # Two special cases: 'func' means 'p::func', 'lib:func' means
                # 'p:lib:func'. Other combinations need to provide an empty
                # value between delimiters, e.g. 'r::func' for a kretprobe on
                # the function func.
                if len(parts) == 1:
                        parts = ["p", "", parts[0]]
                elif len(parts) == 2:
                        parts = ["p", parts[0], parts[1]]

                kind = parts[0]
                if kind == "":
                        self.probe_type = "p"
                elif kind in ("p", "r", "t", "u"):
                        self.probe_type = kind
                else:
                        self._bail("probe type must be '', 'p', 't', 'r', " +
                                   "or 'u', but got '%s'" % kind)

                if self.probe_type == "t":
                        self.tp_category, self.tp_event = parts[1], parts[2]
                        self.library = ""       # kernel
                        self.function = ""      # from TRACEPOINT_PROBE
                else:
                        # Everything between the type and the last component
                        # is the library, so paths may themselves contain ':'
                        self.library = ':'.join(parts[1:-1])
                        if self.probe_type == "u":
                                self.usdt_name = parts[-1]
                                self.function = ""  # no function, just address
                                # We will discover the USDT provider by
                                # matching on the USDT name in the specified
                                # library
                                self._find_usdt_probe()
                        else:
                                self.function = parts[-1]

                # only x64 syscalls needs checking, no other syscall wrapper
                # yet.
                self.is_syscall_kprobe = (
                        self.probe_type == "p" and
                        len(self.library) == 0 and
                        self.function[:10] == "__x64_sys_")

        def _find_usdt_probe(self):
                """Open a USDT context for the library and verify the probe.

                The context targets Probe.pid when it is set (truthy and not
                -1), otherwise Probe.tgid, and is stored in ``self.usdt``.

                Raises:
                        ValueError: If no enumerated probe is named
                                ``self.usdt_name``.
                """
                if Probe.pid and Probe.pid != -1:
                        target = Probe.pid
                else:
                        target = Probe.tgid
                self.usdt = USDT(path=self.library, pid=target)
                # Probe names are compared against the ASCII-encoded name;
                # a match just confirms existence, enabling happens later.
                found = any(probe.name == self.usdt_name.encode('ascii')
                            for probe in self.usdt.enumerate_probes())
                if not found:
                        self._bail("unrecognized USDT probe %s" %
                                   self.usdt_name)

        def _parse_filter(self, filt):
                """Store the predicate ``filt`` after alias rewriting."""
                rewritten = self._rewrite_expr(filt)
                self.filter = rewritten

        def _parse_types(self, fmt):
                """Collect the format specifiers of ``fmt`` and build the
                Python-side format string.

                Each recognized specifier is appended to ``self.types``. For
                ``self.python_format`` the integer widths collapse to %d or
                %x, %K/%U become %s, and surrounding double quotes are
                stripped.
                """
                specifier_re = \
                    r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)'
                # With a single group, findall yields just the specifier text
                self.types.extend(re.findall(specifier_re, fmt))

                rewrites = (
                        (r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d'),
                        (r'([^%]%)(x|lx|llx)', r'\1x'),
                        ('%K|%U', '%s'),
                )
                for pattern, replacement in rewrites:
                        fmt = re.sub(pattern, replacement, fmt)
                self.python_format = fmt.strip('"')

        def _parse_action(self, action):
                """Parse the printf-style action: a quoted format string
                followed by comma-separated value expressions.

                Resets ``values``, ``types`` and ``python_format``; an empty
                action leaves them empty. Otherwise also sets ``raw_format``.

                Raises:
                        ValueError: If no double-quoted format string is
                                found.
                """
                self.values = []
                self.types = []
                self.python_format = ""
                if not action:
                        return

                match = re.search(r'(\".*?\"),?(.*)', action.strip())
                if match is None:
                        self._bail("expected format string in \"s")

                self.raw_format, value_text = match.groups()
                self._parse_types(self.raw_format)
                # Split on commas, except one directly preceded by a quote
                rewritten = (self._rewrite_expr(piece)
                             for piece in re.split('(?<!"),', value_text))
                self.values.extend(piece for piece in rewritten if piece)

        # argN aliases for ordinary (non-syscall-wrapper) probes: each maps
        # directly to a register accessor on the probe's ctx.
        aliases_arg = {
                "arg1": "PT_REGS_PARM1(ctx)",
                "arg2": "PT_REGS_PARM2(ctx)",
                "arg3": "PT_REGS_PARM3(ctx)",
                "arg4": "PT_REGS_PARM4(ctx)",
                "arg5": "PT_REGS_PARM5(ctx)",
                "arg6": "PT_REGS_PARM6(ctx)",
        }

        # argN aliases used when is_syscall_kprobe is set (see
        # _rewrite_expr): the first parameter of ctx is treated as a pointer
        # to another pt_regs, and argN is read from that inner structure
        # with bpf_probe_read.
        aliases_indarg = {
                "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
                "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
                "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
                "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
                "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
                "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
        }

        # Aliases substituted for every probe type, after the argN aliases.
        aliases_common = {
                "retval": "PT_REGS_RC(ctx)",
                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
                "$cpu": "bpf_get_smp_processor_id()",
                "$task" : "((struct task_struct *)bpf_get_current_task())"
        }

        def _generate_streq_function(self, string):
                """Append a C helper comparing a probed string to ``string``.

                The helper is named streq_<n>, where n is the class-wide
                Probe.streq_index (incremented on every call), and its source
                is appended to ``self.streq_functions``.

                Args:
                        string: Text emitted verbatim as the initializer of
                                the helper's ``needle`` array.

                Returns:
                        The name of the generated helper.
                """
                fname = "streq_%d" % Probe.streq_index
                Probe.streq_index += 1
                helper_template = """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """
                self.streq_functions += helper_template % (fname, string)
                return fname

        def _rewrite_expr(self, expr):
                """Expand aliases and STRCMP calls in a user expression.

                argN aliases come from aliases_indarg for syscall kprobes and
                from aliases_arg otherwise; the common aliases are applied
                afterwards. Every STRCMP("...") occurrence is renamed to a
                freshly generated streq_<n> helper.

                Returns:
                        The rewritten expression text.
                """
                if self.is_syscall_kprobe:
                        arg_aliases = Probe.aliases_indarg
                elif self.probe_type == "u":
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using
                        # bpf_readarg_N macros emitted at BPF construction.
                        arg_aliases = {}
                else:
                        arg_aliases = Probe.aliases_arg

                for table in (arg_aliases, Probe.aliases_common):
                        for alias, replacement in table.items():
                                expr = expr.replace(alias, replacement)

                # With bin_cmp the captured needle excludes the surrounding
                # quotes; otherwise the quotes are part of the capture.
                if self.bin_cmp:
                        strcmp_re = 'STRCMP\\(\"([^"]+)\\"'
                else:
                        strcmp_re = 'STRCMP\\(("[^"]+\\")'
                for match in re.finditer(strcmp_re, expr):
                        fname = self._generate_streq_function(match.group(1))
                        expr = expr.replace("STRCMP", fname, 1)
                return expr

        # ctypes type used on the Python side for each non-string format
        # specifier (strings are handled separately in
        # _generate_python_field_decl).
        p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
                  "ld": ct.c_long,
                  "llu": ct.c_ulonglong, "lld": ct.c_longlong,
                  "hu": ct.c_ushort, "hd": ct.c_short,
                  "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
                  "c": ct.c_ubyte,
                  "K": ct.c_ulonglong, "U": ct.c_ulonglong}

        def _generate_python_field_decl(self, idx, fields):
                field_type = self.types[idx]
                if field_type == "s":
                        ptype = ct.c_char * self.string_size
                else:
                        ptype = Probe.p_type[field_type]
                fields.append(("v%d" % idx, ptype))

        def _generate_python_data_decl(self):
                self.python_struct_name = "%s_%d_Data" % \
                                (self._display_function(), self.probe_num)
                fields = []
                if self.time_field:
                    fields.append(("timestamp_ns", ct.c_ulonglong))
                if self.print_cpu:
                    fields.append(("cpu", ct.c_int))
                fields.extend([
                        ("tgid", ct.c_uint),
                        ("pid", ct.c_uint),
                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
                ])
                for i in range(0, len(self.types)):
                        self._generate_python_field_decl(i, fields)
                if self.kernel_stack:
                        fields.append(("kernel_stack_id", ct.c_int))
                if self.user_stack:
                        fields.append(("user_stack_id", ct.c_int))
                return type(self.python_struct_name, (ct.Structure,),
                            dict(_fields_=fields))

        # C type emitted in the generated BPF struct for each non-string
        # format specifier.
        c_type = {"u": "unsigned int", "d": "int",
                  "lu": "unsigned long", "ld": "long",
                  "llu": "unsigned long long", "lld": "long long",
                  "hu": "unsigned short", "hd": "short",
                  "x": "unsigned int", "lx": "unsigned long",
                  "llx": "unsigned long long",
                  "c": "char", "K": "unsigned long long",
                  "U": "unsigned long long"}
        # The specifiers accepted by _generate_field_decl and
        # _generate_field_assign (besides "s").
        fmt_types = c_type.keys()

        def _generate_field_decl(self, idx):
                """Return the C declaration of struct member v<idx>.

                Raises:
                        ValueError: If the specifier at ``idx`` is neither
                                "s" nor a key of ``Probe.c_type``.
                """
                specifier = self.types[idx]
                if specifier == "s":
                        return "char v%d[%d];\n" % (idx, self.string_size)
                if specifier not in Probe.fmt_types:
                        self._bail("unrecognized format specifier %s" %
                                   specifier)
                return "%s v%d;\n" % (Probe.c_type[specifier], idx)

        def _generate_data_decl(self):
                """Return the C source declaring this probe's event struct,
                its perf output table and, if stacks are requested, its
                stack table.

                Also records ``events_name``, ``struct_name`` and
                ``stacks_name``, all derived from ``probe_name``.
                """
                # The BPF program will populate values into the struct
                # according to the format string, and the Python program will
                # construct the final display string.
                self.events_name = "%s_events" % self.probe_name
                self.struct_name = "%s_data_t" % self.probe_name
                self.stacks_name = "%s_stacks" % self.probe_name
                # The build-id flavour of the stack table is used only when
                # build_id_enabled is set.
                stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
                             else "BPF_STACK_TRACE_BUILDID"
                # A stack table is declared only when a stack is requested.
                stack_table = "%s(%s, 1024);" % (stack_type,self.stacks_name) \
                              if (self.kernel_stack or self.user_stack) else ""
                # One struct member per format specifier, in order.
                data_fields = ""
                for i, field_type in enumerate(self.types):
                        data_fields += "        " + \
                                       self._generate_field_decl(i)
                time_str = "u64 timestamp_ns;" if self.time_field else ""
                cpu_str = "int cpu;" if self.print_cpu else ""
                kernel_stack_str = "       int kernel_stack_id;" \
                                   if self.kernel_stack else ""
                user_stack_str = "       int user_stack_id;" \
                                 if self.user_stack else ""

                # The member order here mirrors the field order built in
                # _generate_python_data_decl.
                text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
                return text % (self.struct_name, time_str, cpu_str, data_fields,
                               kernel_stack_str, user_stack_str,
                               self.events_name, stack_table)

        def _generate_field_assign(self, idx):
                """Return the C statements that store value ``idx`` into
                ``__data.v<idx>``.

                Raises:
                        ValueError: If the specifier at ``idx`` is neither
                                "s" nor a key of ``Probe.c_type``.
                """
                field_type = self.types[idx]
                expr = self.values[idx].strip()
                text = ""
                # For USDT probes an argN expression is first read into a
                # local variable of the argument's C type. Only the single
                # character after "arg" is used as the argument number.
                if self.probe_type == "u" and expr[0:3] == "arg":
                        arg_index = int(expr[3])
                        arg_ctype = self.usdt.get_probe_arg_ctype(
                                self.usdt_name, arg_index - 1)
                        text = ("        %s %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                                % (arg_ctype, expr, expr[3], expr)

                # Strings are copied with bpf_probe_read, and only when the
                # expression is non-zero.
                if field_type == "s":
                        return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, idx, idx)
                # Everything else is a cast-and-assign.
                if field_type in Probe.fmt_types:
                        return text + "        __data.v%d = (%s)%s;\n" % \
                                        (idx, Probe.c_type[field_type], expr)
                self._bail("unrecognized field type %s" % field_type)

        def _generate_usdt_filter_read(self):
            """Return C code reading the USDT arguments used by the filter.

            For non-USDT probes the result is empty. For USDT probes, every
            argN alias that occurs in ``self.filter`` gets an
            ``<argN>_filter`` local filled by bpf_usdt_readarg, and
            ``self.filter`` is rewritten in place to use that local.

            Raises:
                ValueError: If the C type of a referenced argument cannot
                    be determined.
            """
            text = ""
            if self.probe_type != "u":
                    return text
            for arg, _ in Probe.aliases_arg.items():
                    # Plain substring test on the filter text
                    if not (arg in self.filter):
                            continue
                    arg_index = int(arg.replace("arg", ""))
                    arg_ctype = self.usdt.get_probe_arg_ctype(
                            self.usdt_name, arg_index - 1)
                    if not arg_ctype:
                            self._bail("Unable to determine type of {} "
                                       "in the filter".format(arg))
                    text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
                    # Side effect: the filter now refers to the local
                    self.filter = self.filter.replace(
                            arg, "{}_filter".format(arg))
            return text

        def generate_program(self, include_self):
                """Return the complete BPF C source for this probe.

                The result is the generated streq helpers, the data
                declarations from _generate_data_decl, and the probe function
                itself, which applies the pid/tgid and user filters, fills
                the event struct and submits it to the perf buffer.

                Args:
                        include_self: When false and no pid/tgid filter
                                applies, events from this process
                                (os.getpid()) are dropped.
                """
                data_decl = self._generate_data_decl()
                # Exactly one process filter is chosen, in priority order.
                if Probe.pid != -1:
                        pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
                # uprobes can have a built-in tgid filter passed to
                # attach_uprobe, hence the check here -- for kprobes, we
                # need to do the tgid test by hand:
                elif len(self.library) == 0 and Probe.tgid != -1:
                        pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
                elif not include_self:
                        pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
                else:
                        pid_filter = ""

                # prefix is always empty here; it only fills a template slot.
                prefix = ""
                signature = "struct pt_regs *ctx"
                if self.signature:
                        signature += ", " + self.signature

                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                # Tracepoints use the TRACEPOINT_PROBE macro and name their
                # context "args"; every other probe is a plain function
                # taking ctx.
                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"

                time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
                cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

                text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                # Arguments are positional and evaluated left to right:
                # _generate_usdt_filter_read() runs before self.filter is
                # read, so the filter slot receives the rewritten filter.
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, time_str, cpu_str, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return self.streq_functions + data_decl + "\n" + text

        @classmethod
        def _time_off_str(cls, timestamp_ns):
                """Format ``timestamp_ns`` as seconds elapsed since
                ``cls.first_ts``, with six decimal places."""
                elapsed_ns = timestamp_ns - cls.first_ts
                return "%.6f" % (1e-9 * elapsed_ns)

        def _display_function(self):
                """Return the name shown for this probe: the function for
                'p'/'r' probes, the USDT name for 'u' probes, and the
                tracepoint event otherwise."""
                kind = self.probe_type
                if kind in ('p', 'r'):
                        return self.function
                if kind == 'u':
                        return self.usdt_name
                # kind == 't'
                return self.tp_event

        def print_stack(self, bpf, stack_id, tgid):
            if stack_id < 0:
                print("        %d" % stack_id)
                return

            stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
            for addr in stack:
                print("        ", end="")
                if Probe.print_address:
                    print("%16x " % addr, end="")
                print("%s" % (bpf.sym(addr, tgid,
                                     show_module=True, show_offset=True)))

        def _format_message(self, bpf, tgid, values):
                """Render the event message from ``values``.

                ``values`` is modified in place: entries whose specifier is
                %K are replaced by kernel symbols, then entries whose
                specifier is %U by user symbols resolved in ``tgid``.

                Returns:
                        ``python_format`` interpolated with the values.
                """
                # Kernel symbols first, in a separate pass from user symbols
                for idx, specifier in enumerate(self.types):
                        if specifier == 'K':
                                values[idx] = bpf.ksym(values[idx],
                                                       show_offset=True)
                for idx, specifier in enumerate(self.types):
                        if specifier == 'U':
                                values[idx] = bpf.sym(values[idx], tgid,
                                                      show_module=True,
                                                      show_offset=True)
                return self.python_format % tuple(values)

        def print_event(self, bpf, cpu, data, size):
                # Cast as the generated structure type and display
                # according to the format string in the probe.
                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
                values = map(lambda i: getattr(event, "v%d" % i),
                             range(0, len(self.values)))
                msg = self._format_message(bpf, event.tgid, values)
                if Probe.print_time:
                    time = strftime("%H:%M:%S") if Probe.use_localtime else \
                           Probe._time_off_str(event.timestamp_ns)
                    print("%-8s " % time[:8], end="")
                if Probe.print_cpu:
                    print("%-3s " % event.cpu, end="")
                print("%-7d %-7d %-15s %-16s %s" %
                      (event.tgid, event.pid,
                       event.comm.decode('utf-8', 'replace'),
                       self._display_function(), msg))

                if self.kernel_stack:
                        self.print_stack(bpf, event.kernel_stack_id, -1)
                if self.user_stack:
                        self.print_stack(bpf, event.user_stack_id, event.tgid)
                if self.user_stack or self.kernel_stack:
                        print("")

                Probe.event_count += 1
                if Probe.max_events is not None and \
                   Probe.event_count >= Probe.max_events:
                        exit()

        def attach(self, bpf, verbose):
                if len(self.library) == 0:
                        self._attach_k(bpf)
                else:
                        self._attach_u(bpf)
                self.python_struct = self._generate_python_data_decl()
                callback = partial(self.print_event, bpf)
                bpf[self.events_name].open_perf_buffer(callback,
                        page_cnt=self.page_cnt)

        def _attach_k(self, bpf):
                if self.probe_type == "r":
                        bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_name)
                elif self.probe_type == "p":
                        bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_name)
                # Note that tracepoints don't need an explicit attach

        def _attach_u(self, bpf):
                """Attach a user-space probe to ``self.library``.

                The library is resolved with BPF.find_library, falling back
                to BPF.find_exe. USDT probes need no attach call here;
                'r' probes get a uretprobe and everything else a uprobe,
                both restricted to Probe.tgid.

                Raises:
                        ValueError: If the library cannot be located.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        # This might be an executable (e.g. 'bash')
                        libpath = BPF.find_exe(self.library)
                if not libpath:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "u":
                        return  # Was already enabled by the BPF constructor

                if self.probe_type == "r":
                        attach_fn = bpf.attach_uretprobe
                else:
                        attach_fn = bpf.attach_uprobe
                attach_fn(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_name,
                          pid=Probe.tgid)
Beispiel #6
0
class Probe(object):
    """Counts hits on every location matched by a probe pattern.

    Each matched kernel function, user function, tracepoint or USDT probe
    gets its own generated BPF handler that bumps one slot of a shared
    `counts` array.
    """

    def __init__(self, pattern, use_regex=False, pid=None):
        """Build a counting probe from a user-supplied pattern.

        The pattern syntax follows the 'trace' and 'argdist' tools, minus
        the probe/retprobe distinction, which is not needed here.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            /path:func      -- probe a user-space function in binary '/path'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Unless use_regex is set, '*' acts as a wildcard and the pattern is
        anchored at both ends. Raises Exception for an unknown probe type,
        for too many ':' components, or when the library cannot be found.
        """
        fields = pattern.split(':')
        if len(fields) > 3:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)
        if len(fields) == 1:
            fields = ["p", "", fields[0]]
        elif len(fields) == 2:
            fields = ["p", fields[0], fields[1]]
        else:
            if fields[0] == "t":
                # Tracepoints keep 'category:event' together as the pattern
                fields = ["t", "", "%s:%s" % (fields[1], fields[2])]
            if fields[0] not in ("p", "t", "u"):
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])

        self.type, self.library, self.pattern = fields
        if not use_regex:
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        if self.type == "u" or (self.type == "p" and self.library):
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.matched = 0
        self.trace_functions = {}   # map location number to function name

    def is_kernel_probe(self):
        """Return True for tracepoints and for 'p' probes with no library."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach every generated handler to its matched location.

        USDT probes need nothing here: they were enabled while the program
        text was generated in `load`.
        """
        locations = self.trace_functions.items()
        if self.type == "t":
            for index, tracepoint in locations:
                self.bpf.attach_tracepoint(
                        tp=tracepoint,
                        fn_name="trace_count_%d" % index)
        elif self.type == "p" and self.library:
            for index, symbol in locations:
                self.bpf.attach_uprobe(
                        name=self.library,
                        sym=symbol,
                        fn_name="trace_count_%d" % index,
                        pid=self.pid or -1)
        elif self.type == "p":
            for index, kernel_function in locations:
                self.bpf.attach_kprobe(
                        event=kernel_function,
                        fn_name="trace_count_%d" % index)

    def _add_function(self, template, probe_name):
        """Instantiate the handler template for the next free location.

        Records probe_name under the current location number, advances the
        match counter and returns the generated handler text.
        """
        location = self.matched
        handler = template.replace("PROBE_FUNCTION",
                                   "trace_count_%d" % location)
        handler = handler.replace("LOCATION", str(location))
        self.trace_functions[location] = probe_name
        self.matched = location + 1
        return handler

    def _generate_functions(self, template):
        """Return the concatenated handler text for every matched location.

        Also sets self.usdt: a USDT context for 'u' probes, None otherwise.
        """
        self.usdt = None
        chunks = []
        if self.type == "t":
            tracepoints = BPF.get_tracepoints(self.pattern)
            verify_limit(len(tracepoints))
            for tracepoint in tracepoints:
                chunks.append(self._add_function(template, tracepoint))
        elif self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            names = []
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only accept probes from the chosen binary
                if not (self.pid or probe.bin_path == self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    names.append(probe.name)
            verify_limit(len(names))
            for name in names:
                # Capture the handler name before _add_function advances
                # the match counter.
                handler_name = "trace_count_%d" % self.matched
                chunks.append(self._add_function(template, name))
                self.usdt.enable_probe(name, handler_name)
            if debug:
                print(self.usdt.get_text())
        elif self.type == "p" and self.library:
            # uprobes are tricky because the same function may have multiple
            # addresses, and the same address may be mapped to multiple
            # functions. We aren't allowed to create more than one uprobe
            # per address, so track unique addresses and ignore functions that
            # map to an address that we've already seen. Also ignore functions
            # that may repeat multiple times with different addresses.
            seen_addresses = set()
            seen_names = set()
            candidates = BPF.get_user_functions_and_addresses(
                                        self.library, self.pattern)
            verify_limit(len(candidates))
            for name, address in candidates:
                if address in seen_addresses or name in seen_names:
                    continue
                seen_addresses.add(address)
                seen_names.add(name)
                chunks.append(self._add_function(template, name))
        elif self.type == "p":
            kernel_functions = BPF.get_kprobe_functions(self.pattern)
            verify_limit(len(kernel_functions))
            for kernel_function in kernel_functions:
                chunks.append(self._add_function(template, kernel_function))
        return "".join(chunks)

    def load(self):
        """Generate the BPF program, compile it and zero the counters.

        Raises Exception when the pattern matched nothing.
        """
        handler_template = """
int PROBE_FUNCTION(void *ctx) {
    FILTER
    int loc = LOCATION;
    u64 *val = counts.lookup(&loc);
    if (!val) {
        return 0;   // Should never happen, # of locations is known
    }
    (*val)++;
    return 0;
}
        """
        program = """#include <uapi/linux/ptrace.h>

BPF_ARRAY(counts, u64, NUMLOCATIONS);
        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        pid_filter = ''
        if self.pid:
            pid_filter = """u32 pid = bpf_get_current_pid_tgid() >> 32;
                   if (pid != %d) { return 0; }""" % self.pid
        handler_template = handler_template.replace('FILTER', pid_filter)

        program += self._generate_functions(handler_template)
        location_count = str(len(self.trace_functions))
        program = program.replace("NUMLOCATIONS", location_count)
        if debug:
            print(program)

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

        usdt_contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=program, usdt_contexts=usdt_contexts)
        self.clear()    # Initialize all array items to zero

    def counts(self):
        """Return the BPF `counts` table."""
        return self.bpf["counts"]

    def clear(self):
        """Reset the counter of every known location to a default Leaf."""
        table = self.bpf["counts"]
        for location in list(self.trace_functions):
            table[table.Key(location)] = table.Leaf()
Beispiel #7
0
class Probe(object):
    """Counts events per stack trace for locations matching a pattern.

    A single BPF handler increments a hash keyed by tgid, kernel stack id,
    user stack id and process name.
    """

    def __init__(self, pattern, kernel_stack, user_stack, use_regex=False,
                 pid=None, per_pid=False):
        """Build a stack-counting probe from a user-supplied pattern.

        The pattern syntax follows the 'trace' and 'argdist' tools, minus
        the probe/retprobe distinction, which is not needed here.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        kernel_stack / user_stack select which stack ids are recorded.
        Unless use_regex is set, '*' acts as a wildcard and the pattern is
        anchored at both ends. Raises Exception for an unknown probe type,
        for too many ':' components, or when the library cannot be found.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack

        fields = pattern.split(':')
        if len(fields) > 3:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)
        if len(fields) == 1:
            fields = ["p", "", fields[0]]
        elif len(fields) == 2:
            fields = ["p", fields[0], fields[1]]
        else:
            if fields[0] == "t":
                # Tracepoints keep 'category:event' together as the pattern
                fields = ["t", "", "%s:%s" % (fields[1], fields[2])]
            if fields[0] not in ("p", "t", "u"):
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])

        self.type, self.library, self.pattern = fields
        if not use_regex:
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        if self.type == "u" or (self.type == "p" and self.library):
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.per_pid = per_pid
        self.matched = 0

    def is_kernel_probe(self):
        """Return True for tracepoints and for 'p' probes with no library."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach the handler to every location matching the pattern.

        USDT probes need nothing here: they were enabled in `load`, which
        also counted them. Raises Exception when nothing matched.
        """
        if self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "p" and self.library:
            self.bpf.attach_uprobe(name=self.library,
                                   sym_re=self.pattern,
                                   fn_name="trace_count",
                                   pid=self.pid or -1)
            self.matched = self.bpf.num_open_uprobes()
        elif self.type == "p":
            self.bpf.attach_kprobe(event_re=self.pattern,
                                   fn_name="trace_count")
            self.matched = self.bpf.num_open_kprobes()

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF program text and compile it into self.bpf.

        For USDT probes one renamed copy of the handler is emitted per
        matching probe and enabled on the USDT context.
        """
        ctx_name = "ctx"
        if self.user_stack:
            user_part = """
                    key.user_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
                    );""" % (ctx_name)
        else:
            user_part = "key.user_stack_id = -1;"
        if self.kernel_stack:
            kernel_part = """
                    key.kernel_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_REUSE_STACKID
                    );""" % (ctx_name)
        else:
            kernel_part = "key.kernel_stack_id = -1;"
        stack_trace = user_part + kernel_part

        handler = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
        """
        handler = handler % (stack_trace)

        program = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        pid_filter = ''
        if self.is_kernel_probe() and self.pid:
            pid_filter = (
                ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                 'if (pid != %d) { return 0; }') % (self.pid))

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P, or for user stacks.
        if self.per_pid or not self.is_kernel_probe() or self.user_stack:
            tgid_expr = 'bpf_get_current_pid_tgid() >> 32'
            comm_stmt = 'bpf_get_current_comm(&key.name, sizeof(key.name));'
        else:
            # kernel stacks only. skip splitting on PID so these aggregate
            # together, and don't store the process name.
            tgid_expr = '0xffffffff'
            comm_stmt = ''

        substitutions = (('FILTER', pid_filter),
                         ('GET_TGID', tgid_expr),
                         ('STORE_COMM', comm_stmt))
        for placeholder, value in substitutions:
            handler = handler.replace(placeholder, value)

        self.usdt = None
        if self.type != "u":
            program += handler
        else:
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only accept probes from the chosen binary
                if not (self.pid or probe.bin_path == self.library):
                    continue
                if not re.match(self.pattern, probe.name):
                    continue
                # This hack is required because the bpf_usdt_readarg
                # functions generated need different function names for
                # each attached probe. If we just stick to trace_count,
                # we'd get multiple bpf_usdt_readarg helpers with the same
                # name when enabling more than one USDT probe.
                handler_name = "trace_count_%d" % self.matched
                program += handler.replace("trace_count", handler_name)
                self.usdt.enable_probe(probe.name, handler_name)
                self.matched += 1
            if debug:
                print(self.usdt.get_text())

        if debug:
            print(program)
        usdt_contexts = [self.usdt] if self.usdt else []
        self.bpf = BPF(text=program, usdt_contexts=usdt_contexts)
Beispiel #8
0
class Probe(object):
        # Class-level state shared by every probe. configure() overwrites
        # most of these from the parsed command-line arguments.
        probe_count = 0         # bumped once per constructed Probe
        streq_index = 0         # next suffix for generated streq_N helpers
        max_events = None       # set from args.max_events
        # event_count is not touched in this part of the file; presumably
        # it tracks how many events were printed -- TODO confirm.
        event_count = 0
        first_ts = 0            # BPF.monotonic_time() taken in configure()
        use_localtime = True    # configure() stores `not args.timestamp`
        tgid = -1               # args.tgid, or -1 when not given
        pid = -1                # args.pid, or -1 when not given
        page_cnt = None         # set from args.buffer_pages

        @classmethod
        def configure(cls, args):
                """Copy tool-wide settings from the parsed arguments.

                Stores the event limit, timestamp display options, the
                current monotonic time, the tgid/pid filters (-1 when not
                given) and the perf buffer page count on the class, so all
                probes share them.
                """
                cls.max_events = args.max_events
                timestamp_flag = args.timestamp
                cls.print_time = timestamp_flag or args.time
                cls.use_localtime = not timestamp_flag
                cls.first_ts = BPF.monotonic_time()
                cls.tgid = args.tgid if args.tgid else -1
                cls.pid = args.pid if args.pid else -1
                cls.page_cnt = args.buffer_pages

        def __init__(self, probe, string_size, kernel_stack, user_stack):
                self.usdt = None
                self.streq_functions = ""
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
                self.user_stack = user_stack
                Probe.probe_count += 1
                self._parse_probe()
                self.probe_num = Probe.probe_count
                self.probe_name = "probe_%s_%d" % \
                                (self._display_function(), self.probe_num)
                self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                         self.probe_name)

        def __str__(self):
                return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
                        self.library, self._display_function(), self.filter,
                        self.types, self.values)

        def is_default_action(self):
                return self.python_format == ""

        def _bail(self, error):
                raise ValueError("error in probe '%s': %s" %
                                 (self.raw_probe, error))

        def _parse_probe(self):
                text = self.raw_probe

                # There might be a function signature preceding the actual
                # filter/print part, or not. Find the probe specifier first --
                # it ends with either a space or an open paren ( for the
                # function signature part.
                #                                          opt. signature
                #                               probespec       |      rest
                #                               ---------  ----------   --
                (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
                                             text).groups()

                self._parse_spec(spec)
                # Remove the parens
                self.signature = sig[1:-1] if sig else None
                if self.signature and self.probe_type in ['u', 't']:
                        self._bail("USDT and tracepoint probes can't have " +
                                   "a function signature; use arg1, arg2, " +
                                   "... instead")

                text = rest.lstrip()
                # If we now have a (, wait for the balanced closing ) and that
                # will be the predicate
                self.filter = None
                if len(text) > 0 and text[0] == "(":
                        balance = 1
                        for i in range(1, len(text)):
                                if text[i] == "(":
                                        balance += 1
                                if text[i] == ")":
                                        balance -= 1
                                if balance == 0:
                                        self._parse_filter(text[:i + 1])
                                        text = text[i + 1:]
                                        break
                        if self.filter is None:
                                self._bail("unmatched end of predicate")

                if self.filter is None:
                        self.filter = "1"

                # The remainder of the text is the printf action
                self._parse_action(text.lstrip())

        def _parse_spec(self, spec):
                parts = spec.split(":")
                # Two special cases: 'func' means 'p::func', 'lib:func' means
                # 'p:lib:func'. Other combinations need to provide an empty
                # value between delimiters, e.g. 'r::func' for a kretprobe on
                # the function func.
                if len(parts) == 1:
                        parts = ["p", "", parts[0]]
                elif len(parts) == 2:
                        parts = ["p", parts[0], parts[1]]
                if len(parts[0]) == 0:
                        self.probe_type = "p"
                elif parts[0] in ["p", "r", "t", "u"]:
                        self.probe_type = parts[0]
                else:
                        self._bail("probe type must be '', 'p', 't', 'r', " +
                                   "or 'u', but got '%s'" % parts[0])
                if self.probe_type == "t":
                        self.tp_category = parts[1]
                        self.tp_event = parts[2]
                        self.library = ""       # kernel
                        self.function = ""      # from TRACEPOINT_PROBE
                elif self.probe_type == "u":
                        self.library = ':'.join(parts[1:-1])
                        self.usdt_name = parts[-1]
                        self.function = ""      # no function, just address
                        # We will discover the USDT provider by matching on
                        # the USDT name in the specified library
                        self._find_usdt_probe()
                else:
                        self.library = ':'.join(parts[1:-1])
                        self.function = parts[-1]

        def _find_usdt_probe(self):
                """Open a USDT context for the library and check that the
                named probe exists in it.

                The context is bound to Probe.pid when one is set (and is
                not -1), otherwise to Probe.tgid. An unknown probe name is
                rejected through _bail.
                """
                if Probe.pid and Probe.pid != -1:
                        target = Probe.pid
                else:
                        target = Probe.tgid
                self.usdt = USDT(path=self.library, pid=target)
                known = any(probe.name == self.usdt_name
                            for probe in self.usdt.enumerate_probes())
                if known:
                        return  # Found it, will enable later
                self._bail("unrecognized USDT probe %s" % self.usdt_name)

        def _parse_filter(self, filt):
                self.filter = self._rewrite_expr(filt)

        def _parse_types(self, fmt):
                for match in re.finditer(
                            r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)', fmt):
                        self.types.append(match.group(1))
                fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
                fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
                fmt = re.sub('%K|%U', '%s', fmt)
                self.python_format = fmt.strip('"')

        def _parse_action(self, action):
                self.values = []
                self.types = []
                self.python_format = ""
                if len(action) == 0:
                        return

                action = action.strip()
                match = re.search(r'(\".*?\"),?(.*)', action)
                if match is None:
                        self._bail("expected format string in \"s")

                self.raw_format = match.group(1)
                self._parse_types(self.raw_format)
                for part in re.split('(?<!"),', match.group(2)):
                        part = self._rewrite_expr(part)
                        if len(part) > 0:
                                self.values.append(part)

        # Shorthand names accepted in filters and action arguments, mapped
        # to the C expression that _rewrite_expr() substitutes for them.
        # The argN entries are skipped for USDT probes, which read their
        # arguments differently.
        aliases = {
                "retval": "PT_REGS_RC(ctx)",
                "arg1": "PT_REGS_PARM1(ctx)",
                "arg2": "PT_REGS_PARM2(ctx)",
                "arg3": "PT_REGS_PARM3(ctx)",
                "arg4": "PT_REGS_PARM4(ctx)",
                "arg5": "PT_REGS_PARM5(ctx)",
                "arg6": "PT_REGS_PARM6(ctx)",
                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
                "$cpu": "bpf_get_smp_processor_id()"
        }

        def _generate_streq_function(self, string):
                fname = "streq_%d" % Probe.streq_index
                Probe.streq_index += 1
                self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
                return fname

        def _rewrite_expr(self, expr):
                for alias, replacement in Probe.aliases.items():
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using
                        # bpf_readarg_N macros emitted at BPF construction.
                        if alias.startswith("arg") and self.probe_type == "u":
                                continue
                        expr = expr.replace(alias, replacement)
                matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
                for match in matches:
                        string = match.group(1)
                        fname = self._generate_streq_function(string)
                        expr = expr.replace("STRCMP", fname, 1)
                return expr

        # ctypes type used on the Python side for each format specifier.
        # "s" is absent because _generate_python_field_decl() builds a
        # sized char array for it instead.
        p_type = {"u": ct.c_uint, "d": ct.c_int,
                  "llu": ct.c_ulonglong, "lld": ct.c_longlong,
                  "hu": ct.c_ushort, "hd": ct.c_short,
                  "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte,
                  "K": ct.c_ulonglong, "U": ct.c_ulonglong}

        def _generate_python_field_decl(self, idx, fields):
                field_type = self.types[idx]
                if field_type == "s":
                        ptype = ct.c_char * self.string_size
                else:
                        ptype = Probe.p_type[field_type]
                fields.append(("v%d" % idx, ptype))

        def _generate_python_data_decl(self):
                self.python_struct_name = "%s_%d_Data" % \
                                (self._display_function(), self.probe_num)
                fields = [
                        ("timestamp_ns", ct.c_ulonglong),
                        ("tgid", ct.c_uint),
                        ("pid", ct.c_uint),
                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
                ]
                for i in range(0, len(self.types)):
                        self._generate_python_field_decl(i, fields)
                if self.kernel_stack:
                        fields.append(("kernel_stack_id", ct.c_int))
                if self.user_stack:
                        fields.append(("user_stack_id", ct.c_int))
                return type(self.python_struct_name, (ct.Structure,),
                            dict(_fields_=fields))

        # C type used in the generated BPF event struct for each format
        # specifier. "s" is absent because the field generators emit a
        # sized char array for it instead.
        c_type = {"u": "unsigned int", "d": "int",
                  "llu": "unsigned long long", "lld": "long long",
                  "hu": "unsigned short", "hd": "short",
                  "x": "unsigned int", "llx": "unsigned long long",
                  "c": "char", "K": "unsigned long long",
                  "U": "unsigned long long"}
        # Specifiers (other than "s") accepted by the field generators
        fmt_types = c_type.keys()

        def _generate_field_decl(self, idx):
                field_type = self.types[idx]
                if field_type == "s":
                        return "char v%d[%d];\n" % (idx, self.string_size)
                if field_type in Probe.fmt_types:
                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
                self._bail("unrecognized format specifier %s" % field_type)

        def _generate_data_decl(self):
                """Return C source for this probe's event struct and BPF tables.

                Side effect: stores the generated identifiers in
                `self.events_name`, `self.struct_name` and `self.stacks_name`,
                all derived from `self.probe_name`.
                """
                # The BPF program will populate values into the struct
                # according to the format string, and the Python program will
                # construct the final display string.
                self.events_name = "%s_events" % self.probe_name
                self.struct_name = "%s_data_t" % self.probe_name
                self.stacks_name = "%s_stacks" % self.probe_name
                # The stack table is only declared when a stack was requested.
                stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
                              if (self.kernel_stack or self.user_stack) else ""
                # One struct member (v0, v1, ...) per format specifier.
                data_fields = ""
                for i, field_type in enumerate(self.types):
                        data_fields += "        " + \
                                       self._generate_field_decl(i)

                kernel_stack_str = "       int kernel_stack_id;" \
                                   if self.kernel_stack else ""
                user_stack_str = "       int user_stack_id;" \
                                 if self.user_stack else ""

                text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
                return text % (self.struct_name, data_fields,
                               kernel_stack_str, user_stack_str,
                               self.events_name, stack_table)

        def _generate_field_assign(self, idx):
                """Return the C statements that store value `idx` into `__data`.

                For USDT probes whose expression starts with "arg", a local
                variable is declared and filled with bpf_usdt_readarg first.
                Strings are copied with bpf_probe_read; other known types are
                assigned with a cast to the type from `Probe.c_type`.  Unknown
                types are reported through `self._bail`.
                """
                field_type = self.types[idx]
                expr = self.values[idx].strip()
                text = ""
                if self.probe_type == "u" and expr[0:3] == "arg":
                        # Only the single character after "arg" is used as
                        # the (1-based) argument number.
                        arg_index = int(expr[3])
                        arg_ctype = self.usdt.get_probe_arg_ctype(
                                self.usdt_name, arg_index - 1)
                        text = ("        %s %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                                % (arg_ctype, expr, expr[3], expr)

                if field_type == "s":
                        return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """ % (expr, idx, idx, expr)
                if field_type in Probe.fmt_types:
                        return text + "        __data.v%d = (%s)%s;\n" % \
                                        (idx, Probe.c_type[field_type], expr)
                self._bail("unrecognized field type %s" % field_type)

        def _generate_usdt_filter_read(self):
            """Return C code reading the USDT arguments used by the filter.

            Returns "" for non-USDT probes.  For every key of
            `Probe.aliases` that starts with "arg" and occurs in
            `self.filter`, a `<arg>_filter` local is declared and filled with
            bpf_usdt_readarg.  Side effect: `self.filter` is rewritten to
            refer to those `<arg>_filter` locals.
            """
            text = ""
            if self.probe_type != "u":
                    return text
            for arg, _ in Probe.aliases.items():
                    if not (arg.startswith("arg") and
                            (arg in self.filter)):
                            continue
                    arg_index = int(arg.replace("arg", ""))
                    arg_ctype = self.usdt.get_probe_arg_ctype(
                            self.usdt_name, arg_index - 1)
                    if not arg_ctype:
                            self._bail("Unable to determine type of {} "
                                       "in the filter".format(arg))
                    text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
                    self.filter = self.filter.replace(
                            arg, "{}_filter".format(arg))
            return text

        def generate_program(self, include_self):
                """Return the complete BPF C source for this probe.

                The result is `self.streq_functions`, the data declarations
                and the probe function itself.  The process filter is chosen
                in this order: `Probe.pid` if set, else `Probe.tgid` for
                probes without a library, else exclusion of this process
                unless `include_self` is true.
                """
                data_decl = self._generate_data_decl()
                if Probe.pid != -1:
                        pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
                # uprobes can have a built-in tgid filter passed to
                # attach_uprobe, hence the check here -- for kprobes, we
                # need to do the tgid test by hand:
                elif len(self.library) == 0 and Probe.tgid != -1:
                        pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
                elif not include_self:
                        pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
                else:
                        pid_filter = ""

                prefix = ""
                signature = "struct pt_regs *ctx"
                if self.signature:
                        signature += ", " + self.signature

                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                # Tracepoints use the TRACEPOINT_PROBE macro and pass "args"
                # as the context; everything else is a plain function taking
                # "ctx".
                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"

                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

                text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                # Order matters: _generate_usdt_filter_read() rewrites
                # self.filter, and the tuple is evaluated left to right, so
                # self.filter is read only after that rewrite.
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return self.streq_functions + data_decl + "\n" + text

        @classmethod
        def _time_off_str(cls, timestamp_ns):
                return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

        def _display_function(self):
                if self.probe_type == 'p' or self.probe_type == 'r':
                        return self.function
                elif self.probe_type == 'u':
                        return self.usdt_name
                else:   # self.probe_type == 't'
                        return self.tp_event

        def print_stack(self, bpf, stack_id, tgid):
                """Print one resolved line per frame of the stack `stack_id`.

                A negative `stack_id` is printed as a bare number instead of
                being looked up in the stack table.
                """
                if stack_id >= 0:
                        table = bpf.get_table(self.stacks_name)
                        frames = list(table.walk(stack_id))
                        for frame in frames:
                                symbol = bpf.sym(frame, tgid,
                                                 show_module=True,
                                                 show_offset=True)
                                print("        %s" % (symbol))
                else:
                        print("        %d" % stack_id)

        def _format_message(self, bpf, tgid, values):
                # Replace each %K with kernel sym and %U with user sym in tgid
                kernel_placeholders = [i for i, t in enumerate(self.types)
                                       if t == 'K']
                user_placeholders = [i for i, t in enumerate(self.types)
                                     if t == 'U']
                for kp in kernel_placeholders:
                        values[kp] = bpf.ksym(values[kp], show_offset=True)
                for up in user_placeholders:
                        values[up] = bpf.sym(values[up], tgid,
                                           show_module=True, show_offset=True)
                return self.python_format % tuple(values)

        def print_event(self, bpf, cpu, data, size):
                """Perf-buffer callback: decode one event and print it.

                `data` is cast to the ctypes struct in `self.python_struct`.
                The captured values v0..vN are formatted through
                `_format_message`, then the line and any requested stack
                traces are printed.  Exits the process once
                `Probe.max_events` events have been printed.
                """
                # Cast as the generated structure type and display
                # according to the format string in the probe.
                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
                # Build a real list: _format_message assigns into it by
                # index, which a Python 3 map() iterator does not support.
                values = [getattr(event, "v%d" % i)
                          for i in range(len(self.values))]
                msg = self._format_message(bpf, event.tgid, values)
                if not Probe.print_time:
                        print("%-6d %-6d %-12s %-16s %s" %
                              (event.tgid, event.pid, event.comm.decode(),
                               self._display_function(), msg))
                else:
                        # Either wall-clock time or the offset from the
                        # first timestamp, truncated to 8 characters.
                        stamp = strftime("%H:%M:%S") \
                                if Probe.use_localtime else \
                                Probe._time_off_str(event.timestamp_ns)
                        print("%-8s %-6d %-6d %-12s %-16s %s" %
                              (stamp[:8], event.tgid, event.pid,
                               event.comm.decode(),
                               self._display_function(), msg))

                if self.kernel_stack:
                        self.print_stack(bpf, event.kernel_stack_id, -1)
                if self.user_stack:
                        self.print_stack(bpf, event.user_stack_id, event.tgid)
                if self.user_stack or self.kernel_stack:
                        print("")

                Probe.event_count += 1
                if Probe.max_events is not None and \
                   Probe.event_count >= Probe.max_events:
                        exit()

        def attach(self, bpf, verbose):
                """Attach the probe and start receiving its events.

                Probes without a library are attached as kernel probes, the
                rest as user-space probes.  Afterwards the Python view of the
                event struct is built and `print_event` is registered as the
                perf-buffer callback.
                """
                attach_impl = self._attach_k if len(self.library) == 0 \
                              else self._attach_u
                attach_impl(bpf)
                self.python_struct = self._generate_python_data_decl()
                on_event = partial(self.print_event, bpf)
                events = bpf[self.events_name]
                events.open_perf_buffer(on_event, page_cnt=self.page_cnt)

        def _attach_k(self, bpf):
                if self.probe_type == "r":
                        bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_name)
                elif self.probe_type == "p":
                        bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_name)
                # Note that tracepoints don't need an explicit attach

        def _attach_u(self, bpf):
                """Attach a user-space probe or return probe in `self.library`.

                The library is resolved with BPF.find_library, falling back
                to BPF.find_exe; failure to resolve it is reported through
                `self._bail`.  USDT probes need no attach here.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        # This might be an executable (e.g. 'bash')
                        libpath = BPF.find_exe(self.library)
                if not libpath:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "u":
                        return  # Was already enabled by the BPF constructor
                do_attach = bpf.attach_uretprobe if self.probe_type == "r" \
                            else bpf.attach_uprobe
                do_attach(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_name,
                          pid=Probe.tgid)
Beispiel #9
0
class Probe(object):
    # Number of Probe objects constructed so far; gives each probe its number.
    probe_count = 0
    # Running index passed to StrcmpRewrite.rewrite_expr and updated from
    # its result.
    streq_index = 0
    # Exit after this many printed events; None means no limit.
    max_events = None
    # Number of events printed so far.
    event_count = 0
    # Monotonic and wall-clock reference times captured by configure().
    first_ts = 0
    first_ts_real = None
    # Output options, set from the command-line arguments in configure().
    print_time = False
    print_unix_timestamp = False
    use_localtime = True
    # Whether the generated event struct carries a timestamp_ns field.
    time_field = False
    print_cpu = False
    print_address = False
    # Process / thread filters; -1 means "not set".
    tgid = -1
    pid = -1
    # Passed as page_cnt to open_perf_buffer().
    page_cnt = None
    # Selects BPF_STACK_TRACE_BUILDID instead of BPF_STACK_TRACE.
    build_id_enabled = False

    @classmethod
    def configure(cls, args):
        """Copy the parsed command-line options into class-wide settings.

        Also records the monotonic and wall-clock reference times used
        later to print timestamps.
        """
        relative_ts = args.timestamp
        cls.max_events = args.max_events
        show_time = relative_ts or args.time
        cls.print_time = show_time
        cls.print_unix_timestamp = args.unix_timestamp
        local_time = not relative_ts
        cls.use_localtime = local_time
        # The event only needs a timestamp field when times are printed
        # relative to the first timestamp rather than as local time.
        cls.time_field = show_time and (not local_time)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.first_ts_real = time.time()
        # Unset (falsy) process / thread filters are stored as -1.
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack,
                 cgroup_map_name, name, msg_filter):
        """Parse the probe description `probe` and derive its BPF names.

        `string_size` is the size of the char arrays used for %s values.
        `kernel_stack` / `user_stack` enable stack capture.
        `cgroup_map_name`, when not None, names the map used for the cgroup
        check.  `name` and `msg_filter` are output filters applied in
        print_event.  Raises ValueError (via _bail) for an invalid probe.
        """
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        # Filled in by _rewrite_expr with the argN names marked @user.
        self.probe_user_list = set()
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
                        (self._display_function(), self.probe_num)
        # Replace every character that is not a letter, digit or underscore.
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', self.probe_name)
        self.cgroup_map_name = cgroup_map_name
        self.name = name
        self.msg_filter = msg_filter
        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            # [6:] drops the leading "probe_" of the generated name.
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        return self.python_format == ""

    def _bail(self, error):
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    def _parse_probe(self):
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                          opt. signature
        #                               probespec       |      rest
        #                               ---------  ----------   --
        (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
                                     text).groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""  # kernel
            self.function = ""  # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.usdt_name = ":".join(parts[2:])
            self.function = ""  # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Create the USDT reader and verify `self.usdt_name` exists in it.

        The reader is created for `self.library` and the configured pid
        (or tgid when no pid is set) and stored in `self.usdt`.
        `self.usdt_name` is either "probe" or "provider:probe".  Raises
        ValueError (via _bail) when no matching probe is enumerated.
        """
        if Probe.pid and Probe.pid != -1:
            target = Probe.pid
        else:
            target = Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)

        pieces = self.usdt_name.split(":")
        if len(pieces) == 1:
            provider, name = None, pieces[0]
        else:
            provider, name = pieces[0], pieces[1]
        wanted_provider = provider.encode("ascii") \
            if provider is not None else None
        wanted_name = name.encode("ascii")

        found = any(
            (not wanted_provider or probe.provider == wanted_provider)
            and probe.name == wanted_name
            for probe in self.usdt.enumerate_probes())
        if found:
            return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate `filt`, with aliases rewritten, as the filter."""
        rewritten = self._rewrite_expr(filt)
        self.filter = rewritten

    def _parse_types(self, fmt):
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # argN aliases for ordinary probes: each expands to the macro reading
    # the N-th parameter from ctx.
    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # argN aliases used when is_syscall_kprobe is set: the first parameter
    # of the probed function is treated as a pointer to another pt_regs,
    # and the N-th parameter is read from it with bpf_probe_read.
    aliases_indarg = {
        "arg1":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    # Aliases that are replaced for every probe type.
    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task": "((struct task_struct *)bpf_get_current_task())"
    }

    def _rewrite_expr(self, expr):
        """Return `expr` with the tool's aliases expanded into C code.

        Arguments marked `argN@user` are recorded in `self.probe_user_list`
        and the marker is removed.  The expression is then passed through
        StrcmpRewrite.rewrite_expr, whose result also updates
        `self.streq_functions` and `Probe.streq_index`.  Finally the argN
        aliases (not for USDT probes) and the common aliases are replaced
        textually.
        """
        # Find the occurrences of any arg[1-6]@user. Use it later to
        # identify bpf_probe_read_user
        self.probe_user_list.update(
            found.group(1).strip()
            for found in re.finditer(r'(arg[1-6])(@user)', expr))
        # Remove @user occurrences from arg before resolving to its
        # corresponding aliases.
        expr = re.sub(r'(arg[1-6])@user', r'\1', expr)

        rewritten = StrcmpRewrite.rewrite_expr(expr, self.bin_cmp,
                                               self.library,
                                               self.probe_user_list,
                                               self.streq_functions,
                                               Probe.streq_index)
        expr = rewritten["expr"]
        self.streq_functions = rewritten["streq_functions"]
        Probe.streq_index = rewritten["probeid"]

        if self.is_syscall_kprobe:
            arg_aliases = Probe.aliases_indarg
        else:
            arg_aliases = Probe.aliases_arg
        # For USDT probes, we replace argN values with the
        # actual arguments for that probe obtained using
        # bpf_readarg_N macros emitted at BPF construction.
        if self.probe_type != "u":
            for alias, replacement in arg_aliases.items():
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        return expr

    # Maps each non-string format specifier to the ctypes type used for the
    # matching field of the Python-side event struct.
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "lu": ct.c_ulong,
        "ld": ct.c_long,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "lx": ct.c_ulong,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong
    }

    def _generate_python_field_decl(self, idx, fields):
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        self.python_struct_name = "%s_%d_Data" % \
                        (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)  # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure, ),
                    dict(_fields_=fields))

    # Maps each non-string format specifier to the C type used for the
    # matching field of the generated BPF data struct.
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "lu": "unsigned long",
        "ld": "long",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "lx": "unsigned long",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long"
    }
    # The specifiers that have an entry in c_type; "s" (string) is handled
    # separately by the code generators.
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        """Return C source for this probe's event struct and BPF tables.

        Side effect: stores the generated identifiers in `self.events_name`,
        `self.struct_name` and `self.stacks_name`, all derived from
        `self.probe_name`.  The member order must stay in step with
        `_generate_python_data_decl`.
        """
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
                     else "BPF_STACK_TRACE_BUILDID"
        # The stack table is only declared when a stack was requested.
        stack_table = "%s(%s, 1024);" % (stack_type,self.stacks_name) \
                      if (self.kernel_stack or self.user_stack) else ""
        # One struct member (v0, v1, ...) per format specifier.
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + \
                           self._generate_field_decl(i)
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = "       int kernel_stack_id;" \
                           if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
                         if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str, data_fields,
                       kernel_stack_str, user_stack_str, self.events_name,
                       stack_table)

    def _generate_field_assign(self, idx):
        """Return the C statements that store value `idx` into `__data`.

        For USDT probes whose expression starts with "arg", a local variable
        is declared and filled with bpf_usdt_readarg first.  Strings are
        copied with bpf_probe_read, or bpf_probe_read_user when the probe
        has a library or the expression is an argument marked @user.  Other
        known types are assigned with a cast to the type from
        `Probe.c_type`.  Raises ValueError (via _bail) for unknown types.
        """
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            # Only the single character after "arg" is used as the
            # (1-based) argument number.
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name,
                                                      arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                    % (arg_ctype, expr, expr[3], expr)
        probe_read_func = "bpf_probe_read"
        if field_type == "s":
            if self.library:
                probe_read_func = "bpf_probe_read_user"
            else:
                alias_to_check = Probe.aliases_indarg \
                                    if self.is_syscall_kprobe \
                                    else Probe.aliases_arg
                # expr has already been rewritten, so compare it against
                # the expanded alias text to find which argN it came from.
                for arg, alias in alias_to_check.items():
                    if alias == expr and arg in self.probe_user_list:
                        probe_read_func = "bpf_probe_read_user"
                        break
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                %s(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, probe_read_func, idx, idx)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                            (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT arguments used by the filter.

        Returns "" for non-USDT probes.  For every argN alias that occurs in
        `self.filter`, a `<arg>_filter` local is declared and filled with
        bpf_usdt_readarg.  Side effect: `self.filter` is rewritten to refer
        to those `<arg>_filter` locals.  Raises ValueError (via _bail) when
        an argument's C type cannot be determined.
        """
        text = ""
        if self.probe_type != "u":
            return text
        for arg, _ in Probe.aliases_arg.items():
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name,
                                                      arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the complete BPF C source for this probe.

        The result is `self.streq_functions`, the data declarations and the
        probe function itself.  The process filter is chosen in this order:
        `Probe.pid` if set, else `Probe.tgid` for probes without a library,
        else exclusion of this process unless `include_self` is true.  A
        cgroup check is added when `self.cgroup_map_name` is not None.
        """
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
        else:
            pid_filter = ""

        if self.cgroup_map_name is not None:
            cgroup_filter = """
        if (%s.check_current_task(0) <= 0) { return 0; }
                """ % self.cgroup_map_name
        else:
            cgroup_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        # Tracepoints use the TRACEPOINT_PROBE macro and pass "args" as the
        # context; everything else is a plain function taking "ctx".
        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                      (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, 0
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # Order matters: _generate_usdt_filter_read() rewrites self.filter,
        # and the tuple is evaluated left to right, so self.filter is read
        # only after that rewrite.
        text = text % (pid_filter, cgroup_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        offset = 1e-9 * (timestamp_ns - cls.first_ts)
        if cls.print_unix_timestamp:
            return "%.6f" % (offset + cls.first_ts_real)
        else:
            return "%.6f" % offset

    def _display_function(self):
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:  # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" %
                  (bpf.sym(addr, tgid, show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types) if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types) if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up],
                                 tgid,
                                 show_module=True,
                                 show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        if self.name and bytes(self.name) not in event.comm:
            return
        values = map(lambda i: getattr(event, "v%d" % i),
                     range(0, len(self.values)))
        msg = self._format_message(bpf, event.tgid, values)
        if self.msg_filter and bytes(self.msg_filter) not in msg:
            return
        if Probe.print_time:
            time = strftime("%H:%M:%S") if Probe.use_localtime else \
                   Probe._time_off_str(event.timestamp_ns)
            if Probe.print_unix_timestamp:
                print("%-17s " % time[:17], end="")
            else:
                print("%-8s " % time[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid, event.comm.decode(
                  'utf-8', 'replace'), self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()
        sys.stdout.flush()

    def attach(self, bpf, verbose):
        """Attach the probe and start receiving its events.

        Probes without a library are attached as kernel probes, the rest as
        user-space probes.  Afterwards the Python view of the event struct
        is built and `print_event` is registered as the perf-buffer
        callback.
        """
        attach_impl = self._attach_k if len(self.library) == 0 \
            else self._attach_u
        attach_impl(bpf)
        self.python_struct = self._generate_python_data_decl()
        on_event = partial(self.print_event, bpf)
        events = bpf[self.events_name]
        events.open_perf_buffer(on_event, page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function, fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function, fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe or return probe in `self.library`.

        The library is resolved with BPF.find_library, falling back to
        BPF.find_exe; raises ValueError (via _bail) when neither finds it.
        USDT probes need no attach here.
        """
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if not libpath:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            return  # Was already enabled by the BPF constructor
        do_attach = bpf.attach_uretprobe if self.probe_type == "r" \
            else bpf.attach_uprobe
        do_attach(name=libpath,
                  sym=self.function,
                  fn_name=self.probe_name,
                  pid=Probe.tgid)
Beispiel #10
0
class Probe(object):
    """A probe pattern whose hits are counted per stack trace.

    The pattern selects kernel functions, user-space functions, kernel
    tracepoints or USDT probes. ``load`` generates and compiles the BPF
    program into ``self.bpf``; ``attach`` then attaches it and checks that
    at least one target matched.
    """

    def __init__(self,
                 pattern,
                 kernel_stack,
                 user_stack,
                 use_regex=False,
                 pid=None,
                 per_pid=False,
                 cpu=None):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Args:
            pattern: probe specification in one of the forms above.
            kernel_stack: truthy to record the kernel stack id in the key.
            user_stack: truthy to record the user stack id in the key.
            use_regex: when false, '*' in the pattern is expanded to '.*'
                and the pattern is anchored with '^' and '$'.
            pid: process to restrict the probe to, or None/0 for all.
            per_pid: truthy to split counts by process and store its name.
            cpu: CPU number to restrict kernel probes to, or None for all
                CPUs. 0 is a valid CPU number.

        Raises:
            Exception: on a malformed pattern or an unresolvable library.
        """
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                # Tracepoints have no library; category and event together
                # form the pattern that is matched later.
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            self.pattern = self.pattern.replace('*', '.*')
            self.pattern = '^' + self.pattern + '$'

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if libpath is None or len(libpath) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        self.cpu = cpu
        self.matched = 0

    def is_kernel_probe(self):
        """Return True for tracepoints and for 'p' probes without a library."""
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        """Attach the compiled program and record the number of matches.

        Must be called after ``load``, which creates ``self.bpf``. USDT
        probes are not attached here because ``load`` already enabled them.

        Raises:
            Exception: if the pattern matched nothing.
        """
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass  # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def _filter_text(self):
        """Return the C statements that drop events from other pids/CPUs.

        Only kernel probes are filtered inside the BPF program. The result
        is an empty string when no filter applies.
        """
        filter_text = []
        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        if self.is_kernel_probe() and self.pid:
            filter_text.append(
                'u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                'if (pid != %d) { return 0; }' % self.pid)

        # Compare against None rather than truthiness: CPU 0 is a valid
        # target and must not be mistaken for "no CPU filter".
        if self.is_kernel_probe() and self.cpu is not None:
            filter_text.append(
                'struct task_struct *task; task = (struct task_struct*)bpf_get_current_task(); '
                + 'if (task->cpu != %d) { return 0; }' % self.cpu)

        return '\n    '.join(filter_text)

    def load(self):
        """Generate the BPF C source for this probe and compile it.

        The compiled program is stored in ``self.bpf``. For USDT probes one
        ``trace_count_<n>`` function is generated and enabled per matching
        probe, and ``self.matched`` counts them. The source is printed when
        the module-level ``debug`` flag is set.
        """
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
                    key.user_stack_id = stack_traces.get_stackid(
                      %s, BPF_F_USER_STACK
                    );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
                    key.kernel_stack_id = stack_traces.get_stackid(
                      %s, 0
                    );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
        """
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        trace_count_text = trace_count_text.replace('FILTER',
                                                    self._filter_text())

        # Do per-pid statistics iff -P is provided
        if self.per_pid:
            trace_count_text = trace_count_text.replace(
                'GET_TGID', 'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace(
                'STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])
Beispiel #11
0
class Probe(object):
    """Counts hits of every target matched by a probe pattern.

    Each matched kernel function, user function, tracepoint or USDT probe
    gets its own generated BPF handler named ``trace_count_<n>`` and its own
    slot ``n`` in the ``counts`` BPF array.
    """

    def __init__(self, pattern, use_regex=False, pid=None):
        """Parse the probe pattern supplied by the user.

        The accepted forms follow the 'trace' and 'argdist' tools, minus the
        probe/retprobe distinction, which is not needed here:

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            /path:func      -- probe a user-space function in binary '/path'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Unless ``use_regex`` is set, '*' is expanded to '.*' and the pattern
        is anchored at both ends. Raises ``Exception`` for a malformed
        pattern or a library that cannot be located.
        """
        fields = pattern.split(':')
        count = len(fields)
        if count > 3:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)
        if count == 1:
            fields = ["p", ""] + fields
        elif count == 2:
            fields = ["p"] + fields
        else:
            if fields[0] == "t":
                # Tracepoints carry no library; "cat:event" is the pattern.
                fields = ["t", "", fields[1] + ":" + fields[2]]
            if fields[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                fields[0])

        self.type, self.library, self.pattern = fields
        if not use_regex:
            self.pattern = '^' + self.pattern.replace('*', '.*') + '$'

        needs_binary = self.type == "u" or (self.type == "p" and self.library)
        if needs_binary:
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # Not a library; it may be an executable such as 'bash'.
                resolved = BPF.find_exe(self.library)
            if not resolved:
                raise Exception("unable to find library %s" % self.library)
            self.library = resolved

        self.pid = pid
        self.matched = 0
        # Location number -> name of the function/tracepoint/probe counted
        # at that location.
        self.trace_functions = {}

    def is_kernel_probe(self):
        """Tell whether the probe targets the kernel (tracepoint or kprobe)."""
        if self.type == "t":
            return True
        return self.type == "p" and self.library == ""

    def attach(self):
        """Attach every generated handler to its target.

        USDT probes are skipped because `load` already enabled them.
        """
        if self.type == "u":
            return
        target_pid = self.pid or -1
        for slot, target in self.trace_functions.items():
            handler = "trace_count_%d" % slot
            if self.type == "t":
                self.bpf.attach_tracepoint(
                        tp=target,
                        fn_name=handler,
                        pid=target_pid)
            elif self.type == "p" and self.library:
                self.bpf.attach_uprobe(
                        name=self.library,
                        sym=target,
                        fn_name=handler,
                        pid=target_pid)
            elif self.type == "p":
                self.bpf.attach_kprobe(
                        event=target,
                        fn_name=handler,
                        pid=target_pid)

    def _add_function(self, template, probe_name):
        """Instantiate ``template`` for the next free location.

        Records ``probe_name`` under that location, advances ``matched`` and
        returns the template with PROBE_FUNCTION and LOCATION filled in.
        """
        slot = self.matched
        self.trace_functions[slot] = probe_name
        self.matched = slot + 1
        named = template.replace("PROBE_FUNCTION", "trace_count_%d" % slot)
        return named.replace("LOCATION", str(slot))

    def _generate_functions(self, template):
        """Return the BPF handlers for every target matching the pattern.

        For USDT probes this also creates ``self.usdt`` and enables each
        matching probe; for all other types ``self.usdt`` is left as None.
        """
        self.usdt = None
        chunks = []
        if self.type == "p" and not self.library:
            kernel_funcs = BPF.get_kprobe_functions(self.pattern)
            verify_limit(len(kernel_funcs))
            for name in kernel_funcs:
                chunks.append(self._add_function(template, name))
        elif self.type == "p" and self.library:
            # One symbol may live at several addresses and one address may
            # carry several symbols, but only a single uprobe per address is
            # allowed. Keep the first occurrence of each address and of each
            # symbol name and drop every later duplicate.
            seen_addresses = set()
            seen_names = set()
            candidates = BPF.get_user_functions_and_addresses(
                                        self.library, self.pattern)
            verify_limit(len(candidates))
            for name, address in candidates:
                if address not in seen_addresses and name not in seen_names:
                    seen_addresses.add(address)
                    seen_names.add(name)
                    chunks.append(self._add_function(template, name))
        elif self.type == "t":
            matching_tps = BPF.get_tracepoints(self.pattern)
            verify_limit(len(matching_tps))
            for name in matching_tps:
                chunks.append(self._add_function(template, name))
        elif self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            wanted = []
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only probes of the requested binary count.
                in_scope = self.pid or probe.bin_path == self.library
                if in_scope and re.match(self.pattern, probe.name):
                    wanted.append(probe.name)
            verify_limit(len(wanted))
            for probe_name in wanted:
                # Name of the handler that _add_function is about to create.
                handler = "trace_count_%d" % self.matched
                chunks.append(self._add_function(template, probe_name))
                self.usdt.enable_probe(probe_name, handler)
            if debug:
                print(self.usdt.get_text())
        return "".join(chunks)

    def load(self):
        """Generate and compile the BPF program, then zero the counters.

        Raises ``Exception`` when the pattern matches nothing.
        """
        handler_template = """
int PROBE_FUNCTION(void *ctx) {
    FILTER
    int loc = LOCATION;
    u64 *val = counts.lookup(&loc);
    if (!val) {
        return 0;   // Should never happen, # of locations is known
    }
    (*val)++;
    return 0;
}
        """
        program = """#include <uapi/linux/ptrace.h>

BPF_ARRAY(counts, u64, NUMLOCATIONS);
        """

        # The kernel's notion of tgid is what we want here; it sits in the
        # upper 32 bits of bpf_get_current_pid_tgid().
        if not self.pid:
            pid_check = ''
        else:
            pid_check = (
                """u32 pid = bpf_get_current_pid_tgid() >> 32;
                   if (pid != %d) { return 0; }""" % self.pid)
        handler_template = handler_template.replace('FILTER', pid_check)

        program += self._generate_functions(handler_template)
        program = program.replace("NUMLOCATIONS",
                                  str(len(self.trace_functions)))
        if debug:
            print(program)

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

        self.bpf = BPF(text=program,
                       usdt_contexts=[self.usdt] if self.usdt else [])
        self.clear()    # Start with every array slot at zero

    def counts(self):
        """Return the ``counts`` table of the compiled program."""
        return self.bpf["counts"]

    def clear(self):
        """Reset the counter of every known location to a default leaf."""
        table = self.bpf["counts"]
        for location in list(self.trace_functions):
            table[table.Key(location)] = table.Leaf()
Beispiel #12
0
class Probe(object):
    """One trace probe: parsed specification plus BPF and ctypes generation.

    A probe is written as ``SPEC [(PREDICATE)] ["FORMAT", VALUES...]``. The
    class parses that text, emits the BPF C source that fills a per-probe
    event structure, and prints each event received on the perf buffer.
    """

    # State shared by all probes; the options are set through configure().
    probe_count = 0
    max_events = None
    event_count = 0
    first_ts = 0
    use_localtime = True
    pid = -1

    @classmethod
    def configure(cls, args):
        """Copy the tool-wide options from the parsed arguments."""
        cls.max_events = args.max_events
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()
        cls.pid = args.pid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        """Parse ``probe`` and assign it a unique number and BPF name."""
        self.usdt = None
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        name_parts = (self._display_function(), self.probe_num)
        self.probe_name = "probe_%s_%d" % name_parts

    def __str__(self):
        described = (self.probe_type, self.library,
                     self._display_function(), self.filter,
                     self.types, self.values)
        return "%s:%s:%s FLT=%s ACT=%s/%s" % described

    def is_default_action(self):
        """Return True when the probe has no format string."""
        return self.python_format == ""

    def _bail(self, error):
        """Raise ValueError naming the offending probe text."""
        message = "error in probe '%s': %s" % (self.raw_probe, error)
        raise ValueError(message)

    def _parse_probe(self):
        """Split the raw probe text into specifier, predicate and action."""
        # The specifier runs up to the first space.
        spec, found_space, remainder = self.raw_probe.partition(' ')
        self._parse_spec(spec)
        text = remainder.lstrip() if found_space else ""

        # A leading "(" opens the predicate, which extends to the matching
        # closing parenthesis.
        self.filter = None
        if text.startswith("("):
            depth = 1
            for pos in range(1, len(text)):
                char = text[pos]
                if char == "(":
                    depth += 1
                elif char == ")":
                    depth -= 1
                if depth == 0:
                    self._parse_filter(text[:pos + 1])
                    text = text[pos + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # Whatever is left is the printf-style action.
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        """Decode the ``type:library:function`` specifier.

        'func' is shorthand for 'p::func' and 'lib:func' for 'p:lib:func'.
        Any other combination must spell out all three fields, leaving
        unused ones empty (e.g. 'r::func' for a kretprobe on func).
        """
        parts = spec.split(":")
        if len(parts) == 1:
            parts = ["p", ""] + parts
        elif len(parts) == 2:
            parts = ["p"] + parts

        kind = parts[0]
        if len(kind) == 0:
            self.probe_type = "p"
        elif kind in ["p", "r", "t", "u"]:
            self.probe_type = kind
        else:
            self._bail("probe type must be '', 'p', 't', 'r', "
                       "or 'u', but got '%s'" % kind)

        if self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""      # no function, just address
            # The USDT provider is discovered by looking the probe name up
            # in the given library.
            self._find_usdt_probe()
        elif self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""       # kernel
            self.function = ""      # generated from TRACEPOINT_PROBE
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Create the USDT context and check that the named probe exists."""
        self.usdt = USDT(path=self.library, pid=Probe.pid)
        known = any(candidate.name == self.usdt_name
                    for candidate in self.usdt.enumerate_probes())
        if known:
            return      # It will be enabled later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate with argument aliases expanded."""
        expanded = self._replace_args(filt)
        self.filter = expanded

    def _parse_types(self, fmt):
        """Collect the format specifiers and derive the Python format."""
        self.types.extend(
            found.group(1) for found in re.finditer(
                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c)', fmt))
        # Python's % operator has no length modifiers, so collapse the C
        # integer specifiers to plain %d and %x.
        as_decimal = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
        as_hex = re.sub(r'([^%]%)(x|llx)', r'\1x', as_decimal)
        self.python_format = as_hex.strip('"')

    def _parse_action(self, action):
        """Parse the optional ``"format", value, ...`` part of the probe."""
        self.values = []
        self.types = []
        self.python_format = ""
        if not action:
            return

        action = action.strip()
        parsed = re.search(r'(\".*\"),?(.*)', action)
        if parsed is None:
            self._bail('expected format string in "s')

        self.raw_format = parsed.group(1)
        self._parse_types(self.raw_format)
        for raw_value in parsed.group(2).split(','):
            value = self._replace_args(raw_value)
            if value:
                self.values.append(value)

    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()"
    }

    def _replace_args(self, expr):
        """Expand the aliases in ``expr`` to their C expressions.

        For USDT probes the argN aliases are left in place; they are read
        through bpf_usdt_readarg in the generated code instead.
        """
        for alias, replacement in Probe.aliases.items():
            if not (alias.startswith("arg") and self.probe_type == "u"):
                expr = expr.replace(alias, replacement)
        return expr

    p_type = {"u": ct.c_uint, "d": ct.c_int,
              "llu": ct.c_ulonglong, "lld": ct.c_longlong,
              "hu": ct.c_ushort, "hd": ct.c_short,
              "x": ct.c_uint, "llx": ct.c_ulonglong,
              "c": ct.c_ubyte}

    def _generate_python_field_decl(self, idx, fields):
        """Append the ctypes field for format value ``idx`` to ``fields``."""
        spec = self.types[idx]
        if spec == "s":
            ctype = ct.c_char * self.string_size
        else:
            ctype = Probe.p_type[spec]
        fields.append(("v%d" % idx, ctype))

    def _generate_python_data_decl(self):
        """Build the ctypes Structure mirroring the BPF event struct."""
        self.python_struct_name = "%s_%d_Data" % \
                        (self._display_function(), self.probe_num)
        layout = [
            ("timestamp_ns", ct.c_ulonglong),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)       # TASK_COMM_LEN
        ]
        for position in range(len(self.types)):
            self._generate_python_field_decl(position, layout)
        if self.kernel_stack:
            layout.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            layout.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,),
                    dict(_fields_=layout))

    c_type = {"u": "unsigned int", "d": "int",
              "llu": "unsigned long long", "lld": "long long",
              "hu": "unsigned short", "hd": "short",
              "x": "unsigned int", "llx": "unsigned long long",
              "c": "char"}
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        """Return the C member declaration for format value ``idx``."""
        spec = self.types[idx]
        if spec == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if spec in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[spec], idx)
        self._bail("unrecognized format specifier %s" % spec)

    def _generate_data_decl(self):
        """Return the C event struct, perf output and stack table.

        The BPF side stores one value per format specifier in the struct;
        the Python side later renders the final string from those values.
        """
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name

        if self.kernel_stack or self.user_stack:
            stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name
        else:
            stack_table = ""

        members = "".join(
            "        " + self._generate_field_decl(position)
            for position in range(len(self.types)))

        kernel_member = ""
        if self.kernel_stack:
            kernel_member = "       int kernel_stack_id;"
        user_member = ""
        if self.user_stack:
            user_member = "       int user_stack_id;"

        template = """
struct %s
{
        u64 timestamp_ns;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return template % (self.struct_name, members,
                           kernel_member, user_member,
                           self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        """Return the C code that stores value ``idx`` into the event."""
        spec = self.types[idx]
        source = self.values[idx].strip()
        preamble = ""
        if self.probe_type == "u" and source[0:3] == "arg":
            # USDT arguments are fetched into a local named after the alias.
            preamble = ("        u64 %s = 0;\n" +
                        "        bpf_usdt_readarg(%s, ctx, &%s);\n") % \
                        (source, source[3], source)

        if spec == "s":
            copy_string = """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
""" % (source, idx, idx, source)
            return preamble + copy_string
        if spec in Probe.fmt_types:
            store = "        __data.v%d = (%s)%s;\n" % \
                    (idx, Probe.c_type[spec], source)
            return preamble + store
        self._bail("unrecognized field type %s" % spec)

    def _generate_usdt_filter_read(self):
        """Return C code reading the USDT arguments the predicate uses.

        Each argN found in the predicate is read into ``argN_filter`` and
        the predicate is rewritten to use that variable. Probes of any
        other type yield an empty string.
        """
        if self.probe_type != "u":
            return ""
        reads = ""
        for arg in Probe.aliases:
            if arg.startswith("arg") and arg in self.filter:
                arg_index = int(arg.replace("arg", ""))
                ctype = self.usdt.get_probe_arg_ctype(
                        self.usdt_name, arg_index)
                if not ctype:
                    self._bail("Unable to determine type of {} "
                               "in the filter".format(arg))
                reads += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(ctype, arg, arg_index, arg)
                self.filter = self.filter.replace(
                        arg, "{}_filter".format(arg))
        return reads

    def generate_program(self, include_self):
        """Return the complete BPF C source for this probe.

        ``include_self`` false adds a check that skips events raised by
        this process, unless a kernel-probe pid filter is already in place.
        """
        data_decl = self._generate_data_decl()

        # kprobes have no built-in pid filter, so the check goes into the
        # function body.
        pid_filter = ""
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid != %d) { return 0; }
""" % Probe.pid
        elif not include_self:
            pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid == %d) { return 0; }
""" % os.getpid()

        assignments = "".join(
            self._generate_field_assign(position)
            for position in range(len(self.values)))

        stack_reads = ""
        if self.user_stack:
            stack_reads += """
        __data.user_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % self.stacks_name
        if self.kernel_stack:
            stack_reads += """
        __data.kernel_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID
        );""" % self.stacks_name

        if self.probe_type == "t":
            ctx_name = "args"
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                      (self.tp_category, self.tp_event)
        else:
            ctx_name = "ctx"
            heading = "int %s(%s)" % (self.probe_name, "struct pt_regs *ctx")

        body = """
{
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.pid = bpf_get_current_pid_tgid();
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        # Generate the USDT reads first: doing so rewrites self.filter,
        # which is substituted right after them.
        usdt_reads = self._generate_usdt_filter_read()
        function = (heading + body) % (
            pid_filter, "", usdt_reads, self.filter,
            self.struct_name, assignments,
            stack_reads, self.events_name, ctx_name)

        return data_decl + "\n" + function

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        """Format the seconds elapsed between ``first_ts`` and the event."""
        elapsed_ns = timestamp_ns - cls.first_ts
        return "%.6f" % (1e-9 * elapsed_ns)

    def _display_function(self):
        """Return the name shown for this probe in output and identifiers."""
        if self.probe_type in ('p', 'r'):
            return self.function
        if self.probe_type == 'u':
            return self.usdt_name
        return self.tp_event    # self.probe_type == 't'

    def print_stack(self, bpf, stack_id, pid):
        """Print the frames of ``stack_id``, or the id itself if negative."""
        if stack_id >= 0:
            frames = list(bpf.get_table(self.stacks_name).walk(stack_id))
            for address in frames:
                print("        %016x %s" % (address, bpf.sym(address, pid)))
        else:
            print("        %d" % stack_id)

    def print_event(self, bpf, cpu, data, size):
        """Decode one perf-buffer event and print it.

        Exits the program once ``max_events`` events have been printed.
        """
        # View the raw data as the generated structure, then render the
        # values with the probe's format string.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        fields = tuple(getattr(event, "v%d" % position)
                       for position in range(len(self.values)))
        msg = self.python_format % fields
        if Probe.use_localtime:
            time = strftime("%H:%M:%S")
        else:
            time = Probe._time_off_str(event.timestamp_ns)
        print("%-8s %-6d %-12s %-16s %s" %
              (time[:8], event.pid, event.comm[:12],
               self._display_function(), msg))

        if self.user_stack:
            print("    User Stack Trace:")
            self.print_stack(bpf, event.user_stack_id, event.pid)
        if self.kernel_stack:
            print("    Kernel Stack Trace:")
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        limit = Probe.max_events
        if limit is not None and Probe.event_count >= limit:
            exit()

    def attach(self, bpf, verbose):
        """Attach the probe and register ``print_event`` on its perf buffer.

        ``verbose`` is accepted but not read by this method.
        """
        attach_impl = self._attach_u if len(self.library) else self._attach_k
        attach_impl(bpf)
        self.python_struct = self._generate_python_data_decl()
        on_event = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(on_event)

    def _attach_k(self, bpf):
        """Attach a kprobe ("p") or kretprobe ("r") for ``self.function``."""
        kind = self.probe_type
        if kind == "p":
            bpf.attach_kprobe(event=self.function,
                              fn_name=self.probe_name)
        elif kind == "r":
            bpf.attach_kretprobe(event=self.function,
                                 fn_name=self.probe_name)
        # Tracepoints don't need an explicit attach.

    def _attach_u(self, bpf):
        """Attach a uprobe or uretprobe in the resolved library or binary."""
        resolved = BPF.find_library(self.library)
        if resolved is None:
            # Not a library; it may be an executable such as 'bash'.
            resolved = BPF.find_exe(self.library)
        if not resolved:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            return  # Was already enabled by the BPF constructor

        if self.probe_type == "r":
            attach_fn = bpf.attach_uretprobe
        else:
            attach_fn = bpf.attach_uprobe
        attach_fn(name=resolved,
                  sym=self.function,
                  fn_name=self.probe_name,
                  pid=Probe.pid)
Beispiel #13
0
class Probe(object):
    def __init__(self, pattern, use_regex=False, pid=None, per_pid=False):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint
        """
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            self.pattern = self.pattern.replace('*', '.*')
            self.pattern = '^' + self.pattern + '$'

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if libpath is None or len(libpath) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        self.matched = 0

    def is_kernel_probe(self):
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass  # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Build the BPF C program for this probe and compile it.

        The handler template contains the placeholders FILTER, GET_PID and
        STACK_FLAGS, which are substituted below depending on the probe kind,
        self.pid and self.per_pid. For USDT probes one renamed copy of the
        handler is emitted per matching probe and enabled on a USDT context;
        self.matched counts those copies. The compiled program is stored in
        self.bpf.
        """
        # Handler body; each increment is keyed by (pid, stack id).
        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.pid = GET_PID;
    key.stackid = stack_traces.get_stackid(ctx, STACK_FLAGS);
    u64 zero = 0;
    u64 *val = counts.lookup_or_init(&key, &zero);
    (*val)++;
    return 0;
}
        """
        # Shared declarations; the handler text is appended to this later.
        bpf_text = """#include <uapi/linux/ptrace.h>

struct key_t {
    u32 pid;
    int stackid;
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);

        """

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        # The in-program filter is only emitted for kernel probes.
        if self.is_kernel_probe() and self.pid:
            trace_count_text = trace_count_text.replace(
                'FILTER',
                ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                 'if (pid != %d) { return 0; }') % (self.pid))
        else:
            trace_count_text = trace_count_text.replace('FILTER', '')

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P.
        if self.per_pid or not self.is_kernel_probe():
            trace_count_text = trace_count_text.replace(
                'GET_PID', 'bpf_get_current_pid_tgid() >> 32')
        else:
            # A constant key lets all processes aggregate into one entry.
            trace_count_text = trace_count_text.replace(
                'GET_PID', '0xffffffff')

        stack_flags = 'BPF_F_REUSE_STACKID'
        if not self.is_kernel_probe():
            stack_flags += '| BPF_F_USER_STACK'  # can't do both U *and* K
        trace_count_text = trace_count_text.replace('STACK_FLAGS', stack_flags)

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                # Without a pid, only probes from the requested binary count.
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if re.match(self.pattern, probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            # NOTE(review): `debug` is not defined in this block; presumably
            # a module-level flag -- confirm.
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])
Beispiel #14
0
class Probe(object):
    # State shared by all probes.
    # probe_count: incremented once per constructed probe; gives probe_num.
    probe_count = 0
    # streq_index: numbering for the generated streq_N helper functions.
    streq_index = 0
    # max_events / event_count: print_event exits once event_count reaches
    # max_events (no limit while max_events is None).
    max_events = None
    event_count = 0
    # first_ts: reference timestamp subtracted in _time_off_str.
    first_ts = 0
    # use_localtime: print wall-clock time instead of an offset.
    use_localtime = True
    # tgid / pid: filter targets; -1 means "not set".
    tgid = -1
    pid = -1

    @classmethod
    def configure(cls, args):
        """Copy the command-line settings shared by all probes onto the class.

        Reads max_events, offset, tgid and pid from args; a falsy tgid or pid
        is stored as -1. Also records the current monotonic time as first_ts.
        """
        cls.first_ts = Time.monotonic_time()
        cls.use_localtime = not args.offset
        cls.max_events = args.max_events
        cls.pid = args.pid or -1
        cls.tgid = args.tgid or -1

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(), self.probe_num)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type,
            self.library,
            self._display_function(),
            self.filter,
            self.types,
            self.values,
        )

    def is_default_action(self):
        return self.python_format == ""

    def _bail(self, error):
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    def _parse_probe(self):
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(" ")
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[: i + 1])
                    text = text[i + 1 :]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " + "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""  # kernel
            self.function = ""  # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""  # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Open a USDT context for self.library and check that it exposes a
        probe named self.usdt_name; bail out otherwise.

        The context targets Probe.pid when that is set, else Probe.tgid.
        """
        if Probe.pid and Probe.pid != -1:
            target = Probe.pid
        else:
            target = Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)

        wanted = self.usdt_name
        # Only existence is checked here; the probe is enabled later.
        if not any(candidate.name == wanted
                   for candidate in self.usdt.enumerate_probes()):
            self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        for match in re.finditer(r"[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)", fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r"([^%]%)(u|d|llu|lld|hu|hd)", r"\1d", fmt)
        fmt = re.sub(r"([^%]%)(x|llx)", r"\1x", fmt)
        fmt = re.sub("%K|%U", "%s", fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r"(\".*?\"),?(.*)", action)
        if match is None:
            self._bail('expected format string in "s')

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Shorthand names usable in probe filters and action arguments, mapped to
    # the C expressions that _rewrite_expr substitutes for them.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
    }

    def _generate_streq_function(self, string):
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, unsigned long str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle); ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (
            fname,
            string,
        )
        return fname

    def _rewrite_expr(self, expr):
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # ctypes field type for each non-string format specifier; used when
    # building the Python-side event structure ("s" is handled separately).
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong,
    }

    def _generate_python_field_decl(self, idx, fields):
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        self.python_struct_name = "%s_%d_Data" % (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16),  # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,), dict(_fields_=fields))

    # C type emitted into the BPF-side struct for each non-string format
    # specifier; the key set doubles as the list of known specifiers.
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long",
    }
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + self._generate_field_decl(i)

        kernel_stack_str = "       int kernel_stack_id;" if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields, kernel_stack_str, user_stack_str, self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            text = ("        u64 %s = 0;\n" + "        bpf_usdt_readarg(%s, ctx, &%s);\n") % (expr, expr[3], expr)

        if field_type == "s":
            return (
                text
                + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """
                % (expr, idx, idx, expr)
            )
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        text = ""
        if self.probe_type == "u":
            for arg, _ in Probe.aliases.items():
                if not (arg.startswith("arg") and (arg in self.filter)):
                    continue
                arg_index = int(arg.replace("arg", ""))
                arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name, arg_index)
                if not arg_ctype:
                    self._bail("Unable to determine type of {} " "in the filter".format(arg))
                text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(
                    arg_ctype, arg, arg_index, arg
                )
                self.filter = self.filter.replace(arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        """Return the C source for this probe.

        The result is the generated streq helpers, followed by the data
        declarations, followed by the handler function (or TRACEPOINT_PROBE
        body for tracepoints). The handler applies the pid/tgid filter and
        the probe predicate, fills the event struct and submits it to the
        perf output table.

        include_self -- when false, and no kernel pid/tgid filter applies,
                        events from this process (os.getpid()) are dropped.
        """
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = (
                """
        if (__pid != %d) { return 0; }
                """
                % Probe.pid
            )
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = (
                """
        if (__tgid != %d) { return 0; }
                """
                % Probe.tgid
            )
        elif not include_self:
            pid_filter = (
                """
        if (__tgid == %d) { return 0; }
                """
                % os.getpid()
            )
        else:
            pid_filter = ""

        # prefix is always empty here; it only fills a slot in the template.
        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        # Tracepoint handlers are declared through TRACEPOINT_PROBE and use
        # "args" as their context name; all other probes are plain functions
        # taking ctx.
        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (
                self.stacks_name,
                ctx_name,
            )
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (
                self.stacks_name,
                ctx_name,
            )

        text = (
            heading
            + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        )
        # Tuple elements are evaluated left to right:
        # _generate_usdt_filter_read() may rewrite self.filter, and the
        # rewritten filter is the one read by the following element.
        text = text % (
            pid_filter,
            prefix,
            self._generate_usdt_filter_read(),
            self.filter,
            self.struct_name,
            data_fields,
            stack_trace,
            self.events_name,
            ctx_name,
        )

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        if self.probe_type == "p" or self.probe_type == "r":
            return self.function
        elif self.probe_type == "u":
            return self.usdt_name
        else:  # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %016x %s" % (addr, bpf.sym(addr, tgid)))

    def _format_message(self, bpf, tgid, values):
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i in xrange(0, len(self.types)) if self.types[i] == "K"]
        user_placeholders = [i for i in xrange(0, len(self.types)) if self.types[i] == "U"]
        for kp in kernel_placeholders:
            values[kp] = bpf.ksymaddr(values[kp])
        for up in user_placeholders:
            values[up] = bpf.symaddr(values[up], tgid)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        """Perf-buffer callback: decode one event and print it.

        `data` is cast to the generated structure type (self.python_struct),
        the v0..vN fields are formatted with the probe's format string, and
        one line is printed with time, tgid, pid, comm, probe name and
        message, followed by the captured stacks if any. cpu and size are
        not used. Exits the process once Probe.max_events events have been
        printed.
        """
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        # NOTE(review): on Python 3 map() returns an iterator, not a list --
        # confirm _format_message copes with that.
        values = map(lambda i: getattr(event, "v%d" % i), range(0, len(self.values)))
        msg = self._format_message(bpf, event.tgid, values)
        # Wall-clock time or the offset from Probe.first_ts, as configured.
        time = strftime("%H:%M:%S") if Probe.use_localtime else Probe._time_off_str(event.timestamp_ns)
        print(
            "%-8s %-6d %-6d %-12s %-16s %s"
            % (time[:8], event.tgid, event.pid, event.comm, self._display_function(), msg)
        )

        # Kernel stacks are printed with tgid -1, user stacks with the
        # event's own tgid.
        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        """Attach this probe and open the perf buffer that delivers its
        events to print_event.

        bpf is the loaded BPF object; verbose is accepted but not used here.
        """
        is_kernel = len(self.library) == 0
        if is_kernel:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)

        self.python_struct = self._generate_python_data_decl()
        on_event = partial(self.print_event, bpf)
        events_table = bpf[self.events_name]
        events_table.open_perf_buffer(on_event)

    def _attach_k(self, bpf):
        """Attach a kprobe ('p') or kretprobe ('r') on self.function, handled
        by the BPF function self.probe_name."""
        kind = self.probe_type
        if kind == "p":
            bpf.attach_kprobe(event=self.function, fn_name=self.probe_name)
        elif kind == "r":
            bpf.attach_kretprobe(event=self.function, fn_name=self.probe_name)
        # Any other type (tracepoints) needs no explicit attach.

    def _attach_u(self, bpf):
        """Attach a uprobe or uretprobe on self.function in self.library.

        The library is resolved with BPF.find_library, then BPF.find_exe;
        _bail is called when neither gives a path. USDT probes need nothing
        here.
        """
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # Not a library -- it might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if not libpath:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            return  # Was already enabled by the BPF constructor

        attach_fn = bpf.attach_uretprobe if self.probe_type == "r" else bpf.attach_uprobe
        attach_fn(name=libpath, sym=self.function, fn_name=self.probe_name, pid=Probe.pid)
Beispiel #15
0
class Probe():
    "Parse, load and attach BPF probes"

    def __init__(self,
                 probe_spec,
                 kernel_stack,
                 user_stack,
                 use_regex=False,
                 pid=None,
                 per_pid=False,
                 cpu=None):
        """Parse probe_spec and resolve the target binary for user probes.

        kernel_stack / user_stack select which stacks are recorded; pid and
        cpu are stored as given. use_regex and per_pid are accepted but not
        used: per-process statistics are always on. Raises Exception when
        the library of a user-space or USDT probe cannot be found.
        """
        if DEBUG:
            print(probe_spec)

        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        self.pid = pid
        self.cpu = cpu
        # FIXME: don't hardcode this here.
        self.per_pid = True
        self.matched = 0
        self.bpf = None
        self.usdt = None

        self._parse_spec(probe_spec)

        is_user_probe = self.type == "u" or (self.type == "p"
                                             and bool(self.library))
        if is_user_probe:
            resolved = BPF.find_library(self.library)
            if resolved is None:
                # This might be an executable (e.g. 'bash')
                resolved = BPF.find_exe(self.library)
            if resolved is None or len(resolved) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = str(resolved, 'ascii')

    def _parse_spec(self, spec):
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]

        if len(parts[0]) == 0:
            self.type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.type = parts[0]
        else:
            raise Exception("probe type must be '', 'p', 't', 'r', " +
                            "or 'u', but got '%s'" % parts[0])

        if self.type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.pattern = ":".join(parts[2:])
        else:
            self.library = ':'.join(parts[1:-1])
            self.pattern = parts[-1]

    def load(self):
        ctx_name = "ctx"
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
            key.user_stack_id = stack_traces.get_stackid(
            %s, BPF_F_USER_STACK
            );""" % (ctx_name)
        else:
            stack_trace += "key.user_stack_id = -1;"
        if self.kernel_stack:
            stack_trace += """
            key.kernel_stack_id = stack_traces.get_stackid(
            %s, 0
            );""" % (ctx_name)
        else:
            stack_trace += "key.kernel_stack_id = -1;"

        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.tgid = GET_TGID;
    STORE_COMM
    %s
    counts.increment(key);
    return 0;
}
        """
        trace_count_text = trace_count_text % (stack_trace)

        bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct key_t {
    // no pid (thread ID) so that we do not needlessly split this key
    u32 tgid;
    int kernel_stack_id;
    int user_stack_id;
    char name[TASK_COMM_LEN];
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);
        """

        filter_text = []
        trace_count_text = trace_count_text.replace('FILTER',
                                                    '\n    '.join(filter_text))

        # Do per-pid statistics iff -P is provided
        if self.per_pid:
            trace_count_text = trace_count_text.replace(
                'GET_TGID', 'bpf_get_current_pid_tgid() >> 32')
            trace_count_text = trace_count_text.replace(
                'STORE_COMM',
                'bpf_get_current_comm(&key.name, sizeof(key.name));')
        else:
            # skip splitting on PID so these aggregate
            # together, and don't store the process name.
            trace_count_text = trace_count_text.replace(
                'GET_TGID', '0xffffffff')
            trace_count_text = trace_count_text.replace('STORE_COMM', '')

        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (str(probe.bin_path, 'ascii') !=
                                     self.library):
                    continue
                parts = self.pattern.split(":")
                if len(parts) == 1:
                    provider_name = None
                    usdt_name = parts[0].encode("ascii")
                else:
                    provider_name = parts[0]
                    usdt_name = parts[1]
                if (str(probe.name, 'ascii') == usdt_name
                        and str(probe.provider, 'ascii') == provider_name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(str(probe.name, 'ascii'), new_func)
                    self.matched += 1
            if DEBUG:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if DEBUG:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])

    def attach(self):
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count")
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count")
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass  # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)