Beispiel #1
0
class Probe(object):
        # Counter shared by all probes; __init__ embeds it in the generated
        # probe function and hash names and then increments it, so every
        # probe gets a unique numeric suffix.
        next_probe_index = 0
        # Counter shared by all probes; used to name the generated
        # streq_N string-comparison helpers.
        streq_index = 0
        # Textual aliases that _substitute_aliases expands inside user
        # expressions (and that _display_expr maps back for display).
        aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"}

        def _substitute_aliases(self, expr):
                if expr is None:
                        return expr
                for alias, subst in Probe.aliases.items():
                        expr = expr.replace(alias, subst)
                return expr

        def _parse_signature(self):
                params = map(str.strip, self.signature.split(','))
                self.param_types = {}
                for param in params:
                        # If the type is a pointer, the * can be next to the
                        # param name. Other complex types like arrays are not
                        # supported right now.
                        index = param.rfind('*')
                        index = index if index != -1 else param.rfind(' ')
                        param_type = param[0:index + 1].strip()
                        param_name = param[index + 1:].strip()
                        self.param_types[param_name] = param_type

        def _generate_entry(self):
                self.entry_probe_func = self.probe_func_name + "_entry"
                text = """
int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        COLLECT
        return 0;
}
"""
                text = text.replace("PROBENAME", self.entry_probe_func)
                text = text.replace("SIGNATURE",
                     "" if len(self.signature) == 0 else ", " + self.signature)
                text = text.replace("PID_FILTER", self._generate_pid_filter())
                collect = ""
                for pname in self.args_to_probe:
                        param_hash = self.hashname_prefix + pname
                        if pname == "__latency":
                                collect += """
u64 __time = bpf_ktime_get_ns();
%s.update(&__pid, &__time);
                        """ % param_hash
                        else:
                                collect += "%s.update(&__pid, &%s);\n" % \
                                           (param_hash, pname)
                text = text.replace("COLLECT", collect)
                return text

        def _generate_entry_probe(self):
                # Any $entry(name) expressions result in saving that argument
                # when entering the function.
                self.args_to_probe = set()
                regex = r"\$entry\((\w+)\)"
                for expr in self.exprs:
                        for arg in re.finditer(regex, expr):
                                self.args_to_probe.add(arg.group(1))
                for arg in re.finditer(regex, self.filter):
                        self.args_to_probe.add(arg.group(1))
                if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
                   "$latency" in self.filter:
                        self.args_to_probe.add("__latency")
                        self.param_types["__latency"] = "u64"    # nanoseconds
                for pname in self.args_to_probe:
                        if pname not in self.param_types:
                                raise ValueError("$entry(%s): no such param" %
                                                 arg)

                self.hashname_prefix = "%s_param_" % self.probe_hash_name
                text = ""
                for pname in self.args_to_probe:
                        # Each argument is stored in a separate hash that is
                        # keyed by pid.
                        text += "BPF_HASH(%s, u32, %s);\n" % \
                             (self.hashname_prefix + pname,
                              self.param_types[pname])
                text += self._generate_entry()
                return text

        def _generate_retprobe_prefix(self):
                # After we're done here, there are __%s_val variables for each
                # argument we needed to probe using $entry(name), and they all
                # have values (which isn't necessarily the case if we missed
                # the method entry probe).
                text = ""
                self.param_val_names = {}
                for pname in self.args_to_probe:
                        val_name = "__%s_val" % pname
                        text += "%s *%s = %s.lookup(&__pid);\n" % \
                                (self.param_types[pname], val_name,
                                 self.hashname_prefix + pname)
                        text += "if (%s == 0) { return 0 ; }\n" % val_name
                        self.param_val_names[pname] = val_name
                return text

        def _replace_entry_exprs(self):
                for pname, vname in self.param_val_names.items():
                        if pname == "__latency":
                                entry_expr = "$latency"
                                val_expr = "(bpf_ktime_get_ns() - *%s)" % vname
                        else:
                                entry_expr = "$entry(%s)" % pname
                                val_expr = "(*%s)" % vname
                        for i in range(0, len(self.exprs)):
                                self.exprs[i] = self.exprs[i].replace(
                                                entry_expr, val_expr)
                        self.filter = self.filter.replace(entry_expr,
                                                          val_expr)

        def _attach_entry_probe(self):
                if self.is_user:
                        self.bpf.attach_uprobe(name=self.library,
                                               sym=self.function,
                                               fn_name=self.entry_probe_func,
                                               pid=self.pid or -1)
                else:
                        self.bpf.attach_kprobe(event=self.function,
                                               fn_name=self.entry_probe_func)

        def _bail(self, error):
                raise ValueError("error parsing probe '%s': %s" %
                                 (self.raw_spec, error))

        def _validate_specifier(self):
                # Everything after '#' is the probe label, ignore it
                spec = self.raw_spec.split('#')[0]
                parts = spec.strip().split(':')
                if len(parts) < 3:
                        self._bail("at least the probe type, library, and " +
                                   "function signature must be specified")
                if len(parts) > 6:
                        self._bail("extraneous ':'-separated parts detected")
                if parts[0] not in ["r", "p", "t", "u"]:
                        self._bail("probe type must be 'p', 'r', 't', or 'u'" +
                                   " but got '%s'" % parts[0])
                if re.match(r"\S+\(.*\)", parts[2]) is None:
                        self._bail(("function signature '%s' has an invalid " +
                                    "format") % parts[2])

        def _parse_expr_types(self, expr_types):
                if len(expr_types) == 0:
                        self._bail("no expr types specified")
                self.expr_types = expr_types.split(',')

        def _parse_exprs(self, exprs):
                if len(exprs) == 0:
                        self._bail("no exprs specified")
                self.exprs = exprs.split(',')

        def _make_valid_identifier(self, ident):
                return re.sub(r'[^A-Za-z0-9_]', '_', ident)

        def __init__(self, tool, type, specifier):
                """Parse a probe specifier and initialize the probe's state.

                tool      -- object whose args.pid and args.cumulative are read
                type      -- "hist" or "freq"
                specifier -- raw ':'-separated probe text with an optional
                             '#label' suffix

                Raises ValueError (through _bail) when the specifier is
                malformed.
                """
                self.usdt_ctx = None
                self.streq_functions = ""
                self.pid = tool.args.pid
                self.cumulative = tool.args.cumulative or False
                self.raw_spec = specifier
                self._validate_specifier()

                # The label is only used when there is exactly one '#'.
                spec_and_label = specifier.split('#')
                self.label = spec_and_label[1] \
                             if len(spec_and_label) == 2 else None

                parts = spec_and_label[0].strip().split(':')
                self.type = type    # hist or freq
                self.probe_type = parts[0]
                # parts[2] looks like "function(signature)".
                fparts = parts[2].split('(')
                self.function = fparts[0].strip()
                if self.probe_type == "t":
                        self.library = ""       # kernel
                        self.tp_category = parts[1]
                        self.tp_event = self.function
                elif self.probe_type == "u":
                        self.library = parts[1]
                        # _enable_usdt_probe needs the probe function name,
                        # so it is computed here already; the same value is
                        # assigned again near the end of this method.
                        self.probe_func_name = self._make_valid_identifier(
                                "%s_probe%d" %
                                (self.function, Probe.next_probe_index))
                        self._enable_usdt_probe()
                else:
                        self.library = parts[1]
                self.is_user = len(self.library) > 0
                # Drop the trailing ')' of the signature.
                self.signature = fparts[1].strip()[:-1]
                self._parse_signature()

                # If the user didn't specify an expression to probe, we probe
                # the retval in a ret probe, or simply the value "1" otherwise.
                self.is_default_expr = len(parts) < 5
                if not self.is_default_expr:
                        self._parse_expr_types(parts[3])
                        self._parse_exprs(parts[4])
                        if len(self.exprs) != len(self.expr_types):
                                self._bail("mismatched # of exprs and types")
                        if self.type == "hist" and len(self.expr_types) > 1:
                                self._bail("histograms can only have 1 expr")
                else:
                        if not self.probe_type == "r" and self.type == "hist":
                                self._bail("histograms must have expr")
                        self.expr_types = \
                          ["u64" if not self.probe_type == "r" else "int"]
                        self.exprs = \
                          ["1" if not self.probe_type == "r" else "$retval"]
                # The filter is the optional sixth part of the specifier.
                self.filter = "" if len(parts) != 6 else parts[5]
                self._substitute_exprs()

                # Do we need to attach an entry probe so that we can collect an
                # argument that is required for an exit (return) probe?
                def check(expr):
                        keywords = ["$entry", "$latency"]
                        return any(map(lambda kw: kw in expr, keywords))
                self.entry_probe_required = self.probe_type == "r" and \
                        (any(map(check, self.exprs)) or check(self.filter))

                # Both generated names carry the current probe index, which
                # is then advanced for the next probe.
                self.probe_func_name = self._make_valid_identifier(
                        "%s_probe%d" %
                        (self.function, Probe.next_probe_index))
                self.probe_hash_name = self._make_valid_identifier(
                        "%s_hash%d" %
                        (self.function, Probe.next_probe_index))
                Probe.next_probe_index += 1

        def _enable_usdt_probe(self):
                """Create the USDT context for the library and enable the probe.

                The context is kept in self.usdt_ctx.
                """
                context = USDT(path=self.library, pid=self.pid)
                self.usdt_ctx = context
                context.enable_probe(self.function, self.probe_func_name)

        def _generate_streq_function(self, string):
                fname = "streq_%d" % Probe.streq_index
                Probe.streq_index += 1
                self.streq_functions += """
static inline bool %s(char const *ignored, char const *str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
                return fname

        def _substitute_exprs(self):
                def repl(expr):
                        expr = self._substitute_aliases(expr)
                        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
                        for match in matches:
                                string = match.group(1)
                                fname = self._generate_streq_function(string)
                                expr = expr.replace("STRCMP", fname, 1)
                        return expr.replace("$retval", "PT_REGS_RC(ctx)")
                for i in range(0, len(self.exprs)):
                        self.exprs[i] = repl(self.exprs[i])
                self.filter = repl(self.filter)

        def _is_string(self, expr_type):
                return expr_type == "char*" or expr_type == "char *"

        def _generate_hash_field(self, i):
                if self._is_string(self.expr_types[i]):
                        return "struct __string_t v%d;\n" % i
                else:
                        return "%s v%d;\n" % (self.expr_types[i], i)

        def _generate_usdt_arg_assignment(self, i):
                expr = self.exprs[i]
                if self.probe_type == "u" and expr[0:3] == "arg":
                        arg_index = int(expr[3])
                        arg_ctype = self.usdt_ctx.get_probe_arg_ctype(
                                self.function, arg_index - 1)
                        return ("        %s %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                                % (arg_ctype, expr, expr[3], expr)
                else:
                        return ""

        def _generate_field_assignment(self, i):
                text = self._generate_usdt_arg_assignment(i)
                if self._is_string(self.expr_types[i]):
                        return (text + "        bpf_probe_read(&__key.v%d.s," +
                                " sizeof(__key.v%d.s), (void *)%s);\n") % \
                                (i, i, self.exprs[i])
                else:
                        return text + "        __key.v%d = %s;\n" % \
                               (i, self.exprs[i])

        def _generate_hash_decl(self):
                if self.type == "hist":
                        return "BPF_HISTOGRAM(%s, %s);" % \
                               (self.probe_hash_name, self.expr_types[0])
                else:
                        text = "struct %s_key_t {\n" % self.probe_hash_name
                        for i in range(0, len(self.expr_types)):
                                text += self._generate_hash_field(i)
                        text += "};\n"
                        text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
                                (self.probe_hash_name, self.probe_hash_name)
                        return text

        def _generate_key_assignment(self):
                if self.type == "hist":
                        return self._generate_usdt_arg_assignment(0) + \
                               ("%s __key = %s;\n" %
                                (self.expr_types[0], self.exprs[0]))
                else:
                        text = "struct %s_key_t __key = {};\n" % \
                                self.probe_hash_name
                        for i in range(0, len(self.exprs)):
                                text += self._generate_field_assignment(i)
                        return text

        def _generate_hash_update(self):
                if self.type == "hist":
                        return "%s.increment(bpf_log2l(__key));" % \
                                self.probe_hash_name
                else:
                        return "%s.increment(__key);" % self.probe_hash_name

        def _generate_pid_filter(self):
                # Kernel probes need to explicitly filter pid, because the
                # attach interface doesn't support pid filtering
                if self.pid is not None and not self.is_user:
                        return "if (__tgid != %d) { return 0; }" % self.pid
                else:
                        return ""

        def generate_text(self):
                """Return the complete C source generated for this probe.

                The result consists of the generated streq helpers, an
                optional entry probe (with its per-argument hashes), the
                result table declaration and the main probe function. The
                main function is built by filling the upper-case
                placeholders of a template.
                """
                program = ""
                # Tracepoints use the TRACEPOINT_PROBE macro as the function
                # header; every other probe type gets a regular function.
                probe_text = """
DATA_DECL
                """ + (
                    "TRACEPOINT_PROBE(%s, %s)" %
                    (self.tp_category, self.tp_event)
                    if self.probe_type == "t"
                    else "int PROBENAME(struct pt_regs *ctx SIGNATURE)") + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        PREFIX
        if (!(FILTER)) return 0;
        KEY_EXPR
        COLLECT
        return 0;
}
"""
                prefix = ""
                signature = ""

                # If any entry arguments are probed in a ret probe, we need
                # to generate an entry probe to collect them
                if self.entry_probe_required:
                        program += self._generate_entry_probe()
                        prefix += self._generate_retprobe_prefix()
                        # Replace $entry(paramname) with a reference to the
                        # value we collected when entering the function:
                        self._replace_entry_exprs()

                if self.probe_type == "p" and len(self.signature) > 0:
                        # Only entry uprobes/kprobes can have user-specified
                        # signatures. Other probes force it to ().
                        signature = ", " + self.signature

                program += probe_text.replace("PROBENAME",
                                              self.probe_func_name)
                program = program.replace("SIGNATURE", signature)
                # PID_FILTER has to be substituted before FILTER below,
                # because "FILTER" is a substring of "PID_FILTER".
                program = program.replace("PID_FILTER",
                                          self._generate_pid_filter())

                decl = self._generate_hash_decl()
                key_expr = self._generate_key_assignment()
                collect = self._generate_hash_update()
                program = program.replace("DATA_DECL", decl)
                program = program.replace("KEY_EXPR", key_expr)
                # An empty filter accepts every event.
                program = program.replace("FILTER",
                        "1" if len(self.filter) == 0 else self.filter)
                program = program.replace("COLLECT", collect)
                program = program.replace("PREFIX", prefix)

                return self.streq_functions + program

        def _attach_u(self):
                """Attach the probe function to a user-space symbol.

                self.library is resolved with BPF.find_library, falling back
                to BPF.find_exe; the probe bails out (ValueError) when
                neither yields a path. Return probes are attached with
                attach_uretprobe, all others with attach_uprobe.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        libpath = BPF.find_exe(self.library)
                if libpath is None or len(libpath) == 0:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "r":
                        attach_fn = self.bpf.attach_uretprobe
                else:
                        attach_fn = self.bpf.attach_uprobe
                attach_fn(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_func_name,
                          pid=self.pid or -1)

        def _attach_k(self):
                if self.probe_type == "t":
                        pass    # Nothing to do for tracepoints
                elif self.probe_type == "r":
                        self.bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_func_name)
                else:
                        self.bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_func_name)

        def attach(self, bpf):
                self.bpf = bpf
                if self.probe_type == "u":
                        return
                if self.is_user:
                        self._attach_u()
                else:
                        self._attach_k()
                if self.entry_probe_required:
                        self._attach_entry_probe()

        def _v2s(self, v):
                # Most fields can be converted with plain str(), but strings
                # are wrapped in a __string_t which has an .s field
                if "__string_t" in type(v).__name__:
                        return str(v.s)
                return str(v)

        def _display_expr(self, i):
                # Replace ugly latency calculation with $latency
                expr = self.exprs[i].replace(
                        "(bpf_ktime_get_ns() - *____latency_val)", "$latency")
                # Replace alias values back with the alias name
                for alias, subst in Probe.aliases.items():
                        expr = expr.replace(subst, alias)
                # Replace retval expression with $retval
                expr = expr.replace("PT_REGS_RC(ctx)", "$retval")
                # Replace ugly (*__param_val) expressions with param name
                return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)

        def _display_key(self, key):
                if self.is_default_expr:
                        if not self.probe_type == "r":
                                return "total calls"
                        else:
                                return "retval = %s" % str(key.v0)
                else:
                        # The key object has v0, ..., vk fields containing
                        # the values of the expressions from self.exprs
                        def str_i(i):
                                key_i = self._v2s(getattr(key, "v%d" % i))
                                return "%s = %s" % \
                                        (self._display_expr(i), key_i)
                        return ", ".join(map(str_i, range(0, len(self.exprs))))

        def display(self, top):
                data = self.bpf.get_table(self.probe_hash_name)
                if self.type == "freq":
                        print(self.label or self.raw_spec)
                        print("\t%-10s %s" % ("COUNT", "EVENT"))
                        sdata = sorted(data.items(), key=lambda p: p[1].value)
                        if top is not None:
                                sdata = sdata[-top:]
                        for key, value in sdata:
                                # Print some nice values if the user didn't
                                # specify an expression to probe
                                if self.is_default_expr:
                                        if not self.probe_type == "r":
                                                key_str = "total calls"
                                        else:
                                                key_str = "retval = %s" % \
                                                          self._v2s(key.v0)
                                else:
                                        key_str = self._display_key(key)
                                print("\t%-10s %s" %
                                      (str(value.value), key_str))
                elif self.type == "hist":
                        label = self.label or (self._display_expr(0)
                                if not self.is_default_expr else "retval")
                        data.print_log2_hist(val_type=label)
                if not self.cumulative:
                        data.clear()

        def __str__(self):
                return self.label or self.raw_spec
Beispiel #2
0
class Probe(object):
        # Number of Probe instances created so far; __init__ increments it
        # and uses the new value as the instance's probe_num.
        probe_count = 0
        # Counter used to name the generated streq_N helpers.
        streq_index = 0
        # The attributes below are class-wide settings. Except for
        # event_count, each of them is overwritten by configure() from the
        # parsed command-line arguments.
        max_events = None
        # NOTE(review): presumably the number of events handled so far; it
        # is not updated anywhere in the visible code -- confirm.
        event_count = 0
        first_ts = 0
        print_time = False
        use_localtime = True
        time_field = False
        print_cpu = False
        print_address = False
        tgid = -1
        pid = -1
        page_cnt = None
        build_id_enabled = False

        @classmethod
        def configure(cls, args):
                """Copy the parsed command-line options onto the class.

                args is read attribute by attribute; tgid and pid fall back
                to -1 when they are unset (falsy).
                """
                show_time = args.timestamp or args.time
                localtime = not args.timestamp
                cls.max_events = args.max_events
                cls.print_time = show_time
                cls.use_localtime = localtime
                cls.time_field = show_time and (not localtime)
                cls.print_cpu = args.print_cpu
                cls.print_address = args.address
                cls.first_ts = BPF.monotonic_time()
                cls.tgid = args.tgid or -1
                cls.pid = args.pid or -1
                cls.page_cnt = args.buffer_pages
                cls.bin_cmp = args.bin_cmp
                cls.build_id_enabled = args.sym_file_list is not None

        def __init__(self, probe, string_size, kernel_stack, user_stack):
                """Parse the probe text and pick a unique probe function name.

                probe is the raw probe text; the remaining arguments are
                stored unchanged on the instance.
                """
                self.usdt = None
                self.streq_functions = ""
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
                self.user_stack = user_stack
                Probe.probe_count += 1
                self._parse_probe()
                self.probe_num = Probe.probe_count
                name = "probe_%s_%d" % (self._display_function(),
                                        self.probe_num)
                name = re.sub(r'[^A-Za-z0-9_]', '_', name)
                if self.is_syscall_kprobe:
                        # compiler can generate proper codes for function
                        # signatures with "syscall__" prefix, so swap the
                        # leading "probe_" for it
                        name = "syscall__" + name[6:]
                self.probe_name = name

        def __str__(self):
                """Return a one-line summary of the probe, filter and action."""
                fields = (self.probe_type, self.library,
                          self._display_function(), self.filter,
                          self.types, self.values)
                return "%s:%s:%s FLT=%s ACT=%s/%s" % fields

        def is_default_action(self):
                return self.python_format == ""

        def _bail(self, error):
                raise ValueError("error in probe '%s': %s" %
                                 (self.raw_probe, error))

        def _parse_probe(self):
                text = self.raw_probe

                # There might be a function signature preceding the actual
                # filter/print part, or not. Find the probe specifier first --
                # it ends with either a space or an open paren ( for the
                # function signature part.
                #                                          opt. signature
                #                               probespec       |      rest
                #                               ---------  ----------   --
                (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
                                             text).groups()

                self._parse_spec(spec)
                # Remove the parens
                self.signature = sig[1:-1] if sig else None
                if self.signature and self.probe_type in ['u', 't']:
                        self._bail("USDT and tracepoint probes can't have " +
                                   "a function signature; use arg1, arg2, " +
                                   "... instead")

                text = rest.lstrip()
                # If we now have a (, wait for the balanced closing ) and that
                # will be the predicate
                self.filter = None
                if len(text) > 0 and text[0] == "(":
                        balance = 1
                        for i in range(1, len(text)):
                                if text[i] == "(":
                                        balance += 1
                                if text[i] == ")":
                                        balance -= 1
                                if balance == 0:
                                        self._parse_filter(text[:i + 1])
                                        text = text[i + 1:]
                                        break
                        if self.filter is None:
                                self._bail("unmatched end of predicate")

                if self.filter is None:
                        self.filter = "1"

                # The remainder of the text is the printf action
                self._parse_action(text.lstrip())

        def _parse_spec(self, spec):
                parts = spec.split(":")
                # Two special cases: 'func' means 'p::func', 'lib:func' means
                # 'p:lib:func'. Other combinations need to provide an empty
                # value between delimiters, e.g. 'r::func' for a kretprobe on
                # the function func.
                if len(parts) == 1:
                        parts = ["p", "", parts[0]]
                elif len(parts) == 2:
                        parts = ["p", parts[0], parts[1]]
                if len(parts[0]) == 0:
                        self.probe_type = "p"
                elif parts[0] in ["p", "r", "t", "u"]:
                        self.probe_type = parts[0]
                else:
                        self._bail("probe type must be '', 'p', 't', 'r', " +
                                   "or 'u', but got '%s'" % parts[0])
                if self.probe_type == "t":
                        self.tp_category = parts[1]
                        self.tp_event = parts[2]
                        self.library = ""       # kernel
                        self.function = ""      # from TRACEPOINT_PROBE
                elif self.probe_type == "u":
                        self.library = ':'.join(parts[1:-1])
                        self.usdt_name = parts[-1]
                        self.function = ""      # no function, just address
                        # We will discover the USDT provider by matching on
                        # the USDT name in the specified library
                        self._find_usdt_probe()
                else:
                        self.library = ':'.join(parts[1:-1])
                        self.function = parts[-1]

                # only x64 syscalls needs checking, no other syscall wrapper yet.
                self.is_syscall_kprobe = False
                if self.probe_type == "p" and len(self.library) == 0 and \
                   self.function[:10] == "__x64_sys_":
                        self.is_syscall_kprobe = True

        def _find_usdt_probe(self):
                """Open the USDT context and check that the probe exists.

                The context is created for Probe.pid when that is set (and
                not -1), otherwise for Probe.tgid, and kept in self.usdt.
                Bails out (ValueError) when no enumerated probe is named
                self.usdt_name.
                """
                if Probe.pid and Probe.pid != -1:
                        target = Probe.pid
                else:
                        target = Probe.tgid
                self.usdt = USDT(path=self.library, pid=target)
                # Stops at the first match; the probe is enabled later.
                found = any(probe.name == self.usdt_name.encode('ascii')
                            for probe in self.usdt.enumerate_probes())
                if not found:
                        self._bail("unrecognized USDT probe %s" %
                                   self.usdt_name)

        def _parse_filter(self, filt):
                self.filter = self._rewrite_expr(filt)

        def _parse_types(self, fmt):
                for match in re.finditer(
                            r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
                        self.types.append(match.group(1))
                fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
                fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
                fmt = re.sub('%K|%U', '%s', fmt)
                self.python_format = fmt.strip('"')

        def _parse_action(self, action):
                self.values = []
                self.types = []
                self.python_format = ""
                if len(action) == 0:
                        return

                action = action.strip()
                match = re.search(r'(\".*?\"),?(.*)', action)
                if match is None:
                        self._bail("expected format string in \"s")

                self.raw_format = match.group(1)
                self._parse_types(self.raw_format)
                for part in re.split('(?<!"),', match.group(2)):
                        part = self._rewrite_expr(part)
                        if len(part) > 0:
                                self.values.append(part)

        # argN aliases used by _rewrite_expr for probes that are neither
        # syscall kprobes nor USDT probes: argN is taken straight from the
        # probe's own register context.
        aliases_arg = {
                "arg1": "PT_REGS_PARM1(ctx)",
                "arg2": "PT_REGS_PARM2(ctx)",
                "arg3": "PT_REGS_PARM3(ctx)",
                "arg4": "PT_REGS_PARM4(ctx)",
                "arg5": "PT_REGS_PARM5(ctx)",
                "arg6": "PT_REGS_PARM6(ctx)",
        }

        # argN aliases used by _rewrite_expr when is_syscall_kprobe is set:
        # the first parameter of the probed function is treated as a pointer
        # to another struct pt_regs, and argN is read out of that structure
        # with bpf_probe_read.
        aliases_indarg = {
                "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
                "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
                "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
                "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
                "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
                "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
        }

        # Aliases that _rewrite_expr expands for every probe type.
        aliases_common = {
                "retval": "PT_REGS_RC(ctx)",
                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
                "$cpu": "bpf_get_smp_processor_id()",
                "$task" : "((struct task_struct *)bpf_get_current_task())"
        }

        def _generate_streq_function(self, string):
                fname = "streq_%d" % Probe.streq_index
                Probe.streq_index += 1
                self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
                return fname

        def _rewrite_expr(self, expr):
                if self.is_syscall_kprobe:
                    for alias, replacement in Probe.aliases_indarg.items():
                        expr = expr.replace(alias, replacement)
                else:
                    for alias, replacement in Probe.aliases_arg.items():
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using
                        # bpf_readarg_N macros emitted at BPF construction.
                        if self.probe_type == "u":
                                continue
                        expr = expr.replace(alias, replacement)
                for alias, replacement in Probe.aliases_common.items():
                    expr = expr.replace(alias, replacement)
                if self.bin_cmp:
                    STRCMP_RE = 'STRCMP\\(\"([^"]+)\\"'
                else:
                    STRCMP_RE = 'STRCMP\\(("[^"]+\\")'
                matches = re.finditer(STRCMP_RE, expr)
                for match in matches:
                        string = match.group(1)
                        fname = self._generate_streq_function(string)
                        expr = expr.replace("STRCMP", fname, 1)
                return expr

        # Maps each non-string format specifier to the ctypes type that
        # _generate_python_field_decl uses for the matching field of the
        # Python-side event structure ("s" is handled there separately).
        p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
                  "ld": ct.c_long,
                  "llu": ct.c_ulonglong, "lld": ct.c_longlong,
                  "hu": ct.c_ushort, "hd": ct.c_short,
                  "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
                  "c": ct.c_ubyte,
                  "K": ct.c_ulonglong, "U": ct.c_ulonglong}

        def _generate_python_field_decl(self, idx, fields):
                field_type = self.types[idx]
                if field_type == "s":
                        ptype = ct.c_char * self.string_size
                else:
                        ptype = Probe.p_type[field_type]
                fields.append(("v%d" % idx, ptype))

        def _generate_python_data_decl(self):
                self.python_struct_name = "%s_%d_Data" % \
                                (self._display_function(), self.probe_num)
                fields = []
                if self.time_field:
                    fields.append(("timestamp_ns", ct.c_ulonglong))
                if self.print_cpu:
                    fields.append(("cpu", ct.c_int))
                fields.extend([
                        ("tgid", ct.c_uint),
                        ("pid", ct.c_uint),
                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
                ])
                for i in range(0, len(self.types)):
                        self._generate_python_field_decl(i, fields)
                if self.kernel_stack:
                        fields.append(("kernel_stack_id", ct.c_int))
                if self.user_stack:
                        fields.append(("user_stack_id", ct.c_int))
                return type(self.python_struct_name, (ct.Structure,),
                            dict(_fields_=fields))

        # Maps each non-string format specifier to the C type used for the
        # matching field of the BPF-side event struct and for the cast in
        # _generate_field_assign ("s" is handled separately in both places).
        c_type = {"u": "unsigned int", "d": "int",
                  "lu": "unsigned long", "ld": "long",
                  "llu": "unsigned long long", "lld": "long long",
                  "hu": "unsigned short", "hd": "short",
                  "x": "unsigned int", "lx": "unsigned long",
                  "llx": "unsigned long long",
                  "c": "char", "K": "unsigned long long",
                  "U": "unsigned long long"}
        # The specifiers accepted besides "s".
        fmt_types = c_type.keys()

        def _generate_field_decl(self, idx):
                field_type = self.types[idx]
                if field_type == "s":
                        return "char v%d[%d];\n" % (idx, self.string_size)
                if field_type in Probe.fmt_types:
                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
                self._bail("unrecognized format specifier %s" % field_type)

        def _generate_data_decl(self):
                # The BPF program will populate values into the struct
                # according to the format string, and the Python program will
                # construct the final display string.
                self.events_name = "%s_events" % self.probe_name
                self.struct_name = "%s_data_t" % self.probe_name
                self.stacks_name = "%s_stacks" % self.probe_name
                stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
                             else "BPF_STACK_TRACE_BUILDID"
                stack_table = "%s(%s, 1024);" % (stack_type,self.stacks_name) \
                              if (self.kernel_stack or self.user_stack) else ""
                data_fields = ""
                for i, field_type in enumerate(self.types):
                        data_fields += "        " + \
                                       self._generate_field_decl(i)
                time_str = "u64 timestamp_ns;" if self.time_field else ""
                cpu_str = "int cpu;" if self.print_cpu else ""
                kernel_stack_str = "       int kernel_stack_id;" \
                                   if self.kernel_stack else ""
                user_stack_str = "       int user_stack_id;" \
                                 if self.user_stack else ""

                text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
                return text % (self.struct_name, time_str, cpu_str, data_fields,
                               kernel_stack_str, user_stack_str,
                               self.events_name, stack_table)

        def _generate_field_assign(self, idx):
                field_type = self.types[idx]
                expr = self.values[idx].strip()
                text = ""
                if self.probe_type == "u" and expr[0:3] == "arg":
                        arg_index = int(expr[3])
                        arg_ctype = self.usdt.get_probe_arg_ctype(
                                self.usdt_name, arg_index - 1)
                        text = ("        %s %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                                % (arg_ctype, expr, expr[3], expr)

                if field_type == "s":
                        return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, idx, idx)
                if field_type in Probe.fmt_types:
                        return text + "        __data.v%d = (%s)%s;\n" % \
                                        (idx, Probe.c_type[field_type], expr)
                self._bail("unrecognized field type %s" % field_type)

        def _generate_usdt_filter_read(self):
            text = ""
            if self.probe_type != "u":
                    return text
            for arg, _ in Probe.aliases_arg.items():
                    if not (arg in self.filter):
                            continue
                    arg_index = int(arg.replace("arg", ""))
                    arg_ctype = self.usdt.get_probe_arg_ctype(
                            self.usdt_name, arg_index - 1)
                    if not arg_ctype:
                            self._bail("Unable to determine type of {} "
                                       "in the filter".format(arg))
                    text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
                    self.filter = self.filter.replace(
                            arg, "{}_filter".format(arg))
            return text

        def generate_program(self, include_self):
                data_decl = self._generate_data_decl()
                if Probe.pid != -1:
                        pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
                # uprobes can have a built-in tgid filter passed to
                # attach_uprobe, hence the check here -- for kprobes, we
                # need to do the tgid test by hand:
                elif len(self.library) == 0 and Probe.tgid != -1:
                        pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
                elif not include_self:
                        pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
                else:
                        pid_filter = ""

                prefix = ""
                signature = "struct pt_regs *ctx"
                if self.signature:
                        signature += ", " + self.signature

                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"

                time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
                cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

                text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, time_str, cpu_str, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return self.streq_functions + data_decl + "\n" + text

        @classmethod
        def _time_off_str(cls, timestamp_ns):
                return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

        def _display_function(self):
                if self.probe_type == 'p' or self.probe_type == 'r':
                        return self.function
                elif self.probe_type == 'u':
                        return self.usdt_name
                else:   # self.probe_type == 't'
                        return self.tp_event

        def print_stack(self, bpf, stack_id, tgid):
            if stack_id < 0:
                print("        %d" % stack_id)
                return

            stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
            for addr in stack:
                print("        ", end="")
                if Probe.print_address:
                    print("%16x " % addr, end="")
                print("%s" % (bpf.sym(addr, tgid,
                                     show_module=True, show_offset=True)))

        def _format_message(self, bpf, tgid, values):
                # Replace each %K with kernel sym and %U with user sym in tgid
                kernel_placeholders = [i for i, t in enumerate(self.types)
                                       if t == 'K']
                user_placeholders = [i for i, t in enumerate(self.types)
                                     if t == 'U']
                for kp in kernel_placeholders:
                        values[kp] = bpf.ksym(values[kp], show_offset=True)
                for up in user_placeholders:
                        values[up] = bpf.sym(values[up], tgid,
                                           show_module=True, show_offset=True)
                return self.python_format % tuple(values)

        def print_event(self, bpf, cpu, data, size):
                # Cast as the generated structure type and display
                # according to the format string in the probe.
                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
                values = map(lambda i: getattr(event, "v%d" % i),
                             range(0, len(self.values)))
                msg = self._format_message(bpf, event.tgid, values)
                if Probe.print_time:
                    time = strftime("%H:%M:%S") if Probe.use_localtime else \
                           Probe._time_off_str(event.timestamp_ns)
                    print("%-8s " % time[:8], end="")
                if Probe.print_cpu:
                    print("%-3s " % event.cpu, end="")
                print("%-7d %-7d %-15s %-16s %s" %
                      (event.tgid, event.pid,
                       event.comm.decode('utf-8', 'replace'),
                       self._display_function(), msg))

                if self.kernel_stack:
                        self.print_stack(bpf, event.kernel_stack_id, -1)
                if self.user_stack:
                        self.print_stack(bpf, event.user_stack_id, event.tgid)
                if self.user_stack or self.kernel_stack:
                        print("")

                Probe.event_count += 1
                if Probe.max_events is not None and \
                   Probe.event_count >= Probe.max_events:
                        exit()

        def attach(self, bpf, verbose):
                """Attach the probe and start receiving its events.

                Probes without a library go through _attach_k, all others
                through _attach_u. Afterwards the Python event structure is
                generated and print_event is registered as the callback of
                this probe's perf buffer.

                :param bpf: the BPF object holding the compiled program.
                :param verbose: unused here.
                """
                attach_fn = self._attach_k if len(self.library) == 0 \
                            else self._attach_u
                attach_fn(bpf)
                self.python_struct = self._generate_python_data_decl()
                events = bpf[self.events_name]
                events.open_perf_buffer(partial(self.print_event, bpf),
                                        page_cnt=self.page_cnt)

        def _attach_k(self, bpf):
                """Attach a kernel probe: kprobe for "p", kretprobe for "r".

                :param bpf: the BPF object holding the compiled program.
                """
                kind = self.probe_type
                if kind == "p":
                        bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_name)
                elif kind == "r":
                        bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_name)
                # Note that tracepoints don't need an explicit attach

        def _attach_u(self, bpf):
                """Attach a user-space probe in self.library.

                The library name is resolved with BPF.find_library, falling
                back to BPF.find_exe; failure to resolve is reported through
                self._bail. USDT probes need no attach step here; "r" probes
                become uretprobes and everything else uprobes, both limited
                to Probe.tgid.

                :param bpf: the BPF object holding the compiled program.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        # This might be an executable (e.g. 'bash')
                        libpath = BPF.find_exe(self.library)
                if libpath is None or len(libpath) == 0:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "u":
                        # Was already enabled by the BPF constructor
                        return
                if self.probe_type == "r":
                        attach_fn = bpf.attach_uretprobe
                else:
                        attach_fn = bpf.attach_uprobe
                attach_fn(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_name,
                          pid=Probe.tgid)
Beispiel #3
0
class Probe(object):
        # Class-wide state shared by all probes.
        # Number of probes constructed so far; __init__ uses it to number
        # each probe.
        probe_count = 0
        # Counter giving each generated streq_N helper a distinct name.
        streq_index = 0
        # Set from args.max_events by configure().
        # NOTE(review): presumably the event limit checked against
        # event_count when events are printed -- confirm in print_event.
        max_events = None
        event_count = 0
        # Set from BPF.monotonic_time() by configure().
        first_ts = 0
        # configure() clears this when args.timestamp is given.
        use_localtime = True
        # Process / thread id filters; -1 when configure() received none.
        tgid = -1
        pid = -1
        # Set from args.buffer_pages by configure().
        page_cnt = None

        @classmethod
        def configure(cls, args):
                """Initialise the class-wide settings from parsed options.

                :param args: object exposing max_events, timestamp, time,
                        tgid, pid and buffer_pages (presumably the argparse
                        namespace of the tool -- confirm against the caller).
                """
                relative_timestamps = args.timestamp
                cls.max_events = args.max_events
                cls.print_time = relative_timestamps or args.time
                cls.use_localtime = not relative_timestamps
                cls.first_ts = BPF.monotonic_time()
                # Missing (falsy) ids mean "no filter", stored as -1.
                cls.tgid = args.tgid if args.tgid else -1
                cls.pid = args.pid if args.pid else -1
                cls.page_cnt = args.buffer_pages

        def __init__(self, probe, string_size, kernel_stack, user_stack):
                self.usdt = None
                self.streq_functions = ""
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
                self.user_stack = user_stack
                Probe.probe_count += 1
                self._parse_probe()
                self.probe_num = Probe.probe_count
                self.probe_name = "probe_%s_%d" % \
                                (self._display_function(), self.probe_num)
                self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
                                         self.probe_name)

        def __str__(self):
                return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
                        self.library, self._display_function(), self.filter,
                        self.types, self.values)

        def is_default_action(self):
                return self.python_format == ""

        def _bail(self, error):
                raise ValueError("error in probe '%s': %s" %
                                 (self.raw_probe, error))

        def _parse_probe(self):
                text = self.raw_probe

                # There might be a function signature preceding the actual
                # filter/print part, or not. Find the probe specifier first --
                # it ends with either a space or an open paren ( for the
                # function signature part.
                #                                          opt. signature
                #                               probespec       |      rest
                #                               ---------  ----------   --
                (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
                                             text).groups()

                self._parse_spec(spec)
                # Remove the parens
                self.signature = sig[1:-1] if sig else None
                if self.signature and self.probe_type in ['u', 't']:
                        self._bail("USDT and tracepoint probes can't have " +
                                   "a function signature; use arg1, arg2, " +
                                   "... instead")

                text = rest.lstrip()
                # If we now have a (, wait for the balanced closing ) and that
                # will be the predicate
                self.filter = None
                if len(text) > 0 and text[0] == "(":
                        balance = 1
                        for i in range(1, len(text)):
                                if text[i] == "(":
                                        balance += 1
                                if text[i] == ")":
                                        balance -= 1
                                if balance == 0:
                                        self._parse_filter(text[:i + 1])
                                        text = text[i + 1:]
                                        break
                        if self.filter is None:
                                self._bail("unmatched end of predicate")

                if self.filter is None:
                        self.filter = "1"

                # The remainder of the text is the printf action
                self._parse_action(text.lstrip())

        def _parse_spec(self, spec):
                parts = spec.split(":")
                # Two special cases: 'func' means 'p::func', 'lib:func' means
                # 'p:lib:func'. Other combinations need to provide an empty
                # value between delimiters, e.g. 'r::func' for a kretprobe on
                # the function func.
                if len(parts) == 1:
                        parts = ["p", "", parts[0]]
                elif len(parts) == 2:
                        parts = ["p", parts[0], parts[1]]
                if len(parts[0]) == 0:
                        self.probe_type = "p"
                elif parts[0] in ["p", "r", "t", "u"]:
                        self.probe_type = parts[0]
                else:
                        self._bail("probe type must be '', 'p', 't', 'r', " +
                                   "or 'u', but got '%s'" % parts[0])
                if self.probe_type == "t":
                        self.tp_category = parts[1]
                        self.tp_event = parts[2]
                        self.library = ""       # kernel
                        self.function = ""      # from TRACEPOINT_PROBE
                elif self.probe_type == "u":
                        self.library = ':'.join(parts[1:-1])
                        self.usdt_name = parts[-1]
                        self.function = ""      # no function, just address
                        # We will discover the USDT provider by matching on
                        # the USDT name in the specified library
                        self._find_usdt_probe()
                else:
                        self.library = ':'.join(parts[1:-1])
                        self.function = parts[-1]

        def _find_usdt_probe(self):
                """Open the USDT context and check that the probe exists.

                The context is created for Probe.pid when one is set,
                otherwise for Probe.tgid, and stored in self.usdt. A probe
                name that is not among the enumerated probes is reported
                through self._bail.
                """
                if Probe.pid and Probe.pid != -1:
                        target = Probe.pid
                else:
                        target = Probe.tgid
                self.usdt = USDT(path=self.library, pid=target)
                found = any(probe.name == self.usdt_name
                            for probe in self.usdt.enumerate_probes())
                if found:
                        return  # Found it, will enable later
                self._bail("unrecognized USDT probe %s" % self.usdt_name)

        def _parse_filter(self, filt):
                self.filter = self._rewrite_expr(filt)

        def _parse_types(self, fmt):
                for match in re.finditer(
                            r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)', fmt):
                        self.types.append(match.group(1))
                fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
                fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
                fmt = re.sub('%K|%U', '%s', fmt)
                self.python_format = fmt.strip('"')

        def _parse_action(self, action):
                self.values = []
                self.types = []
                self.python_format = ""
                if len(action) == 0:
                        return

                action = action.strip()
                match = re.search(r'(\".*?\"),?(.*)', action)
                if match is None:
                        self._bail("expected format string in \"s")

                self.raw_format = match.group(1)
                self._parse_types(self.raw_format)
                for part in re.split('(?<!"),', match.group(2)):
                        part = self._rewrite_expr(part)
                        if len(part) > 0:
                                self.values.append(part)

        # Aliases that _rewrite_expr replaces textually with the C expression
        # on the right. The argN entries are left untouched for USDT probes.
        aliases = {
                "retval": "PT_REGS_RC(ctx)",
                "arg1": "PT_REGS_PARM1(ctx)",
                "arg2": "PT_REGS_PARM2(ctx)",
                "arg3": "PT_REGS_PARM3(ctx)",
                "arg4": "PT_REGS_PARM4(ctx)",
                "arg5": "PT_REGS_PARM5(ctx)",
                "arg6": "PT_REGS_PARM6(ctx)",
                # $uid / $gid: low / high 32 bits of bpf_get_current_uid_gid()
                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
                # $pid / $tgid: low / high 32 bits of bpf_get_current_pid_tgid()
                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
                "$cpu": "bpf_get_smp_processor_id()"
        }

        def _generate_streq_function(self, string):
                fname = "streq_%d" % Probe.streq_index
                Probe.streq_index += 1
                self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
                return fname

        def _rewrite_expr(self, expr):
                for alias, replacement in Probe.aliases.items():
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using
                        # bpf_readarg_N macros emitted at BPF construction.
                        if alias.startswith("arg") and self.probe_type == "u":
                                continue
                        expr = expr.replace(alias, replacement)
                matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
                for match in matches:
                        string = match.group(1)
                        fname = self._generate_streq_function(string)
                        expr = expr.replace("STRCMP", fname, 1)
                return expr

        # Maps each non-string format specifier to the ctypes type that
        # _generate_python_field_decl uses for the matching field of the
        # Python-side event structure ("s" is handled there separately).
        p_type = {"u": ct.c_uint, "d": ct.c_int,
                  "llu": ct.c_ulonglong, "lld": ct.c_longlong,
                  "hu": ct.c_ushort, "hd": ct.c_short,
                  "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte,
                  "K": ct.c_ulonglong, "U": ct.c_ulonglong}

        def _generate_python_field_decl(self, idx, fields):
                field_type = self.types[idx]
                if field_type == "s":
                        ptype = ct.c_char * self.string_size
                else:
                        ptype = Probe.p_type[field_type]
                fields.append(("v%d" % idx, ptype))

        def _generate_python_data_decl(self):
                self.python_struct_name = "%s_%d_Data" % \
                                (self._display_function(), self.probe_num)
                fields = [
                        ("timestamp_ns", ct.c_ulonglong),
                        ("tgid", ct.c_uint),
                        ("pid", ct.c_uint),
                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
                ]
                for i in range(0, len(self.types)):
                        self._generate_python_field_decl(i, fields)
                if self.kernel_stack:
                        fields.append(("kernel_stack_id", ct.c_int))
                if self.user_stack:
                        fields.append(("user_stack_id", ct.c_int))
                return type(self.python_struct_name, (ct.Structure,),
                            dict(_fields_=fields))

        # Maps each non-string format specifier to the C type used for the
        # matching field of the BPF-side event struct and for the cast in
        # _generate_field_assign ("s" is handled separately in both places).
        c_type = {"u": "unsigned int", "d": "int",
                  "llu": "unsigned long long", "lld": "long long",
                  "hu": "unsigned short", "hd": "short",
                  "x": "unsigned int", "llx": "unsigned long long",
                  "c": "char", "K": "unsigned long long",
                  "U": "unsigned long long"}
        # The specifiers accepted besides "s".
        fmt_types = c_type.keys()

        def _generate_field_decl(self, idx):
                field_type = self.types[idx]
                if field_type == "s":
                        return "char v%d[%d];\n" % (idx, self.string_size)
                if field_type in Probe.fmt_types:
                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
                self._bail("unrecognized format specifier %s" % field_type)

        def _generate_data_decl(self):
                # The BPF program will populate values into the struct
                # according to the format string, and the Python program will
                # construct the final display string.
                self.events_name = "%s_events" % self.probe_name
                self.struct_name = "%s_data_t" % self.probe_name
                self.stacks_name = "%s_stacks" % self.probe_name
                stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
                              if (self.kernel_stack or self.user_stack) else ""
                data_fields = ""
                for i, field_type in enumerate(self.types):
                        data_fields += "        " + \
                                       self._generate_field_decl(i)

                kernel_stack_str = "       int kernel_stack_id;" \
                                   if self.kernel_stack else ""
                user_stack_str = "       int user_stack_id;" \
                                 if self.user_stack else ""

                text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
                return text % (self.struct_name, data_fields,
                               kernel_stack_str, user_stack_str,
                               self.events_name, stack_table)

        def _generate_field_assign(self, idx):
                field_type = self.types[idx]
                expr = self.values[idx].strip()
                text = ""
                if self.probe_type == "u" and expr[0:3] == "arg":
                        arg_index = int(expr[3])
                        arg_ctype = self.usdt.get_probe_arg_ctype(
                                self.usdt_name, arg_index - 1)
                        text = ("        %s %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                                % (arg_ctype, expr, expr[3], expr)

                if field_type == "s":
                        return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """ % (expr, idx, idx, expr)
                if field_type in Probe.fmt_types:
                        return text + "        __data.v%d = (%s)%s;\n" % \
                                        (idx, Probe.c_type[field_type], expr)
                self._bail("unrecognized field type %s" % field_type)

        def _generate_usdt_filter_read(self):
            text = ""
            if self.probe_type != "u":
                    return text
            for arg, _ in Probe.aliases.items():
                    if not (arg.startswith("arg") and
                            (arg in self.filter)):
                            continue
                    arg_index = int(arg.replace("arg", ""))
                    arg_ctype = self.usdt.get_probe_arg_ctype(
                            self.usdt_name, arg_index - 1)
                    if not arg_ctype:
                            self._bail("Unable to determine type of {} "
                                       "in the filter".format(arg))
                    text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
                    self.filter = self.filter.replace(
                            arg, "{}_filter".format(arg))
            return text

        def generate_program(self, include_self):
                """Return the complete C source generated for this probe.

                The result is the concatenation of the accumulated streq
                helper functions, the data declarations from
                ``_generate_data_decl`` and the probe function itself.

                include_self -- when false, and neither a pid nor a kernel
                tgid filter applies, events whose tgid equals this process
                (``os.getpid()``) are dropped by the generated code.
                """
                data_decl = self._generate_data_decl()
                # Exactly one pid/tgid filter is emitted; an explicit thread
                # id (Probe.pid) takes precedence over everything else.
                if Probe.pid != -1:
                        pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
                # uprobes can have a built-in tgid filter passed to
                # attach_uprobe, hence the check here -- for kprobes, we
                # need to do the tgid test by hand:
                elif len(self.library) == 0 and Probe.tgid != -1:
                        pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
                elif not include_self:
                        pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
                else:
                        pid_filter = ""

                # `prefix` fills a slot in the template below; nothing in
                # this method ever puts code into it.
                prefix = ""
                signature = "struct pt_regs *ctx"
                if self.signature:
                        signature += ", " + self.signature

                # One assignment snippet per expression in self.values.
                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                # Tracepoint probes use the TRACEPOINT_PROBE heading and
                # refer to their context as "args"; all other probe types
                # are plain functions taking `ctx` (plus the user signature).
                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"

                # Optional stack-id capture; the user stack additionally
                # passes BPF_F_USER_STACK.
                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

                text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                # NOTE: tuple elements are evaluated left to right, and
                # _generate_usdt_filter_read() rewrites self.filter as a side
                # effect. It must therefore stay ahead of `self.filter` here,
                # so the rewritten filter is the one substituted.
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return self.streq_functions + data_decl + "\n" + text

        @classmethod
        def _time_off_str(cls, timestamp_ns):
                return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

        def _display_function(self):
                if self.probe_type == 'p' or self.probe_type == 'r':
                        return self.function
                elif self.probe_type == 'u':
                        return self.usdt_name
                else:   # self.probe_type == 't'
                        return self.tp_event

        def print_stack(self, bpf, stack_id, tgid):
            if stack_id < 0:
                    print("        %d" % stack_id)
                    return

            stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
            for addr in stack:
                    print("        %s" % (bpf.sym(addr, tgid,
                                         show_module=True, show_offset=True)))

        def _format_message(self, bpf, tgid, values):
                # Replace each %K with kernel sym and %U with user sym in tgid
                kernel_placeholders = [i for i, t in enumerate(self.types)
                                       if t == 'K']
                user_placeholders = [i for i, t in enumerate(self.types)
                                     if t == 'U']
                for kp in kernel_placeholders:
                        values[kp] = bpf.ksym(values[kp], show_offset=True)
                for up in user_placeholders:
                        values[up] = bpf.sym(values[up], tgid,
                                           show_module=True, show_offset=True)
                return self.python_format % tuple(values)

        def print_event(self, bpf, cpu, data, size):
                # Cast as the generated structure type and display
                # according to the format string in the probe.
                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
                values = map(lambda i: getattr(event, "v%d" % i),
                             range(0, len(self.values)))
                msg = self._format_message(bpf, event.tgid, values)
                if not Probe.print_time:
                    print("%-6d %-6d %-12s %-16s %s" %
                          (event.tgid, event.pid, event.comm.decode(),
                           self._display_function(), msg))
                else:
                    time = strftime("%H:%M:%S") if Probe.use_localtime else \
                           Probe._time_off_str(event.timestamp_ns)
                    print("%-8s %-6d %-6d %-12s %-16s %s" %
                          (time[:8], event.tgid, event.pid, event.comm.decode(),
                           self._display_function(), msg))

                if self.kernel_stack:
                        self.print_stack(bpf, event.kernel_stack_id, -1)
                if self.user_stack:
                        self.print_stack(bpf, event.user_stack_id, event.tgid)
                if self.user_stack or self.kernel_stack:
                        print("")

                Probe.event_count += 1
                if Probe.max_events is not None and \
                   Probe.event_count >= Probe.max_events:
                        exit()

        def attach(self, bpf, verbose):
                """Attach this probe and start listening for its events.

                Probes with a library go through ``_attach_u``, all others
                through ``_attach_k``. Afterwards the ctypes layout of the
                event struct is generated and ``print_event`` is registered
                as the perf-buffer callback. *verbose* is not used here.
                """
                if len(self.library) > 0:
                        self._attach_u(bpf)
                else:
                        self._attach_k(bpf)
                self.python_struct = self._generate_python_data_decl()
                on_event = partial(self.print_event, bpf)
                events = bpf[self.events_name]
                events.open_perf_buffer(on_event, page_cnt=self.page_cnt)

        def _attach_k(self, bpf):
                if self.probe_type == "r":
                        bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_name)
                elif self.probe_type == "p":
                        bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_name)
                # Note that tracepoints don't need an explicit attach

        def _attach_u(self, bpf):
                """Attach a user-space entry or return probe.

                ``self.library`` is resolved as a library first and as an
                executable second; failure to resolve it is reported via
                ``self._bail``. USDT probes need no attach call here.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        # This might be an executable (e.g. 'bash')
                        libpath = BPF.find_exe(self.library)
                if not libpath:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "u":
                        return  # Was already enabled by the BPF constructor

                if self.probe_type == "r":
                        attach_fn = bpf.attach_uretprobe
                else:
                        attach_fn = bpf.attach_uprobe
                attach_fn(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_name,
                          pid=Probe.tgid)
Beispiel #4
0
class Probe(object):
    # Running index appended to generated probe function / hash names so
    # they stay unique; bumped once at the end of __init__.
    next_probe_index = 0
    # Running index used to name the generated streq_<n> C helpers.
    streq_index = 0
    # Textual aliases expanded inside user expressions and filters.
    aliases = {"$PID": "(bpf_get_current_pid_tgid() >> 32)"}

    def _substitute_aliases(self, expr):
        if expr is None:
            return expr
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(alias, subst)
        return expr

    def _parse_signature(self):
        params = map(str.strip, self.signature.split(','))
        self.param_types = {}
        for param in params:
            # If the type is a pointer, the * can be next to the
            # param name. Other complex types like arrays are not
            # supported right now.
            index = param.rfind('*')
            index = index if index != -1 else param.rfind(' ')
            param_type = param[0:index + 1].strip()
            param_name = param[index + 1:].strip()
            self.param_types[param_name] = param_type

    def _generate_entry(self):
        self.entry_probe_func = self.probe_func_name + "_entry"
        text = """
int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        COLLECT
        return 0;
}
"""
        text = text.replace("PROBENAME", self.entry_probe_func)
        text = text.replace(
            "SIGNATURE",
            "" if len(self.signature) == 0 else ", " + self.signature)
        text = text.replace("PID_FILTER", self._generate_pid_filter())
        collect = ""
        for pname in self.args_to_probe:
            param_hash = self.hashname_prefix + pname
            if pname == "__latency":
                collect += """
u64 __time = bpf_ktime_get_ns();
%s.update(&__pid, &__time);
                        """ % param_hash
            else:
                collect += "%s.update(&__pid, &%s);\n" % \
                           (param_hash, pname)
        text = text.replace("COLLECT", collect)
        return text

    def _generate_entry_probe(self):
        # Any $entry(name) expressions result in saving that argument
        # when entering the function.
        self.args_to_probe = set()
        regex = r"\$entry\((\w+)\)"
        for expr in self.exprs:
            for arg in re.finditer(regex, expr):
                self.args_to_probe.add(arg.group(1))
        for arg in re.finditer(regex, self.filter):
            self.args_to_probe.add(arg.group(1))
        if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
           "$latency" in self.filter:
            self.args_to_probe.add("__latency")
            self.param_types["__latency"] = "u64"  # nanoseconds
        for pname in self.args_to_probe:
            if pname not in self.param_types:
                raise ValueError("$entry(%s): no such param" % arg)

        self.hashname_prefix = "%s_param_" % self.probe_hash_name
        text = ""
        for pname in self.args_to_probe:
            # Each argument is stored in a separate hash that is
            # keyed by pid.
            text += "BPF_HASH(%s, u32, %s);\n" % \
                 (self.hashname_prefix + pname,
                  self.param_types[pname])
        text += self._generate_entry()
        return text

    def _generate_retprobe_prefix(self):
        # After we're done here, there are __%s_val variables for each
        # argument we needed to probe using $entry(name), and they all
        # have values (which isn't necessarily the case if we missed
        # the method entry probe).
        text = ""
        self.param_val_names = {}
        for pname in self.args_to_probe:
            val_name = "__%s_val" % pname
            text += "%s *%s = %s.lookup(&__pid);\n" % \
                    (self.param_types[pname], val_name,
                     self.hashname_prefix + pname)
            text += "if (%s == 0) { return 0 ; }\n" % val_name
            self.param_val_names[pname] = val_name
        return text

    def _replace_entry_exprs(self):
        for pname, vname in self.param_val_names.items():
            if pname == "__latency":
                entry_expr = "$latency"
                val_expr = "(bpf_ktime_get_ns() - *%s)" % vname
            else:
                entry_expr = "$entry(%s)" % pname
                val_expr = "(*%s)" % vname
            for i in range(0, len(self.exprs)):
                self.exprs[i] = self.exprs[i].replace(entry_expr, val_expr)
            self.filter = self.filter.replace(entry_expr, val_expr)

    def _attach_entry_probe(self):
        """Attach the generated entry probe (uprobe or kprobe)."""
        if not self.is_user:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.entry_probe_func)
            return
        # A falsy pid is passed on as -1.
        self.bpf.attach_uprobe(name=self.library,
                               sym=self.function,
                               fn_name=self.entry_probe_func,
                               pid=self.pid or -1)

    def _bail(self, error):
        raise ValueError("error parsing probe '%s': %s" %
                         (self.raw_spec, error))

    def _validate_specifier(self):
        # Everything after '#' is the probe label, ignore it
        spec = self.raw_spec.split('#')[0]
        parts = spec.strip().split(':')
        if len(parts) < 3:
            self._bail("at least the probe type, library, and " +
                       "function signature must be specified")
        if len(parts) > 6:
            self._bail("extraneous ':'-separated parts detected")
        if parts[0] not in ["r", "p", "t", "u"]:
            self._bail("probe type must be 'p', 'r', 't', or 'u'" +
                       " but got '%s'" % parts[0])
        if re.match(r"\S+\(.*\)", parts[2]) is None:
            self._bail(("function signature '%s' has an invalid " + "format") %
                       parts[2])

    def _parse_expr_types(self, expr_types):
        if len(expr_types) == 0:
            self._bail("no expr types specified")
        self.expr_types = expr_types.split(',')

    def _parse_exprs(self, exprs):
        if len(exprs) == 0:
            self._bail("no exprs specified")
        self.exprs = exprs.split(',')

    def _make_valid_identifier(self, ident):
        return re.sub(r'[^A-Za-z0-9_]', '_', ident)

    def __init__(self, tool, type, specifier):
        """Parse *specifier* and prepare the probe for code generation.

        tool      -- object whose ``args.pid`` and ``args.cumulative`` are
                     read
        type      -- "hist" or "freq"
        specifier -- ':'-separated probe description, optionally followed
                     by '#' and a label

        Malformed specifiers are reported through ``self._bail``.
        """
        self.usdt_ctx = None
        self.streq_functions = ""
        self.pid = tool.args.pid
        self.cumulative = tool.args.cumulative or False
        self.raw_spec = specifier
        self._validate_specifier()

        # An optional label follows a single '#'.
        spec_and_label = specifier.split('#')
        self.label = spec_and_label[1] \
                     if len(spec_and_label) == 2 else None

        # parts: [probe type, library/category, signature,
        #         (expr types, exprs, (filter))]
        parts = spec_and_label[0].strip().split(':')
        self.type = type  # hist or freq
        self.probe_type = parts[0]
        fparts = parts[2].split('(')
        self.function = fparts[0].strip()
        if self.probe_type == "t":
            self.library = ""  # kernel
            self.tp_category = parts[1]
            self.tp_event = self.function
        elif self.probe_type == "u":
            self.library = parts[1]
            # _enable_usdt_probe needs the function name now; the same name
            # is computed again below, before next_probe_index is bumped.
            self.probe_func_name = self._make_valid_identifier(
                "%s_probe%d" % (self.function, Probe.next_probe_index))
            self._enable_usdt_probe()
        else:
            self.library = parts[1]
        self.is_user = len(self.library) > 0
        # Drops the last character -- presumably the closing parenthesis,
        # given the format checked by _validate_specifier.
        self.signature = fparts[1].strip()[:-1]
        self._parse_signature()

        # If the user didn't specify an expression to probe, we probe
        # the retval in a ret probe, or simply the value "1" otherwise.
        self.is_default_expr = len(parts) < 5
        if not self.is_default_expr:
            self._parse_expr_types(parts[3])
            self._parse_exprs(parts[4])
            if len(self.exprs) != len(self.expr_types):
                self._bail("mismatched # of exprs and types")
            if self.type == "hist" and len(self.expr_types) > 1:
                self._bail("histograms can only have 1 expr")
        else:
            if not self.probe_type == "r" and self.type == "hist":
                self._bail("histograms must have expr")
            self.expr_types = \
              ["u64" if not self.probe_type == "r" else "int"]
            self.exprs = \
              ["1" if not self.probe_type == "r" else "$retval"]
        # The filter is only present when all six parts were given.
        self.filter = "" if len(parts) != 6 else parts[5]
        self._substitute_exprs()

        # Do we need to attach an entry probe so that we can collect an
        # argument that is required for an exit (return) probe?
        def check(expr):
            keywords = ["$entry", "$latency"]
            return any(map(lambda kw: kw in expr, keywords))
        self.entry_probe_required = self.probe_type == "r" and \
                (any(map(check, self.exprs)) or check(self.filter))

        # Unique (per Probe instance) names for the generated C function
        # and its hash table.
        self.probe_func_name = self._make_valid_identifier(
            "%s_probe%d" % (self.function, Probe.next_probe_index))
        self.probe_hash_name = self._make_valid_identifier(
            "%s_hash%d" % (self.function, Probe.next_probe_index))
        Probe.next_probe_index += 1

    def _enable_usdt_probe(self):
        """Create the USDT context for the library and enable the probe."""
        context = USDT(path=self.library, pid=self.pid)
        self.usdt_ctx = context
        context.enable_probe(self.function, self.probe_func_name)

    def _generate_streq_function(self, string):
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, char const *str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle) - 1; ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (fname, string)
        return fname

    def _substitute_exprs(self):
        def repl(expr):
            expr = self._substitute_aliases(expr)
            matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
            for match in matches:
                string = match.group(1)
                fname = self._generate_streq_function(string)
                expr = expr.replace("STRCMP", fname, 1)
            return expr.replace("$retval", "PT_REGS_RC(ctx)")

        for i in range(0, len(self.exprs)):
            self.exprs[i] = repl(self.exprs[i])
        self.filter = repl(self.filter)

    def _is_string(self, expr_type):
        return expr_type == "char*" or expr_type == "char *"

    def _generate_hash_field(self, i):
        if self._is_string(self.expr_types[i]):
            return "struct __string_t v%d;\n" % i
        else:
            return "%s v%d;\n" % (self.expr_types[i], i)

    def _generate_usdt_arg_assignment(self, i):
        expr = self.exprs[i]
        if self.probe_type == "u" and expr[0:3] == "arg":
            arg_index = int(expr[3])
            arg_ctype = self.usdt_ctx.get_probe_arg_ctype(
                self.function, arg_index - 1)
            return ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                    % (arg_ctype, expr, expr[3], expr)
        else:
            return ""

    def _generate_field_assignment(self, i):
        text = self._generate_usdt_arg_assignment(i)
        if self._is_string(self.expr_types[i]):
            return (text + "        bpf_probe_read(&__key.v%d.s," +
                    " sizeof(__key.v%d.s), (void *)%s);\n") % \
                    (i, i, self.exprs[i])
        else:
            return text + "        __key.v%d = %s;\n" % \
                   (i, self.exprs[i])

    def _generate_hash_decl(self):
        if self.type == "hist":
            return "BPF_HISTOGRAM(%s, %s);" % \
                   (self.probe_hash_name, self.expr_types[0])
        else:
            text = "struct %s_key_t {\n" % self.probe_hash_name
            for i in range(0, len(self.expr_types)):
                text += self._generate_hash_field(i)
            text += "};\n"
            text += "BPF_HASH(%s, struct %s_key_t, u64);\n" % \
                    (self.probe_hash_name, self.probe_hash_name)
            return text

    def _generate_key_assignment(self):
        if self.type == "hist":
            return self._generate_usdt_arg_assignment(0) + \
                   ("%s __key = %s;\n" %
                    (self.expr_types[0], self.exprs[0]))
        else:
            text = "struct %s_key_t __key = {};\n" % \
                    self.probe_hash_name
            for i in range(0, len(self.exprs)):
                text += self._generate_field_assignment(i)
            return text

    def _generate_hash_update(self):
        if self.type == "hist":
            return "%s.increment(bpf_log2l(__key));" % \
                    self.probe_hash_name
        else:
            return "%s.increment(__key);" % self.probe_hash_name

    def _generate_pid_filter(self):
        # Kernel probes need to explicitly filter pid, because the
        # attach interface doesn't support pid filtering
        if self.pid is not None and not self.is_user:
            return "if (__tgid != %d) { return 0; }" % self.pid
        else:
            return ""

    def generate_text(self):
        """Return the complete C source generated for this probe.

        The result consists of the accumulated streq helpers, the optional
        entry probe (with its hashes) and the main probe function, which
        evaluates the filter, builds the key and updates the table.

        Side effect: when an entry probe is required, the expressions and
        the filter are rewritten to refer to the saved entry values.
        """
        program = ""
        # The heading is either a TRACEPOINT_PROBE or a regular function;
        # the upper-case words are placeholders replaced further down.
        probe_text = """
DATA_DECL
                """ + ("TRACEPOINT_PROBE(%s, %s)" %
                       (self.tp_category, self.tp_event)
                       if self.probe_type == "t" else
                       "int PROBENAME(struct pt_regs *ctx SIGNATURE)") + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __pid      = __pid_tgid;        // lower 32 bits
        u32 __tgid     = __pid_tgid >> 32;  // upper 32 bits
        PID_FILTER
        PREFIX
        if (!(FILTER)) return 0;
        KEY_EXPR
        COLLECT
        return 0;
}
"""
        prefix = ""
        signature = ""

        # If any entry arguments are probed in a ret probe, we need
        # to generate an entry probe to collect them
        if self.entry_probe_required:
            program += self._generate_entry_probe()
            prefix += self._generate_retprobe_prefix()
            # Replace $entry(paramname) with a reference to the
            # value we collected when entering the function:
            self._replace_entry_exprs()

        if self.probe_type == "p" and len(self.signature) > 0:
            # Only entry uprobes/kprobes can have user-specified
            # signatures. Other probes force it to ().
            signature = ", " + self.signature

        # NOTE: the replacements below are order-sensitive. "FILTER" is a
        # substring of "PID_FILTER", so PID_FILTER has to be substituted
        # before FILTER is.
        program += probe_text.replace("PROBENAME", self.probe_func_name)
        program = program.replace("SIGNATURE", signature)
        program = program.replace("PID_FILTER", self._generate_pid_filter())

        decl = self._generate_hash_decl()
        key_expr = self._generate_key_assignment()
        collect = self._generate_hash_update()
        program = program.replace("DATA_DECL", decl)
        program = program.replace("KEY_EXPR", key_expr)
        # An empty filter accepts every event.
        program = program.replace(
            "FILTER", "1" if len(self.filter) == 0 else self.filter)
        program = program.replace("COLLECT", collect)
        program = program.replace("PREFIX", prefix)

        return self.streq_functions + program

    def _attach_u(self):
        """Attach the user-space entry or return probe.

        ``self.library`` is resolved as a library first and as an
        executable second; failure to resolve it is reported through
        ``self._bail``. A falsy pid is passed on as -1.
        """
        libpath = BPF.find_library(self.library)
        if libpath is None:
            libpath = BPF.find_exe(self.library)
        if not libpath:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "r":
            attach_fn = self.bpf.attach_uretprobe
        else:
            attach_fn = self.bpf.attach_uprobe
        attach_fn(name=libpath,
                  sym=self.function,
                  fn_name=self.probe_func_name,
                  pid=self.pid or -1)

    def _attach_k(self):
        if self.probe_type == "t":
            pass  # Nothing to do for tracepoints
        elif self.probe_type == "r":
            self.bpf.attach_kretprobe(event=self.function,
                                      fn_name=self.probe_func_name)
        else:
            self.bpf.attach_kprobe(event=self.function,
                                   fn_name=self.probe_func_name)

    def attach(self, bpf):
        self.bpf = bpf
        if self.probe_type == "u":
            return
        if self.is_user:
            self._attach_u()
        else:
            self._attach_k()
        if self.entry_probe_required:
            self._attach_entry_probe()

    def _v2s(self, v):
        # Most fields can be converted with plain str(), but strings
        # are wrapped in a __string_t which has an .s field
        if "__string_t" in type(v).__name__:
            return str(v.s)
        return str(v)

    def _display_expr(self, i):
        # Replace ugly latency calculation with $latency
        expr = self.exprs[i].replace("(bpf_ktime_get_ns() - *____latency_val)",
                                     "$latency")
        # Replace alias values back with the alias name
        for alias, subst in Probe.aliases.items():
            expr = expr.replace(subst, alias)
        # Replace retval expression with $retval
        expr = expr.replace("PT_REGS_RC(ctx)", "$retval")
        # Replace ugly (*__param_val) expressions with param name
        return re.sub(r"\(\*__(\w+)_val\)", r"\1", expr)

    def _display_key(self, key):
        if self.is_default_expr:
            if not self.probe_type == "r":
                return "total calls"
            else:
                return "retval = %s" % str(key.v0)
        else:
            # The key object has v0, ..., vk fields containing
            # the values of the expressions from self.exprs
            def str_i(i):
                key_i = self._v2s(getattr(key, "v%d" % i))
                return "%s = %s" % \
                        (self._display_expr(i), key_i)

            return ", ".join(map(str_i, range(0, len(self.exprs))))

    def display(self, top):
        data = self.bpf.get_table(self.probe_hash_name)
        if self.type == "freq":
            print(self.label or self.raw_spec)
            print("\t%-10s %s" % ("COUNT", "EVENT"))
            sdata = sorted(data.items(), key=lambda p: p[1].value)
            if top is not None:
                sdata = sdata[-top:]
            for key, value in sdata:
                # Print some nice values if the user didn't
                # specify an expression to probe
                if self.is_default_expr:
                    if not self.probe_type == "r":
                        key_str = "total calls"
                    else:
                        key_str = "retval = %s" % \
                                  self._v2s(key.v0)
                else:
                    key_str = self._display_key(key)
                print("\t%-10s %s" % (str(value.value), key_str))
        elif self.type == "hist":
            label = self.label or (self._display_expr(0)
                                   if not self.is_default_expr else "retval")
            data.print_log2_hist(val_type=label)
        if not self.cumulative:
            data.clear()

    def __str__(self):
        return self.label or self.raw_spec
Beispiel #5
0
class Probe(object):
    """A single trace probe.

    Parses a textual probe specification, generates the BPF C program text
    for it, attaches it, and prints the events it delivers. The attributes
    below are shared by all probes; most are set from the command-line
    arguments in configure().
    """
    # Number of probes constructed so far; also numbers each new probe.
    probe_count = 0
    # Running index passed to and updated from StrcmpRewrite.rewrite_expr.
    streq_index = 0
    # Stop after this many printed events (None means no limit).
    max_events = None
    # Number of events printed so far across all probes.
    event_count = 0
    # Monotonic timestamp taken in configure(); base for time offsets.
    first_ts = 0
    # Wall-clock time taken in configure(); added for unix timestamps.
    first_ts_real = None
    # Whether print_event prefixes each line with a time column.
    print_time = False
    # Whether the time column shows offset + first_ts_real.
    print_unix_timestamp = False
    # When true the time column is strftime("%H:%M:%S") at print time.
    use_localtime = True
    # Whether the event struct carries a timestamp_ns field.
    time_field = False
    # Whether the event struct carries (and print_event shows) the CPU.
    print_cpu = False
    # Whether print_stack prints the raw address before each symbol.
    print_address = False
    # Process (tgid) and thread (pid) filters; -1 means no filter.
    tgid = -1
    pid = -1
    # Passed as page_cnt to open_perf_buffer in attach().
    page_cnt = None
    # Selects BPF_STACK_TRACE_BUILDID instead of BPF_STACK_TRACE.
    build_id_enabled = False

    @classmethod
    def configure(cls, args):
        """Copy the command-line options in ``args`` onto the class.

        Also records the monotonic and wall-clock start times that are
        later used to print event time offsets.
        """
        show_time = args.timestamp or args.time
        localtime = not args.timestamp

        cls.max_events = args.max_events
        cls.print_time = show_time
        cls.print_unix_timestamp = args.unix_timestamp
        cls.use_localtime = localtime
        # The BPF-side timestamp is only needed when offsets are printed.
        cls.time_field = show_time and (not localtime)
        cls.print_cpu = args.print_cpu
        cls.print_address = args.address
        cls.first_ts = BPF.monotonic_time()
        cls.first_ts_real = time.time()
        # Unset (falsy) ids mean "no filter", represented as -1.
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.page_cnt = args.buffer_pages
        cls.bin_cmp = args.bin_cmp
        cls.build_id_enabled = args.sym_file_list is not None

    def __init__(self, probe, string_size, kernel_stack, user_stack,
                 cgroup_map_name, name, msg_filter):
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        self.probe_user_list = set()
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % \
                        (self._display_function(), self.probe_num)
        self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', self.probe_name)
        self.cgroup_map_name = cgroup_map_name
        self.name = name
        self.msg_filter = msg_filter
        # compiler can generate proper codes for function
        # signatures with "syscall__" prefix
        if self.is_syscall_kprobe:
            self.probe_name = "syscall__" + self.probe_name[6:]

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type, self.library, self._display_function(),
            self.filter, self.types, self.values)

    def is_default_action(self):
        return self.python_format == ""

    def _bail(self, error):
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    def _parse_probe(self):
        text = self.raw_probe

        # There might be a function signature preceding the actual
        # filter/print part, or not. Find the probe specifier first --
        # it ends with either a space or an open paren ( for the
        # function signature part.
        #                                          opt. signature
        #                               probespec       |      rest
        #                               ---------  ----------   --
        (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
                                     text).groups()

        self._parse_spec(spec)
        # Remove the parens
        self.signature = sig[1:-1] if sig else None
        if self.signature and self.probe_type in ['u', 't']:
            self._bail("USDT and tracepoint probes can't have " +
                       "a function signature; use arg1, arg2, " +
                       "... instead")

        text = rest.lstrip()
        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[:i + 1])
                    text = text[i + 1:]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " +
                       "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""  # kernel
            self.function = ""  # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            # u:<library>[:<provider>]:<probe> where :<provider> is optional
            self.library = parts[1]
            self.usdt_name = ":".join(parts[2:])
            self.function = ""  # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = ':'.join(parts[1:-1])
            self.function = parts[-1]

        # only x64 syscalls needs checking, no other syscall wrapper yet.
        self.is_syscall_kprobe = False
        if self.probe_type == "p" and len(self.library) == 0 and \
           self.function[:10] == "__x64_sys_":
            self.is_syscall_kprobe = True

    def _find_usdt_probe(self):
        """Open the USDT context for the library and check the probe exists.

        ``self.usdt_name`` is either "<probe>" or "<provider>:<probe>".
        The context is stored in ``self.usdt``.

        Raises:
            ValueError: via _bail, if no enumerated probe matches.
        """
        # Prefer the thread id filter when one is set, else use the tgid.
        use_pid = Probe.pid and Probe.pid != -1
        target = Probe.pid if use_pid else Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)

        pieces = self.usdt_name.split(":")
        if len(pieces) == 1:
            provider_name = None
            usdt_name = pieces[0].encode("ascii")
        else:
            provider_name = pieces[0].encode("ascii")
            usdt_name = pieces[1].encode("ascii")

        for candidate in self.usdt.enumerate_probes():
            provider_ok = (not provider_name
                           or candidate.provider == provider_name)
            if provider_ok and candidate.name == usdt_name:
                return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        """Store the predicate ``filt`` after alias rewriting."""
        rewritten = self._rewrite_expr(filt)
        self.filter = rewritten

    def _parse_types(self, fmt):
        for match in re.finditer(
                r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
        fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
        fmt = re.sub('%K|%U', '%s', fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r'(\".*?\"),?(.*)', action)
        if match is None:
            self._bail("expected format string in \"s")

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # argN aliases for probes that are not syscall kprobes: each one maps
    # to the matching PT_REGS_PARMn(ctx) macro.
    aliases_arg = {
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
    }

    # argN aliases used when is_syscall_kprobe is set: the first parameter
    # of the probed function is treated as a pointer to another pt_regs,
    # and argument N is read out of that structure with bpf_probe_read.
    aliases_indarg = {
        "arg1":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
        "arg2":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
        "arg3":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
        "arg4":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
        "arg5":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
        "arg6":
        "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
    }

    # Aliases substituted for every probe type, independent of arguments.
    aliases_common = {
        "retval": "PT_REGS_RC(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
        "$task": "((struct task_struct *)bpf_get_current_task())"
    }

    def _rewrite_expr(self, expr):
        """Rewrite a user expression into the C text used in the program.

        Records every argN written as argN@user in ``probe_user_list``,
        strips the @user suffix, lets StrcmpRewrite rewrite the expression,
        and finally substitutes the argN and common aliases.
        """
        # Find the occurrences of any arg[1-6]@user. Use it later to
        # identify bpf_probe_read_user
        for found in re.finditer(r'(arg[1-6])(@user)', expr):
            self.probe_user_list.add(found.group(1).strip())
        # Remove @user occurrences from arg before resolving to its
        # corresponding aliases.
        expr = re.sub(r'(arg[1-6])@user', r'\1', expr)

        rewritten = StrcmpRewrite.rewrite_expr(expr, self.bin_cmp,
                                               self.library,
                                               self.probe_user_list,
                                               self.streq_functions,
                                               Probe.streq_index)
        expr = rewritten["expr"]
        self.streq_functions = rewritten["streq_functions"]
        Probe.streq_index = rewritten["probeid"]

        if self.is_syscall_kprobe:
            arg_aliases = Probe.aliases_indarg
        else:
            arg_aliases = Probe.aliases_arg
        # For USDT probes, we replace argN values with the
        # actual arguments for that probe obtained using
        # bpf_readarg_N macros emitted at BPF construction.
        if self.probe_type != "u":
            for alias, replacement in arg_aliases.items():
                expr = expr.replace(alias, replacement)
        for alias, replacement in Probe.aliases_common.items():
            expr = expr.replace(alias, replacement)
        return expr

    # Format specifier -> ctypes type of the matching field in the
    # Python-side event structure (see _generate_python_field_decl).
    # "s" is not listed; it is handled separately as a char array.
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "lu": ct.c_ulong,
        "ld": ct.c_long,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "lx": ct.c_ulong,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong
    }

    def _generate_python_field_decl(self, idx, fields):
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        self.python_struct_name = "%s_%d_Data" % \
                        (self._display_function(), self.probe_num)
        fields = []
        if self.time_field:
            fields.append(("timestamp_ns", ct.c_ulonglong))
        if self.print_cpu:
            fields.append(("cpu", ct.c_int))
        fields.extend([
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16)  # TASK_COMM_LEN
        ])
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure, ),
                    dict(_fields_=fields))

    # Format specifier -> C type of the matching field in the generated
    # BPF event struct. "s" is not listed; it is emitted as a char array.
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "lu": "unsigned long",
        "ld": "long",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "lx": "unsigned long",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long"
    }
    # The specifiers accepted besides "s".
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
                     else "BPF_STACK_TRACE_BUILDID"
        stack_table = "%s(%s, 1024);" % (stack_type,self.stacks_name) \
                      if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + \
                           self._generate_field_decl(i)
        time_str = "u64 timestamp_ns;" if self.time_field else ""
        cpu_str = "int cpu;" if self.print_cpu else ""
        kernel_stack_str = "       int kernel_stack_id;" \
                           if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" \
                         if self.user_stack else ""

        text = """
struct %s
{
%s
%s
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, time_str, cpu_str, data_fields,
                       kernel_stack_str, user_stack_str, self.events_name,
                       stack_table)

    def _generate_field_assign(self, idx):
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            arg_index = int(expr[3])
            arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name,
                                                      arg_index - 1)
            text = ("        %s %s = 0;\n" +
                    "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
                    % (arg_ctype, expr, expr[3], expr)
        probe_read_func = "bpf_probe_read"
        if field_type == "s":
            if self.library:
                probe_read_func = "bpf_probe_read_user"
            else:
                alias_to_check = Probe.aliases_indarg \
                                    if self.is_syscall_kprobe \
                                    else Probe.aliases_arg
                for arg, alias in alias_to_check.items():
                    if alias == expr and arg in self.probe_user_list:
                        probe_read_func = "bpf_probe_read_user"
                        break
            return text + """
        if (%s != 0) {
                void *__tmp = (void *)%s;
                %s(&__data.v%d, sizeof(__data.v%d), __tmp);
        }
                """ % (expr, expr, probe_read_func, idx, idx)
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % \
                            (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        text = ""
        if self.probe_type != "u":
            return text
        for arg, _ in Probe.aliases_arg.items():
            if not (arg in self.filter):
                continue
            arg_index = int(arg.replace("arg", ""))
            arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name,
                                                      arg_index - 1)
            if not arg_ctype:
                self._bail("Unable to determine type of {} "
                           "in the filter".format(arg))
            text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                    """.format(arg_ctype, arg, arg_index, arg)
            self.filter = self.filter.replace(arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        data_decl = self._generate_data_decl()
        if Probe.pid != -1:
            pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
        # uprobes can have a built-in tgid filter passed to
        # attach_uprobe, hence the check here -- for kprobes, we
        # need to do the tgid test by hand:
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
        elif not include_self:
            pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
        else:
            pid_filter = ""

        if self.cgroup_map_name is not None:
            cgroup_filter = """
        if (%s.check_current_task(0) <= 0) { return 0; }
                """ % self.cgroup_map_name
        else:
            cgroup_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"
        if self.signature:
            signature += ", " + self.signature

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % \
                      (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
        cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, 0
        );""" % (self.stacks_name, ctx_name)

        text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        text = text % (pid_filter, cgroup_filter, prefix,
                       self._generate_usdt_filter_read(), self.filter,
                       self.struct_name, time_str, cpu_str, data_fields,
                       stack_trace, self.events_name, ctx_name)

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        offset = 1e-9 * (timestamp_ns - cls.first_ts)
        if cls.print_unix_timestamp:
            return "%.6f" % (offset + cls.first_ts_real)
        else:
            return "%.6f" % offset

    def _display_function(self):
        if self.probe_type == 'p' or self.probe_type == 'r':
            return self.function
        elif self.probe_type == 'u':
            return self.usdt_name
        else:  # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        ", end="")
            if Probe.print_address:
                print("%16x " % addr, end="")
            print("%s" %
                  (bpf.sym(addr, tgid, show_module=True, show_offset=True)))

    def _format_message(self, bpf, tgid, values):
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i, t in enumerate(self.types) if t == 'K']
        user_placeholders = [i for i, t in enumerate(self.types) if t == 'U']
        for kp in kernel_placeholders:
            values[kp] = bpf.ksym(values[kp], show_offset=True)
        for up in user_placeholders:
            values[up] = bpf.sym(values[up],
                                 tgid,
                                 show_module=True,
                                 show_offset=True)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        if self.name and bytes(self.name) not in event.comm:
            return
        values = map(lambda i: getattr(event, "v%d" % i),
                     range(0, len(self.values)))
        msg = self._format_message(bpf, event.tgid, values)
        if self.msg_filter and bytes(self.msg_filter) not in msg:
            return
        if Probe.print_time:
            time = strftime("%H:%M:%S") if Probe.use_localtime else \
                   Probe._time_off_str(event.timestamp_ns)
            if Probe.print_unix_timestamp:
                print("%-17s " % time[:17], end="")
            else:
                print("%-8s " % time[:8], end="")
        if Probe.print_cpu:
            print("%-3s " % event.cpu, end="")
        print("%-7d %-7d %-15s %-16s %s" %
              (event.tgid, event.pid, event.comm.decode(
                  'utf-8', 'replace'), self._display_function(), msg))

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and \
           Probe.event_count >= Probe.max_events:
            exit()
        sys.stdout.flush()

    def attach(self, bpf, verbose):
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback,
                                               page_cnt=self.page_cnt)

    def _attach_k(self, bpf):
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function, fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function, fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a uprobe or uretprobe on the probe's library or executable.

        USDT probes only have their library resolved here; nothing is
        attached for them in this method.

        Raises:
            ValueError: via _bail, if the library cannot be found.
        """
        libpath = BPF.find_library(self.library)
        if libpath is None:
            # This might be an executable (e.g. 'bash')
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            return  # Was already enabled by the BPF constructor

        if self.probe_type == "r":
            hook = bpf.attach_uretprobe
        else:
            hook = bpf.attach_uprobe
        hook(name=libpath,
             sym=self.function,
             fn_name=self.probe_name,
             pid=Probe.tgid)
Beispiel #6
0
class Probe(object):
        """A single trace probe: parses a probe specification and generates
        the BPF C text and Python event structure for it."""
        # Number of probes constructed so far; also numbers each new probe.
        probe_count = 0
        # Set from args.max_events in configure().
        max_events = None
        event_count = 0
        # Set from Time.monotonic_time() in configure().
        first_ts = 0
        # Set to "not args.offset" in configure().
        use_localtime = True
        # Process id filter; -1 when none was given.
        pid = -1

        @classmethod
        def configure(cls, args):
                """Copy the command-line options in ``args`` onto the class
                and record the monotonic start time."""
                wants_offset = args.offset
                cls.max_events = args.max_events
                cls.use_localtime = not wants_offset
                cls.first_ts = Time.monotonic_time()
                # An unset (falsy) pid means "no filter", stored as -1.
                cls.pid = args.pid or -1

        def __init__(self, probe, string_size, kernel_stack, user_stack):
                self.usdt = None
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
                self.user_stack = user_stack
                Probe.probe_count += 1
                self._parse_probe()
                self.probe_num = Probe.probe_count
                self.probe_name = "probe_%s_%d" % \
                                (self._display_function(), self.probe_num)

        def __str__(self):
                return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
                        self.library, self._display_function(), self.filter,
                        self.types, self.values)

        def is_default_action(self):
                return self.python_format == ""

        def _bail(self, error):
                raise ValueError("error in probe '%s': %s" %
                                 (self.raw_probe, error))

        def _parse_probe(self):
                text = self.raw_probe

                # Everything until the first space is the probe specifier
                first_space = text.find(' ')
                spec = text[:first_space] if first_space >= 0 else text
                self._parse_spec(spec)
                if first_space >= 0:
                        text = text[first_space:].lstrip()
                else:
                        text = ""

                # If we now have a (, wait for the balanced closing ) and that
                # will be the predicate
                self.filter = None
                if len(text) > 0 and text[0] == "(":
                        balance = 1
                        for i in range(1, len(text)):
                                if text[i] == "(":
                                        balance += 1
                                if text[i] == ")":
                                        balance -= 1
                                if balance == 0:
                                        self._parse_filter(text[:i+1])
                                        text = text[i+1:]
                                        break
                        if self.filter is None:
                                self._bail("unmatched end of predicate")

                if self.filter is None:
                        self.filter = "1"

                # The remainder of the text is the printf action
                self._parse_action(text.lstrip())

        def _parse_spec(self, spec):
                parts = spec.split(":")
                # Two special cases: 'func' means 'p::func', 'lib:func' means
                # 'p:lib:func'. Other combinations need to provide an empty
                # value between delimiters, e.g. 'r::func' for a kretprobe on
                # the function func.
                if len(parts) == 1:
                        parts = ["p", "", parts[0]]
                elif len(parts) == 2:
                        parts = ["p", parts[0], parts[1]]
                if len(parts[0]) == 0:
                        self.probe_type = "p"
                elif parts[0] in ["p", "r", "t", "u"]:
                        self.probe_type = parts[0]
                else:
                        self._bail("probe type must be '', 'p', 't', 'r', " +
                                   "or 'u', but got '%s'" % parts[0])
                if self.probe_type == "t":
                        self.tp_category = parts[1]
                        self.tp_event = parts[2]
                        self.library = ""       # kernel
                        self.function = ""      # generated from TRACEPOINT_PROBE
                elif self.probe_type == "u":
                        self.library = parts[1]
                        self.usdt_name = parts[2]
                        self.function = ""      # no function, just address
                        # We will discover the USDT provider by matching on
                        # the USDT name in the specified library
                        self._find_usdt_probe()
                else:
                        self.library = parts[1]
                        self.function = parts[2]

        def _find_usdt_probe(self):
                """Open the USDT context for the library and check that a
                probe named ``self.usdt_name`` exists in it.

                Raises:
                        ValueError: via _bail, if no probe has that name.
                """
                self.usdt = USDT(path=self.library, pid=Probe.pid)
                wanted = self.usdt_name
                for candidate in self.usdt.enumerate_probes():
                        if candidate.name != wanted:
                                continue
                        return # Found it, will enable later
                self._bail("unrecognized USDT probe %s" % self.usdt_name)

        def _parse_filter(self, filt):
                self.filter = self._replace_args(filt)

        def _parse_types(self, fmt):
                for match in re.finditer(
                                r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c)', fmt):
                        self.types.append(match.group(1))
                fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
                fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
                self.python_format = fmt.strip('"')

        def _parse_action(self, action):
                self.values = []
                self.types = []
                self.python_format = ""
                if len(action) == 0:
                        return

                action = action.strip()
                match = re.search(r'(\".*\"),?(.*)', action)
                if match is None:
                        self._bail("expected format string in \"s")

                self.raw_format = match.group(1)
                self._parse_types(self.raw_format)
                for part in match.group(2).split(','):
                        part = self._replace_args(part)
                        if len(part) > 0:
                                self.values.append(part)

        # Names usable in filters and actions, mapped to the C text that
        # _replace_args substitutes for them. The argN entries are left
        # untouched for USDT probes.
        aliases = {
                "retval": "PT_REGS_RC(ctx)",
                "arg1": "PT_REGS_PARM1(ctx)",
                "arg2": "PT_REGS_PARM2(ctx)",
                "arg3": "PT_REGS_PARM3(ctx)",
                "arg4": "PT_REGS_PARM4(ctx)",
                "arg5": "PT_REGS_PARM5(ctx)",
                "arg6": "PT_REGS_PARM6(ctx)",
                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
                "$cpu": "bpf_get_smp_processor_id()"
        }

        def _replace_args(self, expr):
                for alias, replacement in Probe.aliases.items():
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using special
                        # bpf_readarg_N macros emitted at BPF construction.
                        if alias.startswith("arg") and self.probe_type == "u":
                                continue
                        expr = expr.replace(alias, replacement)
                return expr

        # Format specifier -> ctypes type of the matching field in the
        # Python-side event structure. "s" is handled separately.
        p_type = { "u": ct.c_uint, "d": ct.c_int,
                   "llu": ct.c_ulonglong, "lld": ct.c_longlong,
                   "hu": ct.c_ushort, "hd": ct.c_short,
                   "x": ct.c_uint, "llx": ct.c_ulonglong,
                   "c": ct.c_ubyte }

        def _generate_python_field_decl(self, idx, fields):
                field_type = self.types[idx]
                if field_type == "s":
                        ptype = ct.c_char * self.string_size
                else:
                        ptype = Probe.p_type[field_type]
                fields.append(("v%d" % idx, ptype))

        def _generate_python_data_decl(self):
                self.python_struct_name = "%s_%d_Data" % \
                                (self._display_function(), self.probe_num)
                fields = [
                        ("timestamp_ns", ct.c_ulonglong),
                        ("pid", ct.c_uint),
                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
                ]
                for i in range(0, len(self.types)):
                        self._generate_python_field_decl(i, fields)
                if self.kernel_stack:
                        fields.append(("kernel_stack_id", ct.c_int))
                if self.user_stack:
                        fields.append(("user_stack_id", ct.c_int))
                return type(self.python_struct_name, (ct.Structure,),
                            dict(_fields_=fields))

        # Format specifier -> C type of the matching field in the generated
        # BPF event struct. "s" is emitted separately as a char array.
        c_type = { "u": "unsigned int", "d": "int",
                   "llu": "unsigned long long", "lld": "long long",
                   "hu": "unsigned short", "hd": "short",
                   "x": "unsigned int", "llx": "unsigned long long",
                   "c": "char" }
        # The specifiers accepted besides "s".
        fmt_types = c_type.keys()

        def _generate_field_decl(self, idx):
                field_type = self.types[idx]
                if field_type == "s":
                        return "char v%d[%d];\n" % (idx, self.string_size)
                if field_type in Probe.fmt_types:
                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
                self._bail("unrecognized format specifier %s" % field_type)

        def _generate_data_decl(self):
                # The BPF program will populate values into the struct
                # according to the format string, and the Python program will
                # construct the final display string.
                self.events_name = "%s_events" % self.probe_name
                self.struct_name = "%s_data_t" % self.probe_name
                self.stacks_name = "%s_stacks" % self.probe_name
                stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
                              if (self.kernel_stack or self.user_stack) else ""
                data_fields = ""
                for i, field_type in enumerate(self.types):
                        data_fields += "        " + \
                                       self._generate_field_decl(i)

                kernel_stack_str = "       int kernel_stack_id;" \
                                   if self.kernel_stack else ""
                user_stack_str = "       int user_stack_id;" \
                                 if self.user_stack else ""

                text = """
struct %s
{
        u64 timestamp_ns;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
                return text % (self.struct_name, data_fields,
                               kernel_stack_str, user_stack_str,
                               self.events_name, stack_table)

        def _generate_field_assign(self, idx):
                field_type = self.types[idx]
                expr = self.values[idx].strip()
                text = ""
                if self.probe_type == "u" and expr[0:3] == "arg":
                        text = ("        u64 %s = 0;\n" +
                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") % \
                                (expr, expr[3], expr)

                if field_type == "s":
                        return text + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
"""                     % (expr, idx, idx, expr)
                if field_type in Probe.fmt_types:
                        return text + "        __data.v%d = (%s)%s;\n" % \
                                        (idx, Probe.c_type[field_type], expr)
                self._bail("unrecognized field type %s" % field_type)

        def _generate_usdt_filter_read(self):
            text = ""
            if self.probe_type == "u":
                    for arg, _ in Probe.aliases.items():
                        if not (arg.startswith("arg") and (arg in self.filter)):
                                continue
                        arg_index = int(arg.replace("arg", ""))
                        arg_ctype = self.usdt.get_probe_arg_ctype(
                                self.usdt_name, arg_index)
                        if not arg_ctype:
                                self._bail("Unable to determine type of {} "
                                           "in the filter".format(arg))
                        text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(arg_ctype, arg, arg_index, arg)
                        self.filter = self.filter.replace(
                                arg, "{}_filter".format(arg))
            return text

        def generate_program(self, include_self):
                data_decl = self._generate_data_decl()
                # kprobes don't have built-in pid filters, so we have to add
                # it to the function body:
                if len(self.library) == 0 and Probe.pid != -1:
                        pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid != %d) { return 0; }
"""             % Probe.pid
                elif not include_self:
                        pid_filter = """
        u32 __pid = bpf_get_current_pid_tgid();
        if (__pid == %d) { return 0; }
"""             % os.getpid()
                else:
                        pid_filter = ""

                prefix = ""
                signature = "struct pt_regs *ctx"

                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % self.stacks_name
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          ctx, BPF_F_REUSE_STACKID
        );""" % self.stacks_name

                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"
                text = heading + """
{
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.pid = bpf_get_current_pid_tgid();
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return data_decl + "\n" + text

        @classmethod
        def _time_off_str(cls, timestamp_ns):
                return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

        def _display_function(self):
                if self.probe_type == 'p' or self.probe_type == 'r':
                        return self.function
                elif self.probe_type == 'u':
                        return self.usdt_name
                else:   # self.probe_type == 't'
                        return self.tp_event

        def print_stack(self, bpf, stack_id, pid):
                """Print one indented line per frame of the given stack.

                A negative stack_id is printed as-is instead of being looked
                up. Otherwise the addresses are read from this probe's
                stack table and each is printed with the symbol that
                bpf.sym(addr, pid) returns.
                """
                if stack_id >= 0:
                        table = bpf.get_table(self.stacks_name)
                        # Collect every frame before printing the first one.
                        frames = list(table.walk(stack_id))
                        for frame in frames:
                                print("        %016x %s"
                                      % (frame, bpf.sym(frame, pid)))
                else:
                        print("        %d" % stack_id)

        def print_event(self, bpf, cpu, data, size):
                """Perf-buffer callback: decode one event and print it.

                data is cast to the struct type generated for this probe,
                the v<i> fields are rendered with self.python_format, and a
                summary line is printed, followed by the requested stack
                traces. Calls exit() once Probe.max_events events have been
                printed. cpu and size are not used.
                """
                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
                values = tuple(getattr(event, "v%d" % i)
                               for i in range(len(self.values)))
                msg = self.python_format % values

                if Probe.use_localtime:
                        time = strftime("%H:%M:%S")
                else:
                        time = Probe._time_off_str(event.timestamp_ns)
                columns = (time[:8], event.pid, event.comm[:12],
                           self._display_function(), msg)
                print("%-8s %-6d %-12s %-16s %s" % columns)

                if self.user_stack:
                        print("    User Stack Trace:")
                        self.print_stack(bpf, event.user_stack_id, event.pid)
                if self.kernel_stack:
                        print("    Kernel Stack Trace:")
                        self.print_stack(bpf, event.kernel_stack_id, -1)
                if self.user_stack or self.kernel_stack:
                        # Blank line separating this event's stacks from
                        # the next event.
                        print("")

                Probe.event_count += 1
                limit = Probe.max_events
                if limit is not None and Probe.event_count >= limit:
                        exit()

        def attach(self, bpf, verbose):
                """Attach the probe and start receiving its events.

                Probes without a library are attached as kernel probes, the
                rest as user-space probes. The Python-side event struct is
                then generated and print_event is registered as the callback
                of this probe's perf buffer. verbose is not used.
                """
                if len(self.library) != 0:
                        self._attach_u(bpf)
                else:
                        self._attach_k(bpf)
                self.python_struct = self._generate_python_data_decl()
                events = bpf[self.events_name]
                events.open_perf_buffer(partial(self.print_event, bpf))

        def _attach_k(self, bpf):
                if self.probe_type == "r":
                        bpf.attach_kretprobe(event=self.function,
                                             fn_name=self.probe_name)
                elif self.probe_type == "p":
                        bpf.attach_kprobe(event=self.function,
                                          fn_name=self.probe_name)
                # Note that tracepoints don't need an explicit attach

        def _attach_u(self, bpf):
                """Attach a user-space probe in self.library.

                The library is resolved with BPF.find_library(), falling
                back to BPF.find_exe() because the target might be an
                executable (e.g. 'bash'); failure to resolve it is reported
                through self._bail(). USDT probes need no attach here
                because the BPF constructor already enabled them; 'r'
                probes get a uretprobe and everything else a uprobe.
                """
                libpath = BPF.find_library(self.library)
                if libpath is None:
                        libpath = BPF.find_exe(self.library)
                if libpath is None or len(libpath) == 0:
                        self._bail("unable to find library %s" % self.library)

                if self.probe_type == "u":
                        return
                if self.probe_type == "r":
                        attach_fn = bpf.attach_uretprobe
                else:
                        attach_fn = bpf.attach_uprobe
                attach_fn(name=libpath,
                          sym=self.function,
                          fn_name=self.probe_name,
                          pid=Probe.pid)
Beispiel #7
0
class Probe(object):
    # Class-wide state shared by every probe.
    # Number of Probe instances constructed so far; gives each probe its
    # probe_num.
    probe_count = 0
    # Suffix for the next generated streq_<n> C helper.
    streq_index = 0
    # Event limit after which print_event() calls exit(); None = no limit.
    max_events = None
    # Number of events printed so far, across all probes.
    event_count = 0
    # Origin subtracted from event timestamps by _time_off_str().
    first_ts = 0
    # When true, print_event() shows strftime("%H:%M:%S") instead of the
    # offset from first_ts.
    use_localtime = True
    # Filter values compared against __tgid / __pid in the generated
    # program for kernel probes; -1 means "no filter".
    tgid = -1
    pid = -1

    @classmethod
    def configure(cls, args):
        """Load the class-wide settings from the parsed arguments.

        Reads args.max_events, args.offset, args.tgid and args.pid. A falsy
        tgid or pid is stored as -1. first_ts is taken from
        Time.monotonic_time().
        """
        cls.tgid = args.tgid or -1
        cls.pid = args.pid or -1
        cls.max_events = args.max_events
        # Offsets are shown only when requested; local time is the default.
        cls.use_localtime = not args.offset
        cls.first_ts = Time.monotonic_time()

    def __init__(self, probe, string_size, kernel_stack, user_stack):
        self.usdt = None
        self.streq_functions = ""
        self.raw_probe = probe
        self.string_size = string_size
        self.kernel_stack = kernel_stack
        self.user_stack = user_stack
        Probe.probe_count += 1
        self._parse_probe()
        self.probe_num = Probe.probe_count
        self.probe_name = "probe_%s_%d" % (self._display_function(), self.probe_num)

    def __str__(self):
        return "%s:%s:%s FLT=%s ACT=%s/%s" % (
            self.probe_type,
            self.library,
            self._display_function(),
            self.filter,
            self.types,
            self.values,
        )

    def is_default_action(self):
        return self.python_format == ""

    def _bail(self, error):
        raise ValueError("error in probe '%s': %s" % (self.raw_probe, error))

    def _parse_probe(self):
        text = self.raw_probe

        # Everything until the first space is the probe specifier
        first_space = text.find(" ")
        spec = text[:first_space] if first_space >= 0 else text
        self._parse_spec(spec)
        if first_space >= 0:
            text = text[first_space:].lstrip()
        else:
            text = ""

        # If we now have a (, wait for the balanced closing ) and that
        # will be the predicate
        self.filter = None
        if len(text) > 0 and text[0] == "(":
            balance = 1
            for i in range(1, len(text)):
                if text[i] == "(":
                    balance += 1
                if text[i] == ")":
                    balance -= 1
                if balance == 0:
                    self._parse_filter(text[: i + 1])
                    text = text[i + 1 :]
                    break
            if self.filter is None:
                self._bail("unmatched end of predicate")

        if self.filter is None:
            self.filter = "1"

        # The remainder of the text is the printf action
        self._parse_action(text.lstrip())

    def _parse_spec(self, spec):
        parts = spec.split(":")
        # Two special cases: 'func' means 'p::func', 'lib:func' means
        # 'p:lib:func'. Other combinations need to provide an empty
        # value between delimiters, e.g. 'r::func' for a kretprobe on
        # the function func.
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        if len(parts[0]) == 0:
            self.probe_type = "p"
        elif parts[0] in ["p", "r", "t", "u"]:
            self.probe_type = parts[0]
        else:
            self._bail("probe type must be '', 'p', 't', 'r', " + "or 'u', but got '%s'" % parts[0])
        if self.probe_type == "t":
            self.tp_category = parts[1]
            self.tp_event = parts[2]
            self.library = ""  # kernel
            self.function = ""  # from TRACEPOINT_PROBE
        elif self.probe_type == "u":
            self.library = parts[1]
            self.usdt_name = parts[2]
            self.function = ""  # no function, just address
            # We will discover the USDT provider by matching on
            # the USDT name in the specified library
            self._find_usdt_probe()
        else:
            self.library = parts[1]
            self.function = parts[2]

    def _find_usdt_probe(self):
        """Create the USDT context and check that the named probe exists.

        The context targets Probe.pid when that is set (truthy and not -1)
        and Probe.tgid otherwise. A missing probe is reported through
        self._bail().
        """
        if Probe.pid and Probe.pid != -1:
            target = Probe.pid
        else:
            target = Probe.tgid
        self.usdt = USDT(path=self.library, pid=target)
        if any(probe.name == self.usdt_name for probe in self.usdt.enumerate_probes()):
            return  # Found it, will enable later
        self._bail("unrecognized USDT probe %s" % self.usdt_name)

    def _parse_filter(self, filt):
        self.filter = self._rewrite_expr(filt)

    def _parse_types(self, fmt):
        for match in re.finditer(r"[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)", fmt):
            self.types.append(match.group(1))
        fmt = re.sub(r"([^%]%)(u|d|llu|lld|hu|hd)", r"\1d", fmt)
        fmt = re.sub(r"([^%]%)(x|llx)", r"\1x", fmt)
        fmt = re.sub("%K|%U", "%s", fmt)
        self.python_format = fmt.strip('"')

    def _parse_action(self, action):
        self.values = []
        self.types = []
        self.python_format = ""
        if len(action) == 0:
            return

        action = action.strip()
        match = re.search(r"(\".*?\"),?(.*)", action)
        if match is None:
            self._bail('expected format string in "s')

        self.raw_format = match.group(1)
        self._parse_types(self.raw_format)
        for part in re.split('(?<!"),', match.group(2)):
            part = self._rewrite_expr(part)
            if len(part) > 0:
                self.values.append(part)

    # Shorthand names usable in predicates and action values, mapped to the
    # C expression that _rewrite_expr() substitutes for them by plain text
    # replacement. The "argN" entries are skipped for USDT probes, whose
    # arguments are read with bpf_usdt_readarg() instead.
    aliases = {
        "retval": "PT_REGS_RC(ctx)",
        "arg1": "PT_REGS_PARM1(ctx)",
        "arg2": "PT_REGS_PARM2(ctx)",
        "arg3": "PT_REGS_PARM3(ctx)",
        "arg4": "PT_REGS_PARM4(ctx)",
        "arg5": "PT_REGS_PARM5(ctx)",
        "arg6": "PT_REGS_PARM6(ctx)",
        "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
        "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
        "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
        "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
        "$cpu": "bpf_get_smp_processor_id()",
    }

    def _generate_streq_function(self, string):
        fname = "streq_%d" % Probe.streq_index
        Probe.streq_index += 1
        self.streq_functions += """
static inline bool %s(char const *ignored, unsigned long str) {
        char needle[] = %s;
        char haystack[sizeof(needle)];
        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
        for (int i = 0; i < sizeof(needle); ++i) {
                if (needle[i] != haystack[i]) {
                        return false;
                }
        }
        return true;
}
                """ % (
            fname,
            string,
        )
        return fname

    def _rewrite_expr(self, expr):
        for alias, replacement in Probe.aliases.items():
            # For USDT probes, we replace argN values with the
            # actual arguments for that probe obtained using
            # bpf_readarg_N macros emitted at BPF construction.
            if alias.startswith("arg") and self.probe_type == "u":
                continue
            expr = expr.replace(alias, replacement)
        matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
        for match in matches:
            string = match.group(1)
            fname = self._generate_streq_function(string)
            expr = expr.replace("STRCMP", fname, 1)
        return expr

    # Maps each scalar format specifier to the ctypes type of the matching
    # v<i> field in the Python-side event struct. %K and %U values are
    # stored as 64-bit unsigned integers; "s" is not listed because it is
    # handled separately as a char array.
    p_type = {
        "u": ct.c_uint,
        "d": ct.c_int,
        "llu": ct.c_ulonglong,
        "lld": ct.c_longlong,
        "hu": ct.c_ushort,
        "hd": ct.c_short,
        "x": ct.c_uint,
        "llx": ct.c_ulonglong,
        "c": ct.c_ubyte,
        "K": ct.c_ulonglong,
        "U": ct.c_ulonglong,
    }

    def _generate_python_field_decl(self, idx, fields):
        field_type = self.types[idx]
        if field_type == "s":
            ptype = ct.c_char * self.string_size
        else:
            ptype = Probe.p_type[field_type]
        fields.append(("v%d" % idx, ptype))

    def _generate_python_data_decl(self):
        self.python_struct_name = "%s_%d_Data" % (self._display_function(), self.probe_num)
        fields = [
            ("timestamp_ns", ct.c_ulonglong),
            ("tgid", ct.c_uint),
            ("pid", ct.c_uint),
            ("comm", ct.c_char * 16),  # TASK_COMM_LEN
        ]
        for i in range(0, len(self.types)):
            self._generate_python_field_decl(i, fields)
        if self.kernel_stack:
            fields.append(("kernel_stack_id", ct.c_int))
        if self.user_stack:
            fields.append(("user_stack_id", ct.c_int))
        return type(self.python_struct_name, (ct.Structure,), dict(_fields_=fields))

    # Maps each scalar format specifier (without the leading '%') to the C
    # type used for the matching field of the generated BPF data struct and
    # for the cast applied when the field is assigned.
    c_type = {
        "u": "unsigned int",
        "d": "int",
        "llu": "unsigned long long",
        "lld": "long long",
        "hu": "unsigned short",
        "hd": "short",
        "x": "unsigned int",
        "llx": "unsigned long long",
        "c": "char",
        "K": "unsigned long long",
        "U": "unsigned long long",
    }
    # The specifiers accepted by the field generators besides "s", which
    # they handle separately as a fixed-size char array.
    fmt_types = c_type.keys()

    def _generate_field_decl(self, idx):
        field_type = self.types[idx]
        if field_type == "s":
            return "char v%d[%d];\n" % (idx, self.string_size)
        if field_type in Probe.fmt_types:
            return "%s v%d;\n" % (Probe.c_type[field_type], idx)
        self._bail("unrecognized format specifier %s" % field_type)

    def _generate_data_decl(self):
        # The BPF program will populate values into the struct
        # according to the format string, and the Python program will
        # construct the final display string.
        self.events_name = "%s_events" % self.probe_name
        self.struct_name = "%s_data_t" % self.probe_name
        self.stacks_name = "%s_stacks" % self.probe_name
        stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name if (self.kernel_stack or self.user_stack) else ""
        data_fields = ""
        for i, field_type in enumerate(self.types):
            data_fields += "        " + self._generate_field_decl(i)

        kernel_stack_str = "       int kernel_stack_id;" if self.kernel_stack else ""
        user_stack_str = "       int user_stack_id;" if self.user_stack else ""

        text = """
struct %s
{
        u64 timestamp_ns;
        u32 tgid;
        u32 pid;
        char comm[TASK_COMM_LEN];
%s
%s
%s
};

BPF_PERF_OUTPUT(%s);
%s
"""
        return text % (self.struct_name, data_fields, kernel_stack_str, user_stack_str, self.events_name, stack_table)

    def _generate_field_assign(self, idx):
        field_type = self.types[idx]
        expr = self.values[idx].strip()
        text = ""
        if self.probe_type == "u" and expr[0:3] == "arg":
            text = ("        u64 %s = 0;\n" + "        bpf_usdt_readarg(%s, ctx, &%s);\n") % (expr, expr[3], expr)

        if field_type == "s":
            return (
                text
                + """
        if (%s != 0) {
                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), (void *)%s);
        }
                """
                % (expr, idx, idx, expr)
            )
        if field_type in Probe.fmt_types:
            return text + "        __data.v%d = (%s)%s;\n" % (idx, Probe.c_type[field_type], expr)
        self._bail("unrecognized field type %s" % field_type)

    def _generate_usdt_filter_read(self):
        text = ""
        if self.probe_type == "u":
            for arg, _ in Probe.aliases.items():
                if not (arg.startswith("arg") and (arg in self.filter)):
                    continue
                arg_index = int(arg.replace("arg", ""))
                arg_ctype = self.usdt.get_probe_arg_ctype(self.usdt_name, arg_index)
                if not arg_ctype:
                    self._bail("Unable to determine type of {} " "in the filter".format(arg))
                text += """
        {} {}_filter;
        bpf_usdt_readarg({}, ctx, &{}_filter);
                        """.format(
                    arg_ctype, arg, arg_index, arg
                )
                self.filter = self.filter.replace(arg, "{}_filter".format(arg))
        return text

    def generate_program(self, include_self):
        data_decl = self._generate_data_decl()
        # kprobes don't have built-in pid filters, so we have to add
        # it to the function body:
        if len(self.library) == 0 and Probe.pid != -1:
            pid_filter = (
                """
        if (__pid != %d) { return 0; }
                """
                % Probe.pid
            )
        elif len(self.library) == 0 and Probe.tgid != -1:
            pid_filter = (
                """
        if (__tgid != %d) { return 0; }
                """
                % Probe.tgid
            )
        elif not include_self:
            pid_filter = (
                """
        if (__tgid == %d) { return 0; }
                """
                % os.getpid()
            )
        else:
            pid_filter = ""

        prefix = ""
        signature = "struct pt_regs *ctx"

        data_fields = ""
        for i, expr in enumerate(self.values):
            data_fields += self._generate_field_assign(i)

        if self.probe_type == "t":
            heading = "TRACEPOINT_PROBE(%s, %s)" % (self.tp_category, self.tp_event)
            ctx_name = "args"
        else:
            heading = "int %s(%s)" % (self.probe_name, signature)
            ctx_name = "ctx"

        stack_trace = ""
        if self.user_stack:
            stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (
                self.stacks_name,
                ctx_name,
            )
        if self.kernel_stack:
            stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (
                self.stacks_name,
                ctx_name,
            )

        text = (
            heading
            + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        __data.timestamp_ns = bpf_ktime_get_ns();
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
        )
        text = text % (
            pid_filter,
            prefix,
            self._generate_usdt_filter_read(),
            self.filter,
            self.struct_name,
            data_fields,
            stack_trace,
            self.events_name,
            ctx_name,
        )

        return self.streq_functions + data_decl + "\n" + text

    @classmethod
    def _time_off_str(cls, timestamp_ns):
        return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))

    def _display_function(self):
        if self.probe_type == "p" or self.probe_type == "r":
            return self.function
        elif self.probe_type == "u":
            return self.usdt_name
        else:  # self.probe_type == 't'
            return self.tp_event

    def print_stack(self, bpf, stack_id, tgid):
        if stack_id < 0:
            print("        %d" % stack_id)
            return

        stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
        for addr in stack:
            print("        %016x %s" % (addr, bpf.sym(addr, tgid)))

    def _format_message(self, bpf, tgid, values):
        # Replace each %K with kernel sym and %U with user sym in tgid
        kernel_placeholders = [i for i in xrange(0, len(self.types)) if self.types[i] == "K"]
        user_placeholders = [i for i in xrange(0, len(self.types)) if self.types[i] == "U"]
        for kp in kernel_placeholders:
            values[kp] = bpf.ksymaddr(values[kp])
        for up in user_placeholders:
            values[up] = bpf.symaddr(values[up], tgid)
        return self.python_format % tuple(values)

    def print_event(self, bpf, cpu, data, size):
        # Cast as the generated structure type and display
        # according to the format string in the probe.
        event = ct.cast(data, ct.POINTER(self.python_struct)).contents
        values = map(lambda i: getattr(event, "v%d" % i), range(0, len(self.values)))
        msg = self._format_message(bpf, event.tgid, values)
        time = strftime("%H:%M:%S") if Probe.use_localtime else Probe._time_off_str(event.timestamp_ns)
        print(
            "%-8s %-6d %-6d %-12s %-16s %s"
            % (time[:8], event.tgid, event.pid, event.comm, self._display_function(), msg)
        )

        if self.kernel_stack:
            self.print_stack(bpf, event.kernel_stack_id, -1)
        if self.user_stack:
            self.print_stack(bpf, event.user_stack_id, event.tgid)
        if self.user_stack or self.kernel_stack:
            print("")

        Probe.event_count += 1
        if Probe.max_events is not None and Probe.event_count >= Probe.max_events:
            exit()

    def attach(self, bpf, verbose):
        if len(self.library) == 0:
            self._attach_k(bpf)
        else:
            self._attach_u(bpf)
        self.python_struct = self._generate_python_data_decl()
        callback = partial(self.print_event, bpf)
        bpf[self.events_name].open_perf_buffer(callback)

    def _attach_k(self, bpf):
        if self.probe_type == "r":
            bpf.attach_kretprobe(event=self.function, fn_name=self.probe_name)
        elif self.probe_type == "p":
            bpf.attach_kprobe(event=self.function, fn_name=self.probe_name)
        # Note that tracepoints don't need an explicit attach

    def _attach_u(self, bpf):
        """Attach a user-space probe in self.library.

        The library is resolved with BPF.find_library(), falling back to
        BPF.find_exe() because the target might be an executable (e.g.
        'bash'); failure to resolve it is reported through self._bail().
        USDT probes need no attach here because the BPF constructor already
        enabled them; 'r' probes get a uretprobe and everything else a
        uprobe.
        """
        libpath = BPF.find_library(self.library)
        if libpath is None:
            libpath = BPF.find_exe(self.library)
        if libpath is None or len(libpath) == 0:
            self._bail("unable to find library %s" % self.library)

        if self.probe_type == "u":
            return
        if self.probe_type == "r":
            attach_fn = bpf.attach_uretprobe
        else:
            attach_fn = bpf.attach_uprobe
        attach_fn(name=libpath, sym=self.function, fn_name=self.probe_name, pid=Probe.pid)