# Example 1
class GraphParserDriver(BaseTraceTaskDriver):
    """
    Task driver that generates a cheriplot graph from a trace file.

    Available parameters are:

    * :class:`BaseTraceTaskDriver` parameters
    * threads: the number of threads to use (default 1)
    * outfile: the output trace file (default <trace_file_name>_graph.gt)
    """
    description = """
    Trace parse tool.
    This tool generates a cheriplot graph from a CHERI trace.
    """

    threads = Option(
        type=int,
        default=1,
        help="Run the tool with the given number of workers (experimental)")
    outfile = Option(default=None,
                     type=file_path_validator,
                     help="Output graph file name")
    display_name = Option(default=None,
                          help="User-readable name of the dataset")
    cheri_cap_size = Option(default="256",
                            choices=("128", "256"),
                            help="Cheri capability size")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Map the configured capability size (in bits) to its width in bytes.
        cap_size = {"128": 16, "256": 32}.get(self.config.cheri_cap_size)
        if cap_size is None:
            raise ValueError("Invalid capability size {}".format(
                self.config.cheri_cap_size))
        graph_file = (self.config.outfile or
                      "{}_graph.gt".format(self.config.trace))

        # Graph manager that owns the output graph.
        self.pgm = ProvenanceGraphManager(graph_file)

        # Graph parser strategy, depends on the architecture.
        self._parser = CheriMipsModelParser(self.pgm,
                                            capability_size=cap_size,
                                            trace_path=self.config.trace,
                                            threads=self.config.threads)

    def run(self):
        """Parse the trace, store the resulting graph and free the parser."""
        self._parser.parse()
        # get the parsed provenance graph model
        self.pgm.save(name=self.config.display_name)
        # force free the parser to reclaim memory
        del self._parser
# Example 2
class PtrHeadroomPlotDriver(TaskDriver, ExternalLegendTopPlotBuilder):
    """Plot the distribution of unused head/tail space of capabilities."""

    title = "Size of untouched head/tail space accessible by a capability"
    x_label = "Size (power of 2)"
    y_label = "Amount of capabilities"

    outfile = Option(help="Output file", default="ptrheadroom.pdf")
    publish = Option(help="Adjust the plot for publication", action="store_true")
    bucket_where = Option(help="Plot fn-address vs t-alloc of the capabilities"
                          " that end up in a given bucket", default=-1, type=int)
    # bucket_parent = Option(help="Plot parent vs capabilities in bucket for"
    #                        "the capabilities in the given bucket", default=-1)

    def __init__(self, pgm_list, vmmap, **kwargs):
        super().__init__(**kwargs)
        # List of graph managers to plot.
        self.pgm_list = pgm_list
        # VM map model of the process.
        self.vmmap = vmmap
        if self.config.publish:
            # larger font for publication-quality output
            self._style["font"] = FontProperties(size=25)

    def _get_xlabels_kwargs(self):
        """Rotate the X axis tick labels vertically."""
        kwargs = super()._get_xlabels_kwargs()
        kwargs["rotation"] = "vertical"
        return kwargs

    def _get_axes_rect(self):
        """Use a custom axes rectangle for publication plots."""
        if not self.config.publish:
            return super()._get_axes_rect()
        return [0.1, 0.15, 0.85, 0.8]

    def run(self):
        """Build and render the headroom plot."""
        # for now use only the first graph that we are given
        headroom_datasets = [PtrHeadroom(self.pgm_list[0])]
        if self.config.bucket_where < 0:
            # headroom plot
            builder = HeadroomPatchBuilder()
        else:
            # specific bucket information
            builder = FnPerBucketPatchBuilder(self.config.bucket_where)
        self.register_patch_builder(headroom_datasets, builder)
        self.process(out_file=self.config.outfile)
# Example 3
class PytracedumpDriver(BaseTraceTaskDriver):
    """Task driver for the pytracedump trace-dumping tool."""

    description = """Dump CHERI binary trace.
    Each instruction entry has the following format:
    {<ASID>:<instruction_cycle_number>} <PC> <instr_mnemonic> <operands>

    Memory accesses show the referenced address in the line below:
    <target_register> = [<hex_addr>] or [<hex_addr>] = <source_register>

    Capabilities as displayed in the following format:
    [b:<base> o:<offset> l:<length> p:<permission> t:<obj_type> v:<valid> s:<sealed>]
    t_alloc and t_free are only relevant in the provenance graph.

    When dumping the register set, the format of each entry is the following:
    [<register_value_valid>] <register> = <value>"""

    scan = NestedConfig(TraceDumpParser)
    symbols_path = Option(nargs="*",
                          help="Path where to look for binaries in the vmmap, "
                          "default is current directory.",
                          default=["."])
    vmmap = NestedConfig(VMMapFileParser)
    threads = Option(type=int,
                     default=1,
                     help="Run the tool with the given number of workers")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The memory map must be parsed before the symbol reader is built,
        # since the reader resolves symbols against the mapped binaries.
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()
        self.symbols = SymReader(vmmap=self.vmmap,
                                 path=self.config.symbols_path)
        # Trace scanner that performs the actual filtering and dumping.
        self.parser = TraceDumpParser(trace_path=self.config.trace,
                                      sym_reader=self.symbols,
                                      threads=self.config.threads,
                                      config=self.config.scan)

    def update_config(self, config):
        """Propagate configuration updates to the nested trace parser."""
        super().update_config(config)
        self.parser.update_config(config.scan)

    def run(self):
        """Scan the configured region of the trace."""
        self.parser.parse(self.config.scan.start, self.config.scan.end)
# Example 4
class CallGraphPlot(ConfigurableComponent):
    """Handle plotting of a call graph using graph-tool layouts"""

    outfile = Option(
        "-o",
        help="Save plot to file, see matplotlib for supported formats "
        "(svg, png, pgf...)",
        required=True)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # figure and axes shared by the plots produced by this component
        self.fig, self.ax = self.init_axes()

    def init_axes(self):
        """Create the matplotlib figure and axes on the cairo backend."""
        plt.switch_backend("cairo")
        figure = plt.figure(figsize=(15, 10))
        axes = figure.add_axes([0.05, 0.15, 0.9, 0.80])
        return (figure, axes)

    def plot(self, cgm):
        """
        Plot the call graph

        :param cgm: The call graph model to plot
        :type cgm: :class:`cheriplot.callgraph.model.CallGraphManager`
        """
        graph = cgm.graph
        # layout = arf_layout(graph, max_iter=0, d=5)
        layout = sfdp_layout(graph)
        # edge width scales with the backtrace count of each call edge
        edge_width = prop_to_size(cgm.backtrace, mi=0.5, ma=5)
        # label each vertex with the hex address it represents
        vertex_label = graph.new_vertex_property("string")
        map_property_values(graph.vp.addr, vertex_label,
                            lambda addr: "0x%x" % addr)

        self.ax.set_axis_off()

        graph_draw(graph,
                   pos=layout,
                   mplfig=self.ax,
                   vertex_shape="circle",
                   vertex_text=vertex_label,
                   vertex_text_position=-1,
                   edge_pen_width=edge_width)
        plt.savefig(self.config.outfile)
        print("Written file %s" % self.config.outfile)
# Example 5
class BaseAddressMapPlotDriver(VMMapPlotDriver, ASAxesPlotBuilderNoTitle):
    """
    Plot that shows the capability size in the address space
    vs the time of allocation (i.e. when the capability is created).

    Note this only builds a plot for the first graph in
    :prop:`VMMapPlotDriver._pgm_list`.
    """

    title = "Capabilities derivation time vs capability position"
    x_label = "Virtual Address"
    y_label = "Time (million of instructions)"

    publish = Option(help="Adjust plot for publication", action="store_true")

    patch_builder_class = None

    def _get_axes_rect(self):
        """Use a custom axes rectangle for publication plots."""
        if not self.config.publish:
            return super()._get_axes_rect()
        return [0.1, 0.25, 0.85, 0.65]

    def make_axes(self):
        """
        Set the y-axis scale to display millions of cycles instead of
        the number of cyles.
        """
        fig, ax = super().make_axes()
        ax.set_yscale("linear_unit", unit=10**-6)
        return (fig, ax)

    def make_plot(self):
        """Create the address-map plot, with time growing downwards."""
        super().make_plot()
        self.ax.invert_yaxis()

    def run(self):
        """Build patches from the first input graph and render the plot."""
        if self.config.publish:
            # set the style
            self._style["font"] = FontProperties(size=20)
            self._style["font-small"] = FontProperties(size=15)
        first_pgm = self._pgm_list[0]
        prov_graph = first_pgm.prov_view()
        cap_builder = self.patch_builder_class(figure=self.fig, pgm=first_pgm)
        self.register_patch_builder(prov_graph.vertices(), cap_builder)
        self.register_patch_builder(self._vmmap, VMMapPatchBuilder(self.ax))
        self.process(out_file=self._outfile)
# Example 6
class VMMapPlotDriver(TaskDriver):
    """
    Base driver for plots that require a vmmap file as an input
    """
    outfile = Option(help="Output file", default=None)

    def __init__(self, pgm_list, vmmap, **kwargs):
        """
        :param pgm_list: list of provenance graph managers, one per input graph
        :param vmmap: memory mapping model of the process
        :param kwargs: TaskDriver arguments
        """
        super().__init__(**kwargs)
        # List of graph managers for every input graph.
        self._pgm_list = pgm_list
        # The process memory mapping model.
        self._vmmap = vmmap
        # Output file name for the plot; defaults to a name derived
        # from the concrete driver class.
        self._outfile = (self.config.outfile or
                         "{}_plot.pdf".format(type(self).__name__.lower()))
# Example 7
class TraceDumpParser(MultiprocessCallbackParser, ConfigurableComponent):
    """Parser that performs filtering and search operations on a trace"""

    range_format_help = "Accept a range in the form <start>-<end>, -<end>, "\
                        "<start>- or <single_value>"

    info = Option(action="store_true", help="Print trace info and exit")
    start = Option("-s", type=int, default=0, help="Start offset in the trace")
    end = Option("-e", type=int, default=None, help="Stop offset in the trace")
    outfile = Option("-o",
                     type=str,
                     default=None,
                     help="Write output to the given file")
    show_regs = Option("-r", action="store_true", help="Dump register content")
    instr = Option(default=None, help="Find instruction occurrences")
    reg = Option(default=None,
                 help="Show the instructions that use the given register")
    pc = Option(type=option_range_validator,
                default=None,
                help="Find instructions with PC in given range. " +
                range_format_help)
    mem = Option(
        type=option_range_validator,
        default=None,
        help="Show the instructions that use the given memory address. " +
        range_format_help)
    exception = Option(
        default=None,
        help="Show the instructions that raise a given exception. "
        "Accept the exception number in [0-30] or 'any'.")
    syscall = Option(default=None,
                     type=int,
                     help="Show the syscalls with given code")
    nop = Option(type=any_int_validator,
                 default=None,
                 help="Show canonical nops with given code")
    perms = Option(type=any_int_validator,
                   default=None,
                   help="Find instructions that touch capabilities"
                   " with the given permission bits set")
    after = Option("-A",
                   type=int,
                   default=0,
                   help="Dump n instructions after a matching one")
    before = Option("-B",
                    type=int,
                    default=0,
                    help="Dump n instructions before a matching one")
    match_any = Option(
        action="store_true",
        help="Return a trace entry when matches any of the conditions "
        "instead of all")

    def __init__(self, **kwargs):
        """
        This parser filters the trace according to a set of match
        conditions. Multiple match conditions can be used at the same time
        to refine or widen the filter.

        :param sym_reader: symbol reader helper, used to extract
        symbol information
        """
        sym_reader = kwargs.pop("sym_reader")
        assert "trace_path" in kwargs, "trace_path argument is required!"
        if "keyframe_file" not in kwargs:
            kwargs["keyframe_file"] = "{}.kf".format(kwargs["trace_path"])
        super().__init__(**kwargs)

        if not self.is_worker:
            # update kwargs used to create workers
            self.kwargs["sym_reader"] = sym_reader

        self._entry_history = deque([], self.config.before)
        """FIFO instructions that may be shown if a match is found"""

        self._dump_next = 0
        """The remaining number of instructions to dump after a match"""

        self._kernel_mode = False
        """Keep track of kernel-userspace transitions"""

        self.sym_reader = sym_reader
        """Helper used to search symbols for addresses"""

        # every filter is evaluated for each instruction and the results
        # are combined according to the match_any option
        self.filters = [
            self._match_instr, self._match_pc, self._match_addr,
            self._match_reg, self._match_exception, self._match_nop,
            self._match_syscall, self._match_perm
        ]

        self.out = sys.stdout
        """Output file stream"""

        if self.config.outfile:
            # NOTE(review): NamedTemporaryFile is deleted on close, so it
            # must stay open until mp_merge() has concatenated the worker
            # outputs -- confirm worker lifetime covers the merge.
            self.out = tempfile.NamedTemporaryFile(mode="w")

        self.update_config(self.config)

    def update_config(self, config):
        """
        Reset the parser state for a new configuration.

        NOTE(review): this does not invoke super().update_config();
        verify the base classes do not require the notification.
        """
        self._entry_history = deque([], config.before)
        self._dump_next = 0
        self._kernel_mode = False

    def repr_register(self, entry):
        """Return a printable name for the register used by a trace entry."""
        if (entry.gpr_number() != -1):
            return "$%d" % entry.gpr_number()
        elif (entry.capreg_number() != -1):
            return "$c%d" % entry.capreg_number()

    def dump_cap(self, cap):
        """Return the string representation of a capability register."""
        chericap = CheriCap(cap)
        return str(chericap)

    def dump_regs(self, entry, regs, last_regs):
        """Write the whole register set to the output stream."""
        # GPR indices are offset by one ($zero is not stored)
        for idx in range(0, 31):
            real_regnum = idx + 1
            self.out.write("[%d] $%d = %x\n" %
                           (regs.valid_gprs[idx], real_regnum, regs.gpr[idx]))
        for idx in range(0, 32):
            self.out.write(
                "[%d] $c%d = %s\n" %
                (regs.valid_caps[idx], idx, self.dump_cap(regs.cap_reg[idx])))

    def dump_instr(self, inst, entry, idx):
        """
        Write a single instruction entry to the output stream, with the
        branch-target symbol (when resolvable) and memory/register effects.
        """
        if entry.exception != 31:
            exception = "except:%x" % entry.exception
        else:
            # no exception
            exception = ""
        instr_dump = "{%d:%d} 0x%x %s %s" % (
            entry.asid, entry.cycles, entry.pc, inst.inst.name, exception)
        sym = None
        # XXX it would be nice to have an inst.is_branch property
        # it should be provided by LLVM quite easily
        if inst.opcode == "cjalr":
            sym_addr = inst.op1.value.base + inst.op1.value.offset
            sym = self.sym_reader.find_symbol(sym_addr)
        elif inst.opcode == "cjr":
            sym_addr = inst.op0.value.base + inst.op0.value.offset
            sym = self.sym_reader.find_symbol(sym_addr)
        elif inst.opcode == "jalr" or inst.opcode == "jr":
            sym_addr = inst.op0.value
            sym = self.sym_reader.find_symbol(sym_addr)
        if sym:
            instr_dump = "%s (%s)" % (instr_dump, sym)
        self.out.write(instr_dump)
        self.out.write("\n")
        # dump read/write
        if inst.cd is None:
            # no operands for the instruction
            return

        if entry.is_load or entry.is_store:
            sym = self.sym_reader.find_symbol(entry.memory_address)
            if sym:
                loc = "[%x (%s)]" % (entry.memory_address, sym)
            else:
                loc = "[%x]" % entry.memory_address
            if entry.is_load:
                self.out.write("$%s = %s\n" % (inst.cd.name, loc))
            else:
                self.out.write("%s = $%s\n" % (loc, inst.cd.name))

        if inst.op0.is_register:
            if (inst.op0.gpr_index != -1):
                gpr_value = inst.op0.value
                gpr_name = inst.op0.name
                if gpr_value is not None:
                    self.out.write("$%s = %x\n" % (gpr_name, gpr_value))
                else:
                    self.out.write("$%s = Unknown\n" % gpr_name)
            elif (inst.op0.cap_index != -1 or inst.op0.caphw_index != -1):
                cap_name = inst.op0.name
                cap_value = inst.op0.value
                if cap_value is not None:
                    self.out.write("$%s = %s\n" %
                                   (cap_name, self.dump_cap(cap_value)))
                else:
                    self.out.write("$%s = Unknown\n" % cap_name)

    def dump_kernel_user_switch(self, entry):
        """Report kernel<->user mode transitions in the output stream."""
        if self._kernel_mode != entry.is_kernel():
            if entry.is_kernel():
                self.out.write("Enter kernel mode {%d:%d}\n" %
                               (entry.asid, entry.cycles))
            else:
                self.out.write("Enter user mode {%d:%d}\n" %
                               (entry.asid, entry.cycles))
            self._kernel_mode = entry.is_kernel()

    def do_dump(self, inst, entry, regs, last_regs, idx):
        """Dump an instruction and, if requested, the register set."""
        # dump instr
        self.dump_instr(inst, entry, idx)
        if self.config.show_regs:
            self.dump_regs(entry, regs, last_regs)

    def _update_match_result(self, match, value):
        """
        Combine the current match result with the value of
        a test according to the match mode.
        A value of None means the filter is disabled and is ignored.
        """
        if value is None:
            return match
        if self.config.match_any:
            return match or value
        else:
            return match and value

    def _check_limits(self, start, end, value):
        """
        Return True when value is within [start, end]; a bound set
        to None is unconstrained.
        """
        if start is not None and start > value:
            return False
        if end is not None and end < value:
            return False
        return True

    def _match_instr(self, inst, regs):
        """Check if the current instruction matches"""
        if self.config.instr:
            return self.config.instr == inst.opcode
        return None

    def _match_pc(self, inst, regs):
        """Check if the current instruction PC matches"""
        if self.config.pc:
            start, end = self.config.pc
            return self._check_limits(start, end, inst.entry.pc)
        return None

    def _match_addr(self, inst, regs):
        """Check if the current load or store address matches"""
        if self.config.mem:
            if inst.entry.is_load or inst.entry.is_store:
                start, end = self.config.mem
                return self._check_limits(start, end,
                                          inst.entry.memory_address)
            else:
                return False
        return None

    def _match_reg(self, inst, regs):
        """Check if the current instruction uses a register"""
        if self.config.reg:
            for operand in inst.operands:
                if not operand.is_register:
                    continue
                if operand.name == self.config.reg:
                    return True
            return False
        return None

    def _match_exception(self, inst, regs):
        """Check if an exception occurred while executing an instruction"""
        if self.config.exception:
            if inst.entry.exception == 31:
                # no exception
                return False
            elif self.config.exception == "any":
                return True
            else:
                return inst.entry.exception == int(self.config.exception)
        return None

    def _match_syscall(self, inst, regs):
        """Check if this instruction is a syscall with given code"""
        # system call code is in v0
        code_reg = 2
        # compare against None so that syscall code 0 is a usable filter
        if self.config.syscall is not None:
            if inst.opcode == "syscall" and inst.entry.exception == 8:
                # fixed typo: valid_grps -> valid_gprs (see dump_regs)
                if (regs.valid_gprs[code_reg]
                        and regs.gpr[code_reg] == self.config.syscall):
                    return True
            return False
        return None

    def _match_perm(self, inst, regs):
        """Check if this instruction uses capabilities with the given perms"""
        # compare against None so that a 0 permission mask is a usable filter
        if self.config.perms is not None:
            for operand in inst.operands:
                if (not operand.is_capability or operand.value is None):
                    # if not a capability or the register in the register set
                    # is not valid
                    continue
                cap_reg = CheriCap(operand.value)
                if cap_reg.has_perm(self.config.perms):
                    return True
            return False
        return None

    def _match_nop(self, inst, regs):
        """Check if instruction is a given canonical NOP"""
        # compare against None so that nop code 0 is a usable filter
        if self.config.nop is not None:
            if inst.opcode == "lui":
                return (inst.op0.gpr_index == 0
                        and inst.op1.value == self.config.nop)
            return False
        return None

    def scan_all(self, inst, entry, regs, last_regs, idx):
        """
        Callback invoked for every instruction in the trace.
        Evaluates all the filters and dumps matching entries along with
        the configured before/after context.
        """
        if self._dump_next > 0:
            self.dump_kernel_user_switch(entry)
            self._dump_next -= 1
            self.do_dump(inst, entry, regs, last_regs, idx)
        else:
            # initial match value, if match_any is true
            # we OR the match results so start with false
            # else we AND them, so start with true
            match = not self.config.match_any
            for checker in self.filters:
                result = checker(inst, regs)
                match = self._update_match_result(match, result)
            if match:
                self.dump_kernel_user_switch(entry)
                # dump all the instructions in the queue
                while len(self._entry_history) > 0:
                    old_inst, idx = self._entry_history.popleft()
                    self.do_dump(old_inst, old_inst.entry, old_inst._regset,
                                 old_inst._prev_regset, idx)
                try:
                    self.do_dump(inst, entry, regs, last_regs, idx)
                except Exception as e:
                    logger.error("Can not dump instruction %s: %s", inst, e)
                    return True
                self._dump_next = self.config.after
            else:
                self._entry_history.append((inst, idx))
        return False

    def parse(self, start=None, end=None, direction=0):
        """
        Parse the trace in the given range, falling back to the
        configured start/end offsets. With the info option set only the
        trace size is printed.
        """
        # compare against None so that an explicit start=0 overrides
        # a non-zero configured start offset
        start = start if start is not None else self.config.start
        end = end if end is not None else self.config.end
        if self.config.info:
            self.out.write("Trace size: %s\n" % len(self))
        else:
            super().parse(start, end)

    def mp_result(self):
        """Return the temporary file."""
        self.out.flush()
        return self.out.name

    def mp_merge(self, results):
        """Concatenate temporary files"""
        if self.config.outfile:
            with open(self.config.outfile, 'wb') as out:
                for in_file in results:
                    with open(in_file, 'rb') as fd:
                        shutil.copyfileobj(fd, out, 1024 * 1024 * 50)
# Example 8
class SymbolResolutionDriver(BaseToolTaskDriver):
    """
    Task driver that fetches symbol names from binary and source files.

    This step requires different input data:

    * Output from procstat-like commands in csv or tab-separated format,
      this is required to extract the base address of the sections of
      a binary in memory.
    * Binary ELF files containing the debug symbols.
    * Kernel syscalls.master file to map syscall numbers to a name/signature.
    """
    description = """
    Resolve call symbols in a cheriplot graph.
    This is a postprocessing tool that extracts debug information from
    ELF files, sources and runtime information to add symbol names and
    call signatures to the cheriplot graph.

    The tool is incremental, it can be run multiple times on the graph.
    """

    graph = Argument(type=file_path_validator,
                     help="Path to the cheriplot graph.")
    no_output = Option(
        action="store_true",
        help="Do not store output graph, useful for cheriplot-runner")
    vmmap = NestedConfig(VMMapFileParser)
    elfpath = Option(nargs="+",
                     type=file_path_validator,
                     default=[],
                     help="Paths where to look for ELF files with symbols")
    syscalls = Option(default=None,
                      type=file_path_validator,
                      help="Path to the syscalls.master file")
    outfile = Option(default=None,
                     type=file_path_validator,
                     help="Output file name, defaults to the input file")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Loaded graph manager, set by _load_graph().
        self.pgm = None

        # Memory map file parser; parsed right away because the
        # symbol reader depends on the mappings.
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()

        # Symbol reader.
        self.symreader = SymReader(vmmap=self.vmmap, path=self.config.elfpath)

        # Output file path, defaults to the input file.
        self._outfile = self.config.outfile or self.config.graph

        # self.syscalls = BSDSyscallMasterParser(self.config.syscalls)
        # """Parser for the syscalls.master file."""
        self._load_graph()

    def _load_graph(self):
        """Load the cheriplot graph from the configured path."""
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def run(self):
        """Resolve symbols in the graph and optionally store the result."""
        # self.syscalls.parse()
        visitor = ResolveSymbolsGraphVisit(self.pgm, self.symreader, None)
        visitor(self.pgm.graph)
        if self.config.no_output:
            return
        with ProgressTimer("Write output graph", logger):
            self.pgm.save(self._outfile)
# Example 9
class ProvenanceGraphDumpDriver(BaseToolTaskDriver):
    """
    Dump and manually filter the nodes in the
    provenance graph.

    Vertices in the provenance and/or call layers are matched against the
    configured filters and printed to stdout, optionally together with
    their predecessors, successors and related vertices.
    """

    range_format_help = "Accept a range in the form <start>-<end>, -<end>, "\
                        "<start>- or <single_value>"

    graph = Argument(help="Path to the provenance graph file")
    ignore_filter = Option(action="store_true",
                           help="Ignore any graph filter in place")
    layer = Option(help="Graph layer to dump.",
                   choices=("prov", "call", "all"),
                   default="all")
    # provenance layer filters
    origin = Option(help="Find vertices with specific origin.",
                    choices=("root", "csetbounds", "cfromptr", "ptrbounds",
                             "candperm", "partial", "call", "syscall"),
                    default=None)
    pc = Option(type=option_range_validator,
                default=None,
                help="Find vertices with PC in the given range. " +
                range_format_help)
    time = Option(type=option_range_validator,
                  help="Find all vertices created at given time. " +
                  range_format_help)
    lifetime = Option(
        type=option_range_validator,
        help="Find all vertices with a lifetime (t_free - t_alloc) "
        "in the given range. " + range_format_help)
    mem = Option(type=option_range_validator,
                 help="Show all vertices stored at a memory address. " +
                 range_format_help)
    deref = Option(
        type=option_range_validator,
        help="Show all vertices dereferenced at a memory address. " +
        range_format_help)
    size = Option(type=option_range_validator,
                  help="Show vertices with given length. " + range_format_help)
    perms = Option(type=any_int_validator,
                   help="Find vertices with given permission bits set.")
    otype = Option(type=any_int_validator,
                   help="Find vertices with given object type.")
    match_any = Option(action="store_true",
                       help="Return a trace entry when matches any"
                       " of the conditions, otherwise all conditions"
                       " must be verified.")
    predecessors = Option(
        action="store_true",
        help="Show the predecessors of a matching capability.")
    successors = Option(action="store_true",
                        help="Show the successors of a matching capability.")
    full_info = Option(action="store_true",
                       help="Show the full vertex information")
    vmmap = NestedConfig(VMMapFileParser)
    elfpath = Option(nargs="+",
                     type=file_path_validator,
                     default=[],
                     help="Paths where to look for ELF files with symbols")
    # call layer filters
    target = Option(
        help="Show calls to the given target address or symbol name.")
    related = Option(
        action="store_true",
        help="Show vertices in the provenance layer related to each call.")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.pgm = ProvenanceGraphManager.load(self.config.graph)
        """Manager for the graph to dump."""

        self.match_origin = None
        """Search for nodes with this origin"""

        self._check_origin_arg(self.config.origin)

        # Each filter callable returns True/False when its option is set
        # and None when unset; None is neutral in _update_match_result.
        self.prov_filters = [
            self._match_origin,
            self._match_pc,
            self._match_mem,
            self._match_deref,
            self._match_perms,
            self._match_otype,
            self._match_alloc,
            self._match_len,
            self._match_lifetime,
        ]
        self.call_filters = [
            self._match_call_type,
            self._match_call_target,
        ]

        # Symbol resolution is optional and only available when both a
        # memory map and ELF search paths are given.
        if self.config.vmmap and self.config.elfpath:
            self.vmmap = VMMapFileParser(config=self.config.vmmap)
            self.vmmap.parse()
            self.symreader = SymReader(vmmap=self.vmmap,
                                       path=self.config.elfpath)
        else:
            self.vmmap = None
            self.symreader = None

    def _check_origin_arg(self, match_origin):
        """Translate the --origin CLI choice into an enum value.

        Raises ValueError for a value that is not one of the declared
        choices.
        """
        if match_origin is None:
            return
        elif match_origin == "root":
            self.match_origin = CheriNodeOrigin.ROOT
        elif match_origin == "csetbounds":
            self.match_origin = CheriNodeOrigin.SETBOUNDS
        elif match_origin == "cfromptr":
            self.match_origin = CheriNodeOrigin.FROMPTR
        elif match_origin == "ptrbounds":
            self.match_origin = CheriNodeOrigin.PTR_SETBOUNDS
        elif match_origin == "candperm":
            # BUGFIX: the CLI choice is "candperm" but this branch used to
            # test "andperm", so the documented choice raised ValueError.
            self.match_origin = CheriNodeOrigin.ANDPERM
        elif match_origin == "partial":
            self.match_origin = CheriNodeOrigin.PARTIAL
        elif match_origin == "call":
            self.match_origin = EdgeOperation.CALL
        elif match_origin == "syscall":
            self.match_origin = EdgeOperation.SYSCALL
        else:
            raise ValueError("Invalid match_origin parameter")

    def _update_match_result(self, match, value):
        """
        Combine the current match result with the value of
        a test according to the match mode.

        A value of None means the filter is inactive and leaves the
        current result unchanged.
        """
        if value is None:
            return match
        if self.config.match_any:
            return match or value
        else:
            return match and value

    def _check_limits(self, start, end, value):
        """Return True when value lies in [start, end]; open bounds allowed."""
        if start is None:
            start = 0
        if end is None:
            end = np.inf
        if start <= value and value <= end:
            return True
        return False

    def _match_lifetime(self, edge, vdata):
        """Match vertices whose lifetime (t_free - t_alloc) is in range."""
        if self.config.lifetime:
            start, end = self.config.lifetime
            if vdata.cap.t_free >= 0:
                # BUGFIX: this used an undefined name (v_data) and raised
                # NameError for any vertex with a valid free time.
                lifetime = vdata.cap.t_free - vdata.cap.t_alloc
            else:
                # never freed: treat the lifetime as unbounded
                lifetime = np.inf
            return self._check_limits(start, end, lifetime)
        return None

    def _match_origin(self, edge, vdata):
        """Match vertices with the origin selected via --origin."""
        # compare against None: an enum member with value 0 is falsy
        if self.match_origin is not None:
            return vdata.origin == self.match_origin
        return None

    def _match_pc(self, edge, vdata):
        """Match vertices created at a PC in the --pc range."""
        if self.config.pc:
            start, end = self.config.pc
            return self._check_limits(start, end, vdata.pc)
        return None

    def _match_mem(self, edge, vdata):
        """Match vertices stored at a memory address in the --mem range."""
        if self.config.mem:
            start, end = self.config.mem
            result = False
            for addr in vdata.address["addr"]:
                result |= self._check_limits(start, end, addr)
                if result:
                    break
            return result
        return None

    def _match_deref(self, edge, vdata):
        """Match vertices dereferenced at an address in the --deref range."""
        if self.config.deref:
            start, end = self.config.deref
            result = False
            for addr in vdata.deref["addr"]:
                result |= self._check_limits(start, end, addr)
                if result:
                    break
            return result
        return None

    def _match_perms(self, edge, vdata):
        """Match vertices whose capability has the --perms bits set."""
        if self.config.perms:
            return vdata.cap.has_perm(self.config.perms)
        return None

    def _match_otype(self, edge, vdata):
        """Match vertices whose capability has the --otype object type."""
        if self.config.otype:
            return vdata.cap.objtype == self.config.otype
        return None

    def _match_alloc(self, edge, vdata):
        """Match vertices allocated at a time in the --time range."""
        if self.config.time:
            start, end = self.config.time
            return self._check_limits(start, end, vdata.cap.t_alloc)
        return None

    def _match_len(self, edge, vdata):
        """Match vertices whose capability length is in the --size range."""
        if self.config.size:
            start, end = self.config.size
            return self._check_limits(start, end, vdata.cap.length)
        return None

    def _match_call_type(self, edge, vdata):
        """Match call-layer edges with the operation selected by --origin."""
        if self.config.origin and edge is not None:
            eop = self.pgm.edge_operation[edge]
            return eop == self.match_origin
        return None

    def _match_call_target(self, edge, vdata):
        """Match call vertices whose symbol equals the --target value."""
        if self.config.target:
            return (vdata.symbol == self.config.target)
        return None

    def _find_function_for_pc(self, pc):
        """Resolve pc to "file:symbol" via the symbol reader, if available."""
        if self.symreader is not None:
            rt = self.symreader.find_function(pc)
            if rt is None:
                return rt
            # return file:symbol
            return "{}:{}".format(rt[1], rt[0])
        return None

    def _find_symbol_at(self, addr):
        """Resolve addr to "file:symbol" via the symbol reader, if available."""
        if self.symreader is not None:
            rt = self.symreader.find_address(addr)
            if rt is None:
                return None
            # return file:symbol
            return "{}:{}".format(rt[1], rt[0])
        return None

    def _dump_prov_vertex(self, edge, v):
        """Format a provenance-layer vertex (annotations, events, symbols)."""
        vdata = self.pgm.data[v]
        str_vertex = StringIO()
        str_vertex.write("(provenance) {} ".format(vdata))

        # Display annotated_XXX properties
        str_vertex.write(" annotations: { ")
        for key in self.pgm.graph.vp.keys():
            if not key.startswith("annotated_"):
                continue
            name = key[len("annotated_"):]
            property_map = self.pgm.graph.vp[key]
            if property_map[v]:
                # vertex is in the property map
                str_vertex.write("{} ".format(name.upper()))
        str_vertex.write("} ")

        # Dump event table counters (dereference and load/store events)
        events = vdata.event_tbl
        n_load = (events["type"] & EventType.DEREF_LOAD).sum()
        n_store = (events["type"] & EventType.DEREF_STORE).sum()
        str_vertex.write("deref-load:{:d} deref-store:{:d} ".format(
            n_load, n_store))
        n_loaded = (events["type"] & EventType.LOAD).sum()
        n_stored = (events["type"] & EventType.STORE).sum()
        str_vertex.write("load:{:d} store:{:d}".format(n_loaded, n_stored))

        # Display symbol name at the capability base, if resolvable
        symbol = self._find_symbol_at(vdata.cap.base)
        if symbol:
            str_vertex.write(" to:{}".format(symbol))

        # Display function at PC, if resolvable
        fn_sym = self._find_function_for_pc(vdata.pc)
        if fn_sym:
            str_vertex.write(" {}".format(fn_sym))

        # Dump event table details
        if self.config.full_info:
            str_vertex.write("\n")
            frame_str = vdata.event_tbl.to_string(
                formatters={
                    "addr": "0x{0:x}".format,
                    "type": lambda t: str(EventType(t))
                })
            str_vertex.write("Event table:\n{}\n".format(frame_str))

        return str_vertex.getvalue()

    def _dump_call_vertex(self, edge, v):
        """Format a call-layer vertex with its incoming edge metadata."""
        vdata = self.pgm.data[v]
        str_vertex = StringIO()
        if edge is not None:
            eop = EdgeOperation(self.pgm.edge_operation[edge])
            eaddr = self.pgm.edge_addr[edge]
            etime = self.pgm.edge_time[edge]
        else:
            # root call vertex: no incoming edge metadata
            eop = None
            eaddr = etime = 0
        str_vertex.write(
            "(call) op:{!s} caller:0x{:x} t_call:{:d} {!s}\n".format(
                eop, eaddr, etime, vdata))
        return str_vertex.getvalue()

    def _dump_vertex(self, edge, v):
        """Dispatch vertex formatting on the layer the vertex belongs to."""
        if self.pgm.layer_prov[v]:
            return self._dump_prov_vertex(edge, v)
        elif self.pgm.layer_call[v]:
            return self._dump_call_vertex(edge, v)
        else:
            logger.warning("dump_vertex: invalid layer %s", self.pgm.data[v])

    def _dump_predecessors(self, view, v):
        """Print the chain of predecessors of v, root first."""
        if not self.config.predecessors or v is None:
            return
        predecessors = []
        current = v
        while True:
            parent, edge = self._get_parent(view, current)
            predecessors.insert(0, (current, edge))
            if parent is None:
                break
            current = parent
        for pred, edge in predecessors:
            print("+- {}".format(self._dump_vertex(edge, pred)))
            print("^")

    def _dump_successors(self, v):
        """Print all successors of v, depth-first, with indentation by depth."""
        if not self.config.successors:
            return
        vertices = list(zip(repeat(1), repeat(v), v.out_neighbours()))
        # list of tuples (depth, parent, vertex)
        while len(vertices):
            depth, parent, s = vertices.pop(0)
            edge = self.pgm.graph.edge(parent, s)
            successors = list(
                zip(repeat(depth + 1), repeat(s), s.out_neighbours()))
            successors.extend(vertices)
            vertices = successors
            space = "  " * depth
            print("{}+- {}".format(space, self._dump_vertex(edge, s)))

    def _dump_related(self, v):
        """Print cross-layer (provenance <-> call) vertices related to v."""
        if not self.config.related:
            return
        if self.pgm.layer_prov[v]:
            # dump call vertices where this vertex is visible either
            # at CALL or RETURN time
            u = self.pgm.graph.vertex(v)
            for edge in u.out_edges():
                if not self.pgm.layer_call[edge.target()]:
                    continue
                eop = EdgeOperation(self.pgm.edge_operation[edge])
                regno = ", ".join(map(str, self.pgm.edge_regs[edge]))
                dst = self.pgm.data[edge.target()]
                print("[{}] @ c{} +-> {}".format(eop.name, regno, dst))
        if self.pgm.layer_call[v]:
            # dump visible vertices at CALL and RETURN time
            u = self.pgm.graph.vertex(v)
            for edge in u.in_edges():
                if not self.pgm.layer_prov[edge.source()]:
                    continue
                eop = EdgeOperation(self.pgm.edge_operation[edge])
                regno = ", ".join(map(str, self.pgm.edge_regs[edge]))
                src = self.pgm.data[edge.source()]
                print("[{}] @ c{} +-> {}".format(eop.name, regno, src))

    def _get_parent(self, view, v):
        """
        Get the parent vertex in the given layer and the connecting
        edge.

        Returns (None, None) for a root vertex.
        """
        parents = list(v.in_neighbours())
        if len(parents) == 0:
            return None, None
        return parents[0], view.edge(parents[0], v)

    def _dump_layer(self, view):
        """Run all configured filters on every vertex of view and dump matches."""
        for v in view.vertices():
            vdata = self.pgm.data[v]
            parent, edge = self._get_parent(view, v)
            # initial match value, if match_any is true
            # we OR the match results so start with false
            # else we AND them, so start with true
            match = not self.config.match_any
            if self.pgm.layer_prov[v]:
                filters = self.prov_filters
            elif self.pgm.layer_call[v]:
                filters = self.call_filters
            else:
                logger.warning("dump_layer: invalid layer %s", vdata)
                # BUGFIX: without this, `filters` would be unbound below
                continue

            for checker in filters:
                result = checker(edge, vdata)
                match = self._update_match_result(match, result)
            if match:
                self._dump_predecessors(view, parent)
                print("+- {}".format(self._dump_vertex(edge, v)))
                self._dump_related(v)
                self._dump_successors(v)
                print("######")

    def run(self):
        """Dump the selected layer(s), optionally ignoring existing filters."""
        if self.config.ignore_filter:
            self.pgm.graph.clear_filters()

        if self.config.layer == "all" or self.config.layer == "prov":
            self._dump_layer(self.pgm.prov_view())
        if self.config.layer == "all" or self.config.layer == "call":
            self._dump_layer(self.pgm.call_view())
Ejemplo n.º 10
0
class VMMapFileParser(ConfigurableComponent):
    """
    Parse a vmmap file created by procstat or libprocstat-based vmmap_dump tool

    The file format (CSV from vmmap_dump, or whitespace-separated procstat
    output) is guessed from the presence of a comma in the first line.
    """
    vmmap_file = Option(
        default=None,
        type=file_path_validator,
        help="File that specify the VM mappings for the traced process")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.vmmap = VMMapModel()
        """Model that describes vmmap entries."""

        self.map_file = None
        """File where the entries are specified."""

        self.csv_style = False
        """The map file can be csv or space-separated."""

        if self.config.vmmap_file is None:
            logger.debug("No vmmap file, empty vmmap")
            return

        try:
            self.map_file = open(self.config.vmmap_file, "r")
        except IOError:
            logger.error("Can not open %s", self.config.vmmap_file)
            raise
        # Guess the format of the file: a comma in the first line means
        # vmmap_dump CSV output, otherwise procstat column output.
        line = self.map_file.readline()
        self.map_file.seek(0)
        self.csv_style = "," in line

    def get_model(self):
        """Return the :class:`VMMapModel` populated by :meth:`parse`."""
        return self.vmmap

    def parse(self):
        """Parse the map file into ``self.vmmap.vmmap`` (a pandas DataFrame).

        No-op when no vmmap file was configured.
        """
        if not self.map_file:
            # nothing to parse
            return

        if self.csv_style:
            logger.info("Try to load vmmap_dump memory map file")
            vmmap_dump_cols = [
                "start", "end", "offset", "perm", "res", "pres", "ref", "shd",
                "flag", "tp", "path"
            ]

            def maybe_b16_int(value):
                # vmmap_dump may emit addresses in hex (0x-prefixed)
                # or in decimal
                text = str(value).strip()
                return int(text, 16) if text.startswith("0x") else int(text)

            col_converters = {"start": maybe_b16_int, "end": maybe_b16_int}
            vmmap = pd.read_csv(self.map_file,
                                names=vmmap_dump_cols,
                                converters=col_converters)
        else:
            logger.info("Try to load procstat memory map file")
            procstat_cols = [
                "pid", "start", "end", "perm", "res", "pres", "ref", "shd",
                "flag", "tp", "path"
            ]
            col_types = {
                "pid": np.int_,
                "start": np.uint64,
                "end": np.uint64,
                "perm": str,
                "res": np.int_,
                "pres": np.int_,
                "ref": np.int_,
                "shd": np.int_,
                "flag": str,
                "tp": str,
                "path": str
            }

            def from_b16_int(value):
                return int(value, 16)

            col_converters = {"start": from_b16_int, "end": from_b16_int}
            # pd.read_table is deprecated; read_csv with a whitespace
            # regex separator (raw string, "\s+" is an invalid escape)
            # is the supported equivalent.
            vmmap = pd.read_csv(self.map_file,
                                names=procstat_cols,
                                sep=r"\s+",
                                dtype=col_types,
                                converters=col_converters)
        # close the input file, it was opened in __init__ and is fully
        # consumed at this point
        self.map_file.close()
        self.map_file = None
        vmmap = vmmap.fillna("")
        logger.debug("Parsed vmmap")
        # DataFrame.ix was removed from pandas; use label-based .loc
        self.vmmap.vmmap = vmmap.loc[:,
                                     ["start", "end", "perm", "flag", "path"]]
Ejemplo n.º 11
0
class GraphFilterDriver(BaseToolTaskDriver):
    """Driver that implements the top-level filtering tool."""
    description = """
    Graph filtering tool.
    This tool processes a cheriplot graph to produce a filtered version.
    The filtered graph still includes all the vertices but carries a mask
    that removes some of the vertices when used.
    """

    graph = Argument(type=file_path_validator,
                     help="Path to the cheriplot graph")
    outfile = Option(default=None,
                     type=file_path_validator,
                     help="Path to the output file")
    display_name = Option(default=None, help="New display-name for the graph")
    purge = Option(action="store_true",
                   help="Purge filtered elements in the output graph. "
                   "This is not reversible.")
    incremental = Option(action="store_true",
                         help="Do not remove existing graph filters.")
    no_output = Option(
        action="store_true",
        help="Do not store output graph, useful for cheriplot-runner")
    vmmap = NestedConfig(VMMapFileParser)
    no_null = Option(action="store_true", help="Filter null vertices")
    no_kernel = Option(action="store_true", help="Filter kernel vertices")
    no_cfromptr = Option(action="store_true", help="Filter cfromptr vertices")
    no_andperm = Option(action="store_true", help="Filter candperm vertices")
    no_stack = Option(action="store_true",
                      help="Filter vertices pointing to the stack")
    no_roots = Option(action="store_true", help="Filter root vertices")
    annotate_stack = Option(action="store_true",
                            help="Mark vertices pointing to the stack")
    annotate_malloc = Option(action="store_true",
                             help="Mark vertices derived from malloc")
    annotate_malloc_ancestors = Option(
        action="store_true",
        help="Mark vertices that are used to derive malloc capabilities.")
    annotate_mmap = Option(action="store_true",
                           help="Mark vertices derived from mmap")
    annotate_xnx = Option(action="store_true",
                          help="Mark executable and non executable vertices")
    remove_qtrace_execve = Option(
        action="store_true",
        help=
        "Mask out vertices that are created before the return from the last execve() issued in qtrace."
    )
    aggregate_ptrbounds = Option(
        action="store_true",
        help="Merge sequences of cfromptr+csetbounds. This is not reversible.")
    tslice = Option(action="store_true",
                    help="Filter a graph slice (see tslice parameters)")
    tslice_mode = Option(nargs="+",
                         choices=("deref", "create", "access"),
                         default=["create"],
                         help="""tslice filter mode parameter:
        deref: cap dereference time (load/store/call via capability)
        create: cap create time
        access: cap access time (load/store of the capability)
        """)
    tslice_time = Option(
        nargs=2,
        type=int,
        metavar=("start", "end"),
        help="tslice filter start-time and end-time parameters")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.pgm = None
        """Loaded graph managers."""

        self._vmmap_parser = VMMapFileParser(config=self.config.vmmap)
        """Process memory mapping CSV parser."""

        self._outfile = self.config.outfile or self.config.graph
        """Output file path, defaults to the input file."""

        self._load_graph()

    def _load_graph(self):
        """Load the provenance graph manager from the configured path."""
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def _find_stack_entry(self):
        """Return the vmmap entry for the stack region, or None.

        The stack is identified as the first mapping flagged as
        growing downwards.
        """
        for entry in self._vmmap_parser.get_model():
            if entry.grows_down:
                return entry
        return None

    def _require_stack_entry(self, option_name):
        """Return the stack vmmap entry, raising if no vmmap was given."""
        entry = self._find_stack_entry()
        if entry is None:
            msg = "{} filter requires vmmap argument".format(option_name)
            logger.error(msg)
            raise RuntimeError(msg)
        return entry

    def _get_filter(self, pgm):
        """Get a combined filter for a given graph manager."""
        filters = ChainGraphVisit(pgm)
        if self.config.remove_qtrace_execve:
            filters += FilterBeforeExecve(pgm)
        if self.config.no_null:
            filters += FilterNullVertices(pgm)
        if self.config.no_roots:
            filters += FilterRootVertices(pgm)
        if self.config.aggregate_ptrbounds:
            filters += MergeCfromptr(pgm)
        if self.config.no_cfromptr:
            filters += FilterCfromptr(pgm)
        if self.config.no_andperm:
            filters += FilterCandperm(pgm)
        if self.config.no_stack:
            # NOTE: the error message used to read "np-stack"
            entry = self._require_stack_entry("no-stack")
            filters += FilterStackVertices(pgm, entry.start, entry.end)
        if self.config.no_kernel:
            filters += FilterKernelVertices(pgm)

        if self.config.tslice:
            start, end = self.config.tslice_time
            deref = "deref" in self.config.tslice_mode
            create = "create" in self.config.tslice_mode
            access = "access" in self.config.tslice_mode
            filters += ProvGraphTimeSlice(pgm,
                                          start,
                                          end,
                                          creation_time=create,
                                          deref_time=deref,
                                          access_time=access)

        if self.config.annotate_stack:
            entry = self._require_stack_entry("mark-stack")
            filters += DecorateStackStrict(pgm, entry.start, entry.end)
            filters += DecorateStackAll(pgm, entry.start, entry.end)

        if self.config.annotate_mmap:
            filters += DecorateMmap(pgm)
            filters += DecorateMmapReturn(pgm)

        if self.config.annotate_malloc:
            vmmap = self._vmmap_parser.get_model()
            # the heap is assumed to be the mapping with the lowest
            # end address in the memory map
            min_addr = 2**64
            heap_entry = None
            for entry in vmmap:
                if entry.end < min_addr:
                    min_addr = entry.end
                    heap_entry = entry
            if not heap_entry:
                logger.error("mark-malloc filter requires vmmap argument")
                raise RuntimeError(
                    "mark-malloc filter requires vmmap argument")
            # filters += DecorateHeap(pgm, heap_entry.start, heap_entry.end)
            filters += DecorateMalloc(pgm)
            # filters += DecorateMallocReturn(pgm)

        if self.config.annotate_xnx:
            filters += DecorateExecutable(pgm)

        return filters

    def run(self):
        """Parse the vmmap, apply the configured filters and save the graph."""
        self._vmmap_parser.parse()
        if not self.config.incremental:
            self.pgm.graph.clear_filters()
        graph_filter = self._get_filter(self.pgm)
        filtered_graph = graph_filter(self.pgm.graph)
        vfilt, _ = filtered_graph.get_vertex_filter()
        self.pgm.graph.set_vertex_filter(vfilt)
        if self.config.purge:
            with ProgressTimer("Purge filtered vertices", logger):
                self.pgm.graph.purge_vertices()
        if self.config.display_name:
            self.pgm.graph.gp.name = self.config.display_name
        if not self.config.no_output:
            with ProgressTimer("Write output graph", logger):
                self.pgm.save(self._outfile)
Ejemplo n.º 12
0
class TxtTraceCmpParser(CallbackTraceParser, TaskDriver):
    """
    Compare a text trace with a binary trace and
    report any difference.

    The text trace is consumed by a small state machine that
    extracts, for each instruction, the PC and opcode plus any
    register writes and memory accesses; each extracted record is
    checked against the corresponding binary (cvtrace) entry.
    """
    description = "Scan two traces and inspect differences"

    cvtrace = Argument(help="Path to cvtrace file")
    txttrace = Argument(help="Path to the text trace file")
    pc_only = Option("-p",
                     action="store_true",
                     help="Only check instruction PC")
    quiet = Option("-q", action="store_true", help="Suppress warning messages")

    def __init__(self, config):
        # bugfix: the unbound base __init__ calls were missing the
        # instance argument and would raise TypeError
        CallbackTraceParser.__init__(self, config.cvtrace)
        TaskDriver.__init__(self, config)

        if config.quiet:
            logging.basicConfig(level=logging.ERROR)

        # txt trace parser state machine
        self.txt_parse_state = State.S_INSTR
        self.txt_trace = open(config.txttrace, "r")
        # skip lines from the txt trace until the first
        # instruction
        self._skiplines(inst_only=True)
        self.txt_parse_state = State.S_INSTR

    def run(self):
        """Run the comparison over the whole binary trace."""
        self.parse()

    def _skiplines(self, inst_only=False):
        """Skip lines that are not used.

        Advance the text trace until a line of interest is found,
        set the parser state for that line, then seek back so that
        the next handler re-reads it.

        :param inst_only: stop only at instruction lines, skipping
            register-write and memory-access lines as well.
        """
        while True:
            saved_pos = self.txt_trace.tell()
            line = self.txt_trace.readline()
            if not line:
                # EOF guard: previously readline() returning "" matched
                # no pattern and the loop would spin forever
                raise RuntimeError("Unexpected end of text trace")
            # test all the patterns that should not be skipped
            if not inst_only:
                if re.search("Cap Memory Read", line) is not None:
                    self.txt_parse_state = State.S_CAP_MEM
                    break
                if re.search("Cap Memory Write", line) is not None:
                    self.txt_parse_state = State.S_CAP_MEM
                    break
                if re.search("Memory Read", line) is not None:
                    self.txt_parse_state = State.S_MEM
                    break
                if re.search("Memory Write", line) is not None:
                    self.txt_parse_state = State.S_MEM
                    break
                if re.search(r"Write [C\$]?[a-z0-9]+", line) is not None:
                    self.txt_parse_state = State.S_REG
                    break
            if re.match("[0-9xa-f]+:", line) is not None:
                # the next call to the parser function will
                # continue from here
                self.txt_parse_state = State.S_INSTR_END
                break
        self.txt_trace.seek(saved_pos)

    def _txt_instr(self, inst):
        """Parse an instruction line of the form "<pc>: <opcode> <operands>".

        Fill inst["pc"] and inst["opcode"] and skip ahead to the
        next line of interest.
        """
        line = self.txt_trace.readline()
        # line matches "[0-9xa-f]+:"
        # parse addr
        addr, rest = line.split(':')
        _, addr = addr.split("x")
        intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
        inst["pc"] = intaddr
        rest = re.sub("[ \t]+", " ", rest.strip())
        opcode = rest.split(" ")[0]
        inst["opcode"] = opcode
        if len(rest.split(" ")) > 1:
            operands = rest.split(" ")[1]
            op0 = operands.split(",")[0]
        else:
            op0 = None

        # if we find a li zero, <something> is a canonical nop so
        # we need to skip until the next instruction is found
        if inst["opcode"] == "li" and op0 == "zero":
            self._skiplines(inst_only=True)
        else:
            # seek to next valid line and change state
            self._skiplines()

    def _txt_reg(self, inst):
        """Parse a register-write line.

        Fill inst["reg"]/inst["data"] for GPR writes, or inst["cap"]
        for capability register writes (which span two lines).
        """
        line = self.txt_trace.readline()
        m = re.search(r"Write \$?([a-z0-9]+) = ([a-f0-9]+)", line)
        if m:
            # write to gpr format
            # Write t4 = 0000000000008400
            reg = m.group(1)
            val = m.group(2)
            intval = struct.unpack(">Q", bytes.fromhex(val))[0]
            inst["reg"] = reg
            inst["data"] = intval
        else:
            # write to cap register format
            # Write C24|v:1 s:0 p:7fff807d b:0000007fffffdb20 l:0000000000000400
            # |o:0000000000000000 t:0
            m = re.search(
                r"Write C([0-9]+)\|v:([01]) s:([01]) p:([a-f0-9]+) "
                r"b:([a-f0-9]+) l:([a-f0-9]+)", line)
            if m is None:
                raise RuntimeError("Malformed cap reg write")
            # first line of a capability match
            # next line must match this
            line = self.txt_trace.readline()
            nxt = re.search(r"\|o:([a-f0-9]+) t:([a-f0-9]+)", line)
            if nxt is None:
                raise RuntimeError("Malformed cap reg write")
            v = m.group(2)
            s = m.group(3)
            p = m.group(4)
            b = m.group(5)
            l = m.group(6)
            o = nxt.group(1)
            t = nxt.group(2)
            try:
                if len(t) % 2:
                    # hotfix fromhex() that does not like odd num of digits
                    t = "0" + t
                t = bytes.fromhex(t)
                # zero-pad to 4 bytes so it unpacks as a 32-bit word
                t = t.rjust(4, b"\x00")
            except Exception:
                logger.error("Can not load type field %s %s", m.groups(),
                             nxt.groups())
                raise
            # take only 16bit for permissions, the upper 16bit
            # are stored in the trace but ignored by cheritrace
            # as we do not care about uperms apparently.
            intp = struct.unpack(">L", bytes.fromhex(p))[0] & 0xffff
            intb = struct.unpack(">Q", bytes.fromhex(b))[0]
            intl = struct.unpack(">Q", bytes.fromhex(l))[0]
            into = struct.unpack(">Q", bytes.fromhex(o))[0]
            intt = struct.unpack(">L", t)[0] & 0x00ffffff
            inst["cap"] = {
                "valid": int(v),
                "sealed": int(s),
                "perms": intp,
                "base": intb,
                "length": intl,
                "offset": into,
                "otype": intt,
            }
        # seek to next valid line and change state
        self._skiplines()

    def _txt_mem(self, inst):
        """Parse a memory-access line into inst["load"] or inst["store"]."""
        line = self.txt_trace.readline()
        m = re.search(r"(Cap )?Memory Read +\[([0-9a-f]+)\]", line)
        if m:
            # data load
            is_cap = m.group(1)
            addr = m.group(2)
            intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
            inst["load"] = intaddr
            if is_cap:
                # capability accesses span two lines, skip the second
                self.txt_trace.readline()
        else:
            m = re.search(r"(Cap )?Memory Write +\[([0-9a-f]+)\]", line)
            if m is None:
                raise RuntimeError("Mem not a read nor a write")
            # data store
            is_cap = m.group(1)
            addr = m.group(2)
            intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
            inst["store"] = intaddr
            if is_cap:
                # capability accesses span two lines, skip the second
                self.txt_trace.readline()
        # seek to next valid line and change state
        self._skiplines()

    def _next_txt_instr(self):
        """
        Fetch the next instruction from the txt trace.
        This is the state machine main loop.

        :return: dict describing the instruction (pc, opcode and
            optional reg/data, cap, load, store entries)
        """
        instr = {}

        while self.txt_parse_state != State.S_INSTR_END:
            if self.txt_parse_state == State.S_SKIP:
                self._skiplines()
            elif self.txt_parse_state == State.S_INSTR:
                self._txt_instr(instr)
            elif self.txt_parse_state == State.S_REG:
                self._txt_reg(instr)
            elif self.txt_parse_state == State.S_MEM:
                self._txt_mem(instr)
            elif self.txt_parse_state == State.S_CAP_MEM:
                self._txt_mem(instr)
        # next call starts always from an instruction
        self.txt_parse_state = State.S_INSTR
        return instr

    def _dump_txt_inst(self, txt_inst):
        """Return a human-readable rendering of a parsed txt instruction."""
        string = "pc:0x%x %s" % (txt_inst["pc"], txt_inst["opcode"])
        if "load" in txt_inst:
            string += " load:%x" % txt_inst["load"]
        if "store" in txt_inst:
            string += " store:%x" % txt_inst["store"]
        if "data" in txt_inst:
            string += " val:%x" % txt_inst["data"]
        if "cap" in txt_inst:
            txt_cap = txt_inst["cap"]
            string += " v:%d s:%d b:%x o:%x l:%x p:%x t:%x" % (
                txt_cap["valid"], txt_cap["sealed"], txt_cap["base"],
                txt_cap["offset"], txt_cap["length"], txt_cap["perms"],
                txt_cap["otype"])
        return string

    def _parse_exception(self, entry, regs, disasm, idx):
        """On a trap entry only the PC can be checked against the txt trace."""
        super()._parse_exception(entry, regs, disasm, idx)

        # read entry from the text trace
        txt_inst = self._next_txt_instr()
        logger.debug("Scan txt:<%s>, bin:<unparsed>",
                     self._dump_txt_inst(txt_inst))
        # check only pc which must be valid anyway
        assert txt_inst["pc"] == entry.pc

    def scan_all(self, inst, entry, regs, last_regs, idx):
        """Compare the next text-trace instruction with a binary entry.

        Differences are reported via AssertionError; opcode mismatches
        only produce a warning due to disassembly differences.
        """
        # read entry from the text trace
        txt_inst = self._next_txt_instr()
        logger.debug("Scan txt:<%s>, bin:%s", self._dump_txt_inst(txt_inst),
                     inst)
        try:
            # check that the instruction matches
            assert txt_inst["pc"] == entry.pc
            # NOTE(review): pc_only is read directly from self while other
            # options go through config — confirm TaskDriver sets it
            if self.pc_only:
                # only check pc, skip everything else
                return False
            if inst.opcode in ["mfc0"]:
                # these have weird behaviour so just ignore for now
                return False

            if txt_inst["opcode"] != inst.opcode:
                # opcode check is not mandatory due to disassembly differences
                # issue a warning anyway for now
                logger.warning("Opcode differ {%d} txt:<%s> bin:%s",
                               entry.cycles, self._dump_txt_inst(txt_inst),
                               inst)
            if "load" in txt_inst:
                assert txt_inst["load"] == entry.memory_address
            if "store" in txt_inst:
                assert txt_inst["store"] == entry.memory_address
            if "data" in txt_inst:
                if inst.opcode not in ["mfc0"]:
                    reg_number = entry.gpr_number()
                    for op in inst.operands:
                        if op.is_register and op.gpr_index == reg_number:
                            # bugfix: the debug format string had no args
                            logger.debug("gpr:%d reg:%d",
                                         op.gpr_index, reg_number)
                            assert txt_inst["data"] == op.value, \
                                "reg data do not match %d != %d" % (
                                    txt_inst["data"], op.value)
                            break
                #     # XXX we have a problem with extracting the jump target
                #     # from jal/j the binary trace have an offset that does
                #     # not make much sense..
                #     assert txt_inst["data"] == inst.op0.value
            if "cap" in txt_inst:
                cap = CheriCap(inst.op0.value)
                txt_cap = txt_inst["cap"]
                assert txt_cap["valid"] == cap.valid, \
                    "tag do not match %d != %d" % (
                        txt_cap["valid"], cap.valid)
                assert txt_cap["sealed"] == cap.sealed, \
                    "seal do not match %d != %d" % (
                        txt_cap["sealed"], cap.sealed)
                assert txt_cap["base"] == cap.base, \
                    "base do not match %x != %x" % (
                        txt_cap["base"], cap.base)
                assert txt_cap["length"] == cap.length, \
                    "length do not match %x != %x" % (
                        txt_cap["length"], cap.length)
                assert txt_cap["offset"] == cap.offset, \
                    "offset do not match %x != %x" % (
                        txt_cap["offset"], cap.offset)
                assert txt_cap["perms"] == cap.permissions, \
                    "perms do not match %x != %x" % (
                        txt_cap["perms"], cap.permissions)
                assert txt_cap["otype"] == cap.objtype, \
                    "otype do not match %x != %x" % (
                        txt_cap["otype"], cap.objtype)

        except AssertionError:
            logger.error("Assertion failed at {%d} inst:%s txt:<%s>",
                         entry.cycles, inst, self._dump_txt_inst(txt_inst))
            raise
        self.progress.advance()
        return False
# Ejemplo n.º 13
class PtrSizeCdfDriver(TaskDriver, ExternalLegendTopPlotBuilder):
    """Plot the CDF of capability bound sizes for a set of graphs.

    The driver builds one CDF dataset per input graph, plus optional
    extra datasets for subsets of vertices selected by the --split
    option (stack, malloc, executable, global, kernel, caprelocs).

    Note: a duplicate definition of ``_get_legend_kwargs`` has been
    removed; only the later definition was effective, so behaviour
    is unchanged.
    """

    title = "CDF of the size of capabilities created"
    x_label = "Size"
    y_label = "Proportion of the total number of capabilities"

    outfile = Option(help="Output file", default="ptrsize_cdf.pdf")
    publish = Option(help="Adjust plot for publication", action="store_true")
    absolute = Option(help="Do not normalize y axis, show absolute count"
                      " of capabilities", action="store_true")

    filters = Option(
        default=[],
        action="append",
        nargs="+",
        choices=("stack", "mmap", "malloc"),
        help="set of possible elements to modify for the CDF, assume"
        "that the size of the given elements is the maximum possible.")

    split = Option(
        default=[],
        action="append",
        choices=("stack", "stack-all", "malloc", "exec",
                 "glob", "kern", "caprelocs", "caprelocs-only"),
        help="Separate the given vertices in a separate CDF")

    def __init__(self, pgm_list, vmmap, **kwargs):
        super().__init__(**kwargs)
        self.pgm_list = pgm_list
        """List of graph managers to plot."""

        self.vmmap = vmmap
        """VMmap model of the process memory mapping."""

        self.datasets = []
        """CDF datasets collected by :meth:`_make_cdf_dataset`."""

        if self.config.publish:
            # larger fonts for print-quality figures
            self._style["font"] = FontProperties(size=25)

        if self.config.absolute:
            self.y_label = "Number of capabilities"

        self.title += " in {}".format(",".join([pgm.name for pgm in self.pgm_list]))

    def _get_title_kwargs(self):
        """Suppress the title in publication mode to gain plot space."""
        kw = super()._get_title_kwargs()
        if self.config.publish:
            # suppress the title so we have more space
            kw.update({"visible": False})
        return kw

    def _get_savefig_kwargs(self):
        """Use a high DPI when saving the figure.

        NOTE(review): this delegates to ``_get_figure_kwargs`` — verify
        this is intentional and not a typo for a savefig-specific hook.
        """
        kw = super()._get_figure_kwargs()
        kw["dpi"] = 300
        return kw

    def _get_axes_rect(self):
        """Use a tighter axes rectangle in publication mode."""
        if self.config.publish:
            return [0.125, 0.08, 0.85, 0.8]
        return super()._get_axes_rect()

    def make_plot(self):
        """Draw the plot with a log2-scaled X axis."""
        super().make_plot()
        self.ax.set_xscale("log", basex=2)

    def _make_cdf_dataset(self, pgm, vfilt, setname):
        """Build one CDF dataset and append it to :attr:`datasets`.

        :param pgm: the graph manager to read vertices from
        :param vfilt: optional boolean vertex property selecting the
            subset of vertices, or None for the whole graph
        :param setname: label of the dataset in the plot legend
        """
        if vfilt is not None:
            view = GraphView(pgm.graph, vfilt=vfilt)
            cdf = PtrBoundCdf(pgm, self.config.absolute, graph=view)
        else:
            cdf = PtrBoundCdf(pgm, self.config.absolute)
        cdf.num_ignored = -1
        cdf.name = setname
        cdf.slice_name = None
        cdf.build_cdf()
        self.datasets.append(cdf)

    def _get_legend_kwargs(self):
        """
        Change layout of the number of colums in the legend
        """
        kw = super()._get_legend_kwargs()
        kw.update({
            "bbox_to_anchor": (-0.125, 1.009, 1.125, 0.102),
            "ncol": 6,
            "labelspacing": 0.5,
            "borderpad": 0.1
        })
        return kw

    def run(self):
        """Build all CDF datasets and produce the plot."""
        for idx, pgm in enumerate(self.pgm_list):
            self._make_cdf_dataset(pgm, None, "all")

            for split_set in self.config.split:
                if split_set == "stack":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_usr_stack,
                                           split_set)
                elif split_set == "stack-all":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_usr_stack,
                                           "stack")
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_stack,
                                           "stack-deref")
                elif split_set == "malloc":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_malloc,
                                           split_set)
                elif split_set == "exec":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_exec,
                                           split_set)
                elif split_set == "caprelocs":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_capreloc,
                                           "relocs")
                elif split_set == "caprelocs-only":
                    # caprelocs that are not also global pointers
                    difference = pgm.graph.new_vertex_property("bool")
                    difference.a = (pgm.graph.vp.annotated_capreloc.a &
                                    ~pgm.graph.vp.annotated_globptr.a)
                    self._make_cdf_dataset(pgm, difference, "relocs-only")
                elif split_set == "glob":
                    # any global pointers or pointers derived from global pointers
                    combined = pgm.graph.new_vertex_property("bool")
                    combined.a = (pgm.graph.vp.annotated_globptr.a |
                                  pgm.graph.vp.annotated_globderived.a)
                    self._make_cdf_dataset(pgm, combined, split_set)
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_captblptr, "captbl")
                elif split_set == "kern":
                    # kernel originated and syscall originated vertices
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_ksyscall, "syscall")
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_korigin, "kern")
                else:
                    logger.error("Invalid --split option value %s", split_set)
                    raise ValueError("Invalid --split option value")

        self.register_patch_builder(
            self.datasets, CdfPatchBuilder(self.config.absolute))
        self.process(out_file=self.config.outfile)
# Ejemplo n.º 14
class CallGraphTraceParser(CallbackTraceParser, TaskDriver):
    """
    Dump a stack trace given a cheri trace instruction.
    We also detect all the functions that have been called and returned
    during the backtrace.

    Keep 2 data structures:
    - a deque that holds the last return address found
    - a pandas dataframe that holds the function landing pad and return points for nested functions

    The deque is used during parsing to detect nested functions and call sites
    The pandas dataframe holds:
    - the nested functions (symbol address, return address, parent symbol address, resolved symbol info)
    - the backtrace functions (symbol address, parent symbol address, resolved symbol info)
    Starting from the given instruction (cycle) iterate the trace
    backwards and detect all the (c)jalr and (c)jr instructions.
    When a <return> is found, it is added to the pandas dataset
    """

    start = Option("-s",
                   type=int,
                   help="Backtrace starting from trace position",
                   default=None)
    end = Option("-e",
                 type=int,
                 help="Stop backtrace at given position",
                 default=None)
    depth = Option("-d",
                   help="Build the call graph/backtrace for the "
                   "last <depth> function calls",
                   type=int,
                   default=None)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # number of backtrace entries found so far
        self._backtrace_num = 0

        self.backtrace_depth = self.config.depth
        """Stop parsing after the backtrace has length <depth>"""

        self.return_stack = deque()
        """Stack of (addr, cycles, is_cap) return instructions found"""

        self.call_site_map = {}
        """Map call site addresses to vertices"""

        self.cgm = CallGraphManager()
        """Handle the structure of the call graph"""

        self.root = self.cgm.graph.add_vertex()
        """
        Root is an empty node that is filled with the current function info
        as the trace is parsed backwards.
        """

    def parse(self, start=None, end=None):
        """Parse the trace backwards (direction=1).

        NOTE(review): the start/end parameters are accepted for
        interface compatibility but the configured values are used.
        """
        # parse from the given start backwards
        logger.info("Scan trace %s", self.path)
        super().parse(self.config.start, self.config.end, 1)

    def do_scan(self, inst, entry):
        """Decide whether we should scan this instruction or not"""
        # kernel-mode entries are not part of the user call graph
        return not entry.is_kernel()

    def check_depth(self):
        """Check backtrace depth and decide whether to stop backtracing"""
        return (self.backtrace_depth is not None and
                self.backtrace_depth <= self._backtrace_num)

    def add_call(self, target, time, pc):
        """
        Register a call in the call graph when the call is not part of
        the backtrace.
        A new edge is added between the current "root" vertex and the
        call target vertex. If the target vertex does not exist it is
        created.
        """
        logger.debug("[%d] Call to 0x%x", time, target)
        # do we have a node for this call target?
        if target in self.call_site_map:
            target_vertex = self.call_site_map[target]
            # do we have already an edge towards that vertex?
            for e in self.root.out_edges():
                if e.target() == target_vertex:
                    # just increment the call count
                    call_edge = e
                    break
            else:
                # create the edge towards the target
                call_edge = self.cgm.graph.add_edge(self.root, target_vertex)
        else:
            # found a new call target, so create a vertex for it
            target_vertex = self.cgm.graph.add_vertex()
            self.cgm.addr[target_vertex] = target
            self.call_site_map[target] = target_vertex
            call_edge = self.cgm.graph.add_edge(self.root, target_vertex)
        self.cgm.t_call[call_edge].append(time)

    def add_backtrace(self, target, time, pc):
        """
        Register a call in the backtrace.
        Create a new "root" vertex and create an edge for the call between
        the new vertex and the current "root".
        If the current "root" vertex address exists in the graph reroute
        all edges from the current root to the duplicate vertex. This is
        required to handle recursion.
        """
        logger.debug("[%d] Backtrace call to 0x%x", time, target)
        if target in self.call_site_map:
            # there is already a vertex for the current function
            # take all edges from the current "root" and make them
            # start at the existing vertex, the root is not changed because
            # it stays empty and can be reused
            target_vertex = self.call_site_map[target]
            # bugfix: out_edges is a method and must be called; the edge
            # list is materialized since edges are removed while iterating
            for e in list(self.root.out_edges()):
                new_e = self.cgm.graph.add_edge(target_vertex, e.target())
                self.cgm.t_call[new_e] = self.cgm.t_call[e]
                self.cgm.backtrace[new_e] = self.cgm.backtrace[e]
                self.cgm.graph.remove_edge(e)
        else:
            self.cgm.addr[self.root] = target
            target_vertex = self.root
            self.root = self.cgm.graph.add_vertex()
        # connect the current root with the call target
        e = self.cgm.graph.add_edge(self.root, target_vertex)
        self.cgm.t_call[e].append(time)
        self.cgm.backtrace[e] = time
        self._backtrace_num += 1

    def scan_cjalr(self, inst, entry, regs, last_regs, idx):
        """Handle a capability call: match it against the last return seen."""
        # check that the call matches the last return instruction
        # that we have
        if not self.do_scan(inst, entry):
            return False

        call = inst.op1.value
        ret = inst.op0.value

        if len(self.return_stack) > 0:
            # if there was a return, that must match this call
            cjr_addr, cjr_cycles, is_cap = self.return_stack.pop()
            if not is_cap:
                logger.error("cjalr matches a non-capability return %s", inst)
                raise RuntimeError("cjalr matches a non-capability return")
            # check return value
            if ret.base + ret.offset != cjr_addr:
                logger.error(
                    "cjalr specifies different return addr "
                    "0x%x != 0x%x, inst %s", ret.base + ret.offset, cjr_addr,
                    inst)
                raise RuntimeError("cjalr specifies different return addr")
            self.add_call(call.base + call.offset, entry.cycles, entry.pc)
        else:
            self.add_backtrace(call.base + call.offset, entry.cycles, entry.pc)
        return self.check_depth()

    def scan_cjr(self, inst, entry, regs, last_regs, idx):
        """Record a capability return on the return stack."""
        if not self.do_scan(inst, entry):
            return False
        ret_cap = inst.op0.value
        # append (addr, cycles, is_cap)
        self.return_stack.append(
            (ret_cap.base + ret_cap.offset, entry.cycles, True))
        return self.check_depth()
# Ejemplo n.º 15
class CallGraphDriver(TaskDriver):
    """
    Run component for the call-graph and backtrace generator.
    The scan config element holds the interactive-mode arguments,
    other options are fixed at instantiation.
    """

    description = "Generate call graph and stack traces from cvtrace files"

    trace = Argument(help="Path to cvtrace file")
    sym = Option(nargs="*", help="Binaries providing symbols", default=None)
    vmmap = Option("-m",
                   help="Memory map file that specifies base addresses for "
                   "the binaries in --sym",
                   default=None)
    scan = NestedConfig(CallGraphTraceParser)
    # bugfix: the help strings of the two subcommands were swapped
    # (run() prints the backtrace for `backtrace` and plots for `callgraph`)
    backtrace = SubCommand(help="Show the backtrace")
    callgraph = SubCommand(CallGraphPlot, help="Plot the call graph")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # trace parser producing the call graph model
        self.parser = CallGraphTraceParser(trace_path=self.config.trace,
                                           config=self.config.scan)

    def update_config(self, config):
        """Propagate a new configuration to the nested parser."""
        super().update_config(config)
        self.parser.update_config(config.scan)

    def run(self):
        """Parse the trace, resolve symbols and run the subcommand."""
        self.parser.parse()
        # get the parsed model
        cgm = self.parser.cgm
        # if we have symbols and a vmmap, add symbols to the call graph
        if self.config.sym and self.config.vmmap:
            add_symbols = CallGraphAddSymbols(cgm, self.config.sym,
                                              self.config.vmmap)
            cgm.bfs_transform(add_symbols)
        # each subcommand attribute exists only when selected, hence
        # the AttributeError suppression
        with suppress(AttributeError):
            if self.config.backtrace:
                self.call_graph_backtrace(cgm)
        with suppress(AttributeError):
            if self.config.callgraph:
                self.plot = CallGraphPlot(config=self.config.callgraph)
                self.plot.plot(cgm)

    def call_graph_backtrace(self, cgm):
        """
        Dump the backtrace from the call graph parsed in the parser.
        """
        # only keep edges that carry backtrace timing information
        has_backtrace_info = cgm.graph.new_edge_property("bool")
        map_property_values(cgm.backtrace, has_backtrace_info,
                            lambda b: b != 0)
        cgm.graph.set_edge_filter(has_backtrace_info)

        # print entries ordered by the time of the call
        bt = sorted(cgm.graph.edges(), key=lambda e: cgm.backtrace[e])
        for e in bt:
            fn_time = cgm.backtrace[e]
            fn_addr = cgm.addr[e.source()]
            fn_name = cgm.name[e.source()]
            print("[%d] 0x%x %s" % (fn_time, fn_addr, fn_name))
        cgm.graph.clear_filters()
# Ejemplo n.º 16
class UserGraphPreprocessDriver(BaseToolTaskDriver):
    """
    Prepare a provenance graph that represents a single user-process
    trace: annotate it (symbols, stack, malloc, executable, globals,
    kernel origin), mask out irrelevant vertices, and save the result,
    optionally running a plot subcommand afterwards.
    """

    description = """
    Prepare graph that represent a single user process trace captured with qtrace.
    This will perform the following:
    - Annotate graph with symbols
    - Find the last execve where we enter the process and find the stack pointer.
    - Annotate successors of the stack pointer (annotated_stack)
    - Annotate anything dereferenced in the stack map
    - Annotate vertices returned by malloc (annotated_malloc)
    - Annotate executable vertices (annotated_exec)
    - Annotate global pointers and pointers used to load from captable
    - Annotate pointers that are returned from syscalls
    - Annotate pointers that originated in the kernel and are loaded from memory
    - Mask out all vertices and calls that are created before the last call to execve()
      and have not been marked
    - Mask out NULL capabilities
    - Mask out kernel capabilities that have not been marked
    """

    elfpath = Option(
        required=True,
        nargs="+",
        type=file_path_validator,
        default=[],
        help="Paths where to look for ELF files with symbols")
    vmmap = NestedConfig(VMMapFileParser)
    graph = Argument(
        type=file_path_validator,
        help="Path to the cheriplot graph.")
    outfile = Option(
        default=None,
        type=file_path_validator,
        help="Path to the output file")
    display_name = Option(
        default=None,
        help="New display-name for the graph")

    # available plots
    ptrsize_cdf = SubCommand(PtrSizeCdfDriver)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Memory map file parser
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()

        # Symbol reader used by the annotation visitors
        self.symreader = SymReader(vmmap=self.vmmap, path=self.config.elfpath)

        # Output file path, defaults to overwriting the input graph
        self.outfile = self.config.outfile or self.config.graph

        # Provenance graph manager for the graph being processed
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def _get_stack_map(self):
        """
        Return the (start, end) addresses of the stack mapping, i.e.
        the first grows-down region in the memory map, or None when
        no such region exists.
        """
        for vme in self.vmmap.get_model():
            if vme.grows_down:
                return (vme.start, vme.end)
        return None

    def _get_visit_chain(self):
        """
        Build the chain of graph visitors that annotate and filter the
        provenance graph, in the order listed in :attr:`description`.

        :raises ValueError: if the memory map contains no stack region,
            which the stack-annotation visitors require.
        """
        stack_map = self._get_stack_map()
        if stack_map is None:
            # fail early with a clear message instead of the opaque
            # TypeError raised by unpacking None
            raise ValueError(
                "No stack mapping (grows_down region) found in the "
                "vmmap file; can not annotate stack capabilities")
        stack_begin, stack_end = stack_map

        vchain = ChainGraphVisit(self.pgm)
        vchain += ResolveSymbolsGraphVisit(self.pgm, self.symreader, None)
        vchain += FindLastExecve(self.pgm)
        vchain += DecorateStackCapabilities(self.pgm)
        vchain += DecorateStackAll(self.pgm, stack_begin, stack_end)
        vchain += DecorateMalloc(self.pgm)
        vchain += DecorateExecutable(self.pgm)
        vchain += DecorateGlobalPointers(self.pgm, self.symreader)
        vchain += DecorateCapRelocs(self.pgm, self.symreader)
        vchain += DecorateKernelCapabilities(self.pgm)
        vchain += DecorateAccessedInUserspace(self.pgm)
        vchain += FilterBeforeExecve(self.pgm)
        vchain += FilterNullVertices(self.pgm)
        vchain += FilterUnusedKernelVertices(self.pgm)
        return vchain

    def run(self):
        """
        Run the tool filtering stages and save the output graph.
        """
        # reset filters on the graph
        self.pgm.graph.clear_filters()
        # get the new filter chain
        visitor_chain = self._get_visit_chain()
        # apply all operations in order
        filtered_graph = visitor_chain(self.pgm.graph)
        # get the resulting vertex filter and apply it to the main graph
        vfilt, _ = filtered_graph.get_vertex_filter()
        self.pgm.graph.set_vertex_filter(vfilt)
        # if we have to change the display name, do it
        if self.config.display_name:
            self.pgm.graph.gp.name = self.config.display_name
        # write out the graph
        with ProgressTimer("Write output graph", logger):
            self.pgm.save(self.outfile)

        # the subcommand entry may be absent from the config object,
        # hence the AttributeError suppression
        with suppress(AttributeError):
            if self.config.subcommand_class:
                # generate plot (would be nice to support more than 1 per run)
                plot = self.config.subcommand_class([self.pgm], self.vmmap,
                                                    config=self.config)
                plot.run()