class GraphParserDriver(BaseTraceTaskDriver):
    """
    Task driver that generates a cheriplot graph from a trace file.

    Available parameters are:

    * :class:`BaseTraceTaskDriver` parameters
    * threads: the number of threads to use (default 1)
    * outfile: the output trace file (default <trace_file_name>_graph.gt)
    """
    description = """
    Trace parse tool.
    This tool generates a cheriplot graph from a CHERI trace.
    """
    threads = Option(
        type=int,
        default=1,
        help="Run the tool with the given number of workers (experimental)")
    outfile = Option(
        default=None,
        type=file_path_validator,
        help="Output graph file name")
    display_name = Option(
        default=None,
        help="User-readable name of the dataset")
    cheri_cap_size = Option(
        default="256",
        choices=("128", "256"),
        help="Cheri capability size")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Default output file is derived from the trace file name.
        default_outfile = "{}_graph.gt".format(self.config.trace)
        outfile = self.config.outfile or default_outfile

        # Translate the capability size option (bits) into bytes.
        if self.config.cheri_cap_size == "128":
            cap_size = 16
        elif self.config.cheri_cap_size == "256":
            cap_size = 32
        else:
            # Defensive: unreachable in practice, the Option choices
            # already restrict the valid values.
            raise ValueError("Invalid capability size {}".format(
                self.config.cheri_cap_size))
        self.pgm = ProvenanceGraphManager(outfile)
        """Graph manager."""

        self._parser = CheriMipsModelParser(
            self.pgm, capability_size=cap_size,
            trace_path=self.config.trace, threads=self.config.threads)
        """Graph parser strategy, depends on the architecture."""

    def run(self):
        """Parse the trace and save the resulting provenance graph."""
        self._parser.parse()
        # get the parsed provenance graph model
        self.pgm.save(name=self.config.display_name)
        # force free the parser to reclaim memory
        del self._parser
class PtrHeadroomPlotDriver(TaskDriver, ExternalLegendTopPlotBuilder):
    """
    Plot the distribution of unused head/tail space reachable through
    capabilities, or the details of a single histogram bucket.
    """
    title = "Size of untouched head/tail space accessible by a capability"
    x_label = "Size (power of 2)"
    y_label = "Amount of capabilities"

    outfile = Option(help="Output file", default="ptrheadroom.pdf")
    publish = Option(help="Adjust the plot for publication",
                     action="store_true")
    bucket_where = Option(help="Plot fn-address vs t-alloc of the capabilities"
                          " that end up in a given bucket",
                          default=-1, type=int)

    def __init__(self, pgm_list, vmmap, **kwargs):
        super().__init__(**kwargs)
        # Graph managers for every input graph.
        self.pgm_list = pgm_list
        # VM map model of the traced process.
        self.vmmap = vmmap
        if self.config.publish:
            # larger font for print-quality output
            self._style["font"] = FontProperties(size=25)

    def _get_xlabels_kwargs(self):
        """Rotate the X tick labels to fit the power-of-2 buckets."""
        kwargs = super()._get_xlabels_kwargs()
        kwargs.update(rotation="vertical")
        return kwargs

    def _get_axes_rect(self):
        """Use a custom axes rectangle when preparing for publication."""
        if not self.config.publish:
            return super()._get_axes_rect()
        return [0.1, 0.15, 0.85, 0.8]

    def run(self):
        """Build the headroom dataset and render the plot."""
        # for now use only the first graph that we are given
        datasets = [PtrHeadroom(self.pgm_list[0])]
        if self.config.bucket_where < 0:
            # general headroom distribution plot
            builder = HeadroomPatchBuilder()
        else:
            # detail plot for one specific bucket
            builder = FnPerBucketPatchBuilder(self.config.bucket_where)
        self.register_patch_builder(datasets, builder)
        self.process(out_file=self.config.outfile)
class PytracedumpDriver(BaseTraceTaskDriver):
    """Driver that wires the trace dump parser to the CLI task machinery."""

    description = """Dump CHERI binary trace.
    Each instruction entry has the following format:
    {<ASID>:<instruction_cycle_number>} <PC> <instr_mnemonic> <operands>

    Memory accesses show the referenced address in the line below:
    <target_register> = [<hex_addr>] or [<hex_addr>] = <source_register>

    Capabilities as displayed in the following format:
    [b:<base> o:<offset> l:<length> p:<permission> t:<obj_type> v:<valid> s:<sealed>]
    t_alloc and t_free are only relevant in the provenance graph.

    When dumping the register set, the format of each entry is the following:
    [<register_value_valid>] <register> = <value>"""

    scan = NestedConfig(TraceDumpParser)
    symbols_path = Option(
        nargs="*",
        help="Path where to look for binaries in the vmmap, "
        "default is current directory.",
        default=["."])
    vmmap = NestedConfig(VMMapFileParser)
    threads = Option(type=int, default=1,
                     help="Run the tool with the given number of workers")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The memory map must be parsed first, the symbol reader needs it.
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()
        self.symbols = SymReader(vmmap=self.vmmap,
                                 path=self.config.symbols_path)
        self.parser = TraceDumpParser(trace_path=self.config.trace,
                                      sym_reader=self.symbols,
                                      config=self.config.scan,
                                      threads=self.config.threads)

    def update_config(self, config):
        """Propagate a configuration change to the nested dump parser."""
        super().update_config(config)
        self.parser.update_config(config.scan)

    def run(self):
        """Run the dump parser over the configured trace region."""
        scan = self.config.scan
        self.parser.parse(scan.start, scan.end)
class CallGraphPlot(ConfigurableComponent):
    """Handle plotting of a call graph using graph-tool layouts"""

    outfile = Option(
        "-o",
        help="Save plot to file, see matplotlib for supported formats "
        "(svg, png, pgf...)",
        required=True)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.fig, self.ax = self.init_axes()

    def init_axes(self):
        """
        Create the matplotlib figure and axes used for the plot.

        :return: (figure, axes) tuple
        """
        # NOTE(review): cairo appears to be required for graph-tool's
        # graph_draw with the mplfig argument -- confirm.
        plt.switch_backend("cairo")
        fig = plt.figure(figsize=(15, 10))
        ax = fig.add_axes([
            0.05,
            0.15,
            0.9,
            0.80,
        ])
        return (fig, ax)

    def plot(self, cgm):
        """
        Plot the call graph

        :param cgm: The call graph model to plot
        :type cgm: :class:`cheriplot.callgraph.model.CallGraphManager`
        """
        graph = cgm.graph
        # layout = arf_layout(graph, max_iter=0, d=5)
        layout = sfdp_layout(graph)
        # Edge width is proportional to the backtrace property.
        pen_width = prop_to_size(cgm.backtrace, mi=0.5, ma=5)
        # Label every vertex with the hex form of its address property.
        label = graph.new_vertex_property("string")
        map_property_values(graph.vp.addr, label, lambda addr: "0x%x" % addr)
        self.ax.set_axis_off()
        graph_draw(graph, pos=layout, mplfig=self.ax, vertex_shape="circle",
                   vertex_text=label, vertex_text_position=-1,
                   edge_pen_width=pen_width)
        plt.savefig(self.config.outfile)
        print("Written file %s" % self.config.outfile)
class BaseAddressMapPlotDriver(VMMapPlotDriver, ASAxesPlotBuilderNoTitle):
    """
    Plot that shows the capability size in the address space
    vs the time of allocation (i.e. when the capability is created).

    Note this only builds a plot for the first graph in
    :prop:`VMMapPlotDriver._pgm_list`.
    """
    title = "Capabilities derivation time vs capability position"
    x_label = "Virtual Address"
    y_label = "Time (million of instructions)"

    publish = Option(help="Adjust plot for publication", action="store_true")

    # Patch builder class for the capability patches; must be set by
    # subclasses, run() would fail with None.
    patch_builder_class = None

    def _get_axes_rect(self):
        # Publication layout leaves more room for the larger fonts.
        if self.config.publish:
            return [0.1, 0.25, 0.85, 0.65]
        return super()._get_axes_rect()

    def make_axes(self):
        """
        Set the y-axis scale to display millions of cycles instead of
        the number of cycles.
        """
        fig, ax = super().make_axes()
        # NOTE(review): "linear_unit" is presumably a custom matplotlib
        # scale registered elsewhere in the project -- confirm.
        ax.set_yscale("linear_unit", unit=10**-6)
        return (fig, ax)

    def make_plot(self):
        """Create the address-map plot."""
        super().make_plot()
        # Time grows downwards on the address-map plot.
        self.ax.invert_yaxis()

    def run(self):
        """Build and render the address-map plot for the first graph."""
        if self.config.publish:
            # set the style
            self._style["font"] = FontProperties(size=20)
            self._style["font-small"] = FontProperties(size=15)
        pgm = self._pgm_list[0]
        graph = pgm.prov_view()
        cap_builder = self.patch_builder_class(figure=self.fig, pgm=pgm)
        self.register_patch_builder(graph.vertices(), cap_builder)
        # Overlay the process memory mapping regions.
        self.register_patch_builder(self._vmmap, VMMapPatchBuilder(self.ax))
        self.process(out_file=self._outfile)
class VMMapPlotDriver(TaskDriver):
    """
    Base driver for plots that require a vmmap file as an input
    """

    outfile = Option(help="Output file", default=None)

    def __init__(self, pgm_list, vmmap, **kwargs):
        """
        :param pgm_list: provenance graph managers, one per input graph
        :param vmmap: process memory mapping model
        :param kwargs: TaskDriver arguments
        """
        super().__init__(**kwargs)
        # List of graph managers for every input graph.
        self._pgm_list = pgm_list
        # The process memory mapping model.
        self._vmmap = vmmap
        # Output file name for the plot; derived from the concrete
        # driver class name unless given explicitly.
        fallback = "{}_plot.pdf".format(type(self).__name__.lower())
        self._outfile = self.config.outfile or fallback
class TraceDumpParser(MultiprocessCallbackParser, ConfigurableComponent):
    """
    Parser that performs filtering and search operations on a trace.

    Every trace entry is checked against the configured match conditions;
    matching entries (plus an optional context window of entries before
    and after, see the ``-A``/``-B`` options) are dumped to the output
    stream.
    """

    range_format_help = "Accept a range in the form <start>-<end>, -<end>, "\
                        "<start>- or <single_value>"

    info = Option(action="store_true", help="Print trace info and exit")
    start = Option("-s", type=int, default=0,
                   help="Start offset in the trace")
    end = Option("-e", type=int, default=None,
                 help="Stop offset in the trace")
    outfile = Option("-o", type=str, default=None,
                     help="Write output to the given file")
    show_regs = Option("-r", action="store_true",
                       help="Dump register content")
    instr = Option(default=None, help="Find instruction occurrences")
    reg = Option(default=None,
                 help="Show the instructions that use the given register")
    pc = Option(type=option_range_validator, default=None,
                help="Find instructions with PC in given range. " +
                range_format_help)
    mem = Option(
        type=option_range_validator,
        default=None,
        help="Show the instructions that use the given memory address. " +
        range_format_help)
    exception = Option(
        default=None,
        help="Show the instructions that raise a given exception. "
        "Accept the exception number in [0-30] or 'any'.")
    syscall = Option(default=None, type=int,
                     help="Show the syscalls with given code")
    nop = Option(type=any_int_validator, default=None,
                 help="Show canonical nops with given code")
    perms = Option(type=any_int_validator, default=None,
                   help="Find instructions that touch capabilities"
                   " with the given permission bits set")
    after = Option("-A", type=int, default=0,
                   help="Dump n instructions after a matching one")
    before = Option("-B", type=int, default=0,
                    help="Dump n instructions before a matching one")
    match_any = Option(
        action="store_true",
        help="Return a trace entry when matches any of the conditions "
        "instead of all")

    def __init__(self, **kwargs):
        """
        This parser filters the trace according to a set of match
        conditions. Multiple match conditions can be used at the same
        time to refine or widen the filter.

        :param sym_reader: symbol reader helper, used to extract symbol
            information
        """
        sym_reader = kwargs.pop("sym_reader")

        assert "trace_path" in kwargs, "trace_path argument is required!"
        if "keyframe_file" not in kwargs:
            kwargs["keyframe_file"] = "{}.kf".format(kwargs["trace_path"])
        super().__init__(**kwargs)

        if not self.is_worker:
            # update kwargs used to create workers
            self.kwargs["sym_reader"] = sym_reader

        self._entry_history = deque([], self.config.before)
        """FIFO instructions that may be shown if a match is found"""

        self._dump_next = 0
        """The remaining number of instructions to dump after a match"""

        self._kernel_mode = False
        """Keep track of kernel-userspace transitions"""

        self.sym_reader = sym_reader
        """Helper used to search symbols for addresses"""

        self.filters = [
            self._match_instr,
            self._match_pc,
            self._match_addr,
            self._match_reg,
            self._match_exception,
            self._match_nop,
            self._match_syscall,
            self._match_perm
        ]

        self.out = sys.stdout
        """Output file stream"""

        if self.config.outfile:
            # Each (worker) process writes to its own temporary file;
            # the files are concatenated in mp_merge().
            self.out = tempfile.NamedTemporaryFile(mode="w")
        self.update_config(self.config)

    def update_config(self, config):
        """Reset the match state when the configuration changes."""
        self._entry_history = deque([], config.before)
        self._dump_next = 0
        self._kernel_mode = False

    def repr_register(self, entry):
        """Return a printable name for the register used by a trace entry.

        Returns None when the entry references neither a GPR nor a
        capability register.
        """
        if entry.gpr_number() != -1:
            return "$%d" % entry.gpr_number()
        elif entry.capreg_number() != -1:
            return "$c%d" % entry.capreg_number()

    def dump_cap(self, cap):
        """Return the string representation of a capability operand."""
        chericap = CheriCap(cap)
        return str(chericap)

    def dump_regs(self, entry, regs, last_regs):
        """Dump the full register set to the output stream.

        ``entry`` and ``last_regs`` are unused but kept for signature
        compatibility with the do_dump() call site.
        """
        # GPRs $1..$31 ($0 is hardwired to zero and not stored)
        for idx in range(0, 31):
            real_regnum = idx + 1
            self.out.write("[%d] $%d = %x\n" % (
                regs.valid_gprs[idx], real_regnum, regs.gpr[idx]))
        # capability registers $c0..$c31
        for idx in range(0, 32):
            self.out.write("[%d] $c%d = %s\n" % (
                regs.valid_caps[idx], idx,
                self.dump_cap(regs.cap_reg[idx])))

    def dump_instr(self, inst, entry, idx):
        """Dump a single instruction with its memory/register effects."""
        if entry.exception != 31:
            exception = "except:%x" % entry.exception
        else:
            # no exception
            exception = ""
        instr_dump = "{%d:%d} 0x%x %s %s" % (
            entry.asid, entry.cycles, entry.pc, inst.inst.name, exception)
        sym = None
        # XXX it would be nice to have an inst.is_branch property
        # it should be provided by LLVM quite easily
        if inst.opcode == "cjalr":
            sym_addr = inst.op1.value.base + inst.op1.value.offset
            sym = self.sym_reader.find_symbol(sym_addr)
        elif inst.opcode == "cjr":
            sym_addr = inst.op0.value.base + inst.op0.value.offset
            sym = self.sym_reader.find_symbol(sym_addr)
        elif inst.opcode == "jalr" or inst.opcode == "jr":
            sym_addr = inst.op0.value
            sym = self.sym_reader.find_symbol(sym_addr)
        if sym:
            # annotate branch targets with the symbol name
            instr_dump = "%s (%s)" % (instr_dump, sym)
        self.out.write(instr_dump)
        self.out.write("\n")
        # dump read/write
        if inst.cd is None:
            # no operands for the instruction
            return
        if entry.is_load or entry.is_store:
            sym = self.sym_reader.find_symbol(entry.memory_address)
            if sym:
                loc = "[%x (%s)]" % (entry.memory_address, sym)
            else:
                loc = "[%x]" % entry.memory_address
            if entry.is_load:
                self.out.write("$%s = %s\n" % (inst.cd.name, loc))
            else:
                self.out.write("%s = $%s\n" % (loc, inst.cd.name))
        if inst.op0.is_register:
            if inst.op0.gpr_index != -1:
                gpr_value = inst.op0.value
                gpr_name = inst.op0.name
                if gpr_value is not None:
                    self.out.write("$%s = %x\n" % (gpr_name, gpr_value))
                else:
                    self.out.write("$%s = Unknown\n" % gpr_name)
            elif (inst.op0.cap_index != -1 or
                  inst.op0.caphw_index != -1):
                cap_name = inst.op0.name
                cap_value = inst.op0.value
                if cap_value is not None:
                    self.out.write("$%s = %s\n" % (
                        cap_name, self.dump_cap(cap_value)))
                else:
                    self.out.write("$%s = Unknown\n" % cap_name)

    def dump_kernel_user_switch(self, entry):
        """Emit a marker line whenever the kernel/user mode changes."""
        if self._kernel_mode != entry.is_kernel():
            if entry.is_kernel():
                self.out.write("Enter kernel mode {%d:%d}\n" % (
                    entry.asid, entry.cycles))
            else:
                self.out.write("Enter user mode {%d:%d}\n" % (
                    entry.asid, entry.cycles))
            self._kernel_mode = entry.is_kernel()

    def do_dump(self, inst, entry, regs, last_regs, idx):
        """Dump an instruction and, optionally, the register set."""
        # dump instr
        self.dump_instr(inst, entry, idx)
        if self.config.show_regs:
            self.dump_regs(entry, regs, last_regs)

    def _update_match_result(self, match, value):
        """
        Combine the current match result with the value of
        a test according to the match mode.

        A None value means "filter not enabled" and leaves the result
        unchanged.
        """
        if value is None:
            return match
        if self.config.match_any:
            return match or value
        else:
            return match and value

    def _check_limits(self, start, end, value):
        """Return True when value is within the [start, end] bounds.

        A None bound is treated as unbounded on that side.
        """
        result = True
        if start is not None and start > value:
            result = False
        if end is not None and end < value:
            result = False
        return result

    def _match_instr(self, inst, regs):
        """Check if the current instruction matches"""
        if self.config.instr:
            return self.config.instr == inst.opcode
        return None

    def _match_pc(self, inst, regs):
        """Check if the current instruction PC matches"""
        if self.config.pc:
            start, end = self.config.pc
            return self._check_limits(start, end, inst.entry.pc)
        return None

    def _match_addr(self, inst, regs):
        """Check if the current load or store address matches"""
        if self.config.mem:
            if inst.entry.is_load or inst.entry.is_store:
                start, end = self.config.mem
                return self._check_limits(start, end,
                                          inst.entry.memory_address)
            else:
                return False
        return None

    def _match_reg(self, inst, regs):
        """Check if the current instruction uses a register"""
        if self.config.reg:
            for operand in inst.operands:
                if not operand.is_register:
                    continue
                if operand.name == self.config.reg:
                    return True
            return False
        return None

    def _match_exception(self, inst, regs):
        """Check if an exception occurred while executing an instruction"""
        if self.config.exception:
            if inst.entry.exception == 31:
                # no exception
                return False
            elif self.config.exception == "any":
                return True
            else:
                return inst.entry.exception == int(self.config.exception)
        return None

    def _match_syscall(self, inst, regs):
        """Check if this instruction is a syscall with given code"""
        # system call code is in v0
        code_reg = 2
        if self.config.syscall:
            if inst.opcode == "syscall" and inst.entry.exception == 8:
                # BUGFIX: was regs.valid_grps (AttributeError); the
                # register set exposes valid_gprs (see dump_regs).
                if (regs.valid_gprs[code_reg] and
                        regs.gpr[code_reg] == self.config.syscall):
                    return True
            return False
        return None

    def _match_perm(self, inst, regs):
        """Check if this instruction uses capabilities with the given perms"""
        if self.config.perms:
            for operand in inst.operands:
                if (not operand.is_capability or
                        operand.value is None):
                    # if not a capability or the register in the
                    # register set is not valid
                    continue
                cap_reg = CheriCap(operand.value)
                if cap_reg.has_perm(self.config.perms):
                    return True
            return False
        return None

    def _match_nop(self, inst, regs):
        """Check if instruction is a given canonical NOP"""
        if self.config.nop:
            if inst.opcode == "lui":
                return (inst.op0.gpr_index == 0 and
                        inst.op1.value == self.config.nop)
            return False
        return None

    def scan_all(self, inst, entry, regs, last_regs, idx):
        """Trace scan callback: dump entries matching the filters.

        Keeps a FIFO of the last ``before`` entries and a countdown of
        ``after`` entries so that matched instructions are shown with
        their context window.
        """
        if self._dump_next > 0:
            # still inside the "after" window of a previous match
            self.dump_kernel_user_switch(entry)
            self._dump_next -= 1
            self.do_dump(inst, entry, regs, last_regs, idx)
        else:
            # initial match value, if match_any is true
            # we OR the match results so start with false
            # else we AND them, so start with true
            match = not self.config.match_any
            for checker in self.filters:
                result = checker(inst, regs)
                match = self._update_match_result(match, result)
            if match:
                self.dump_kernel_user_switch(entry)
                # dump all the instructions in the queue
                while len(self._entry_history) > 0:
                    old_inst, old_idx = self._entry_history.popleft()
                    self.do_dump(old_inst, old_inst.entry,
                                 old_inst._regset, old_inst._prev_regset,
                                 old_idx)
                try:
                    self.do_dump(inst, entry, regs, last_regs, idx)
                except Exception as e:
                    logger.error("Can not dump instruction %s: %s",
                                 inst, e)
                    return True
                self._dump_next = self.config.after
            else:
                self._entry_history.append((inst, idx))
        return False

    def parse(self, start=None, end=None, direction=0):
        """Run the scan between the given offsets.

        Note: an explicit start of 0 falls back to config.start, which
        also defaults to 0, so behavior is unchanged.
        """
        start = start or self.config.start
        end = end or self.config.end
        if self.config.info:
            self.out.write("Trace size: %s\n" % len(self))
        else:
            super().parse(start, end)

    def mp_result(self):
        """Return the temporary file."""
        self.out.flush()
        return self.out.name

    def mp_merge(self, results):
        """Concatenate temporary files"""
        if self.config.outfile:
            with open(self.config.outfile, 'wb') as out:
                for in_file in results:
                    with open(in_file, 'rb') as fd:
                        shutil.copyfileobj(fd, out, 1024 * 1024 * 50)
class SymbolResolutionDriver(BaseToolTaskDriver):
    """
    Task driver that fetches symbol names from binary and source files.

    This step requires different input data:

    * Output from procstat-like commands in csv or tab-separated format,
      this is required to extract the base address of the sections
      of a binary in memory.
    * Binary ELF files containing the debug symbols.
    * Kernel syscalls.master file to map syscall numbers to a
      name/signature.
    """
    description = """
    Resolve call symbols in a cheriplot graph.
    This is a postprocessing tool that extracts debug information from
    ELF files, sources and runtime information to add symbol names and
    call signatures to the cheriplot graph.

    The tool is incremental, it can be run multiple times on the graph.
    """
    graph = Argument(type=file_path_validator,
                     help="Path to the cheriplot graph.")
    no_output = Option(
        action="store_true",
        help="Do not store output graph, useful for cheriplot-runner")
    vmmap = NestedConfig(VMMapFileParser)
    elfpath = Option(nargs="+", type=file_path_validator, default=[],
                     help="Paths where to look for ELF files with symbols")
    syscalls = Option(default=None, type=file_path_validator,
                      help="Path to the syscalls.master file")
    outfile = Option(default=None, type=file_path_validator,
                     help="Output file name, defaults to the input file")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Loaded graph manager, populated by _load_graph().
        self.pgm = None
        # Memory map file parser; parsed immediately since the symbol
        # reader requires the mapping model.
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()
        # Symbol reader looking up the configured ELF paths.
        self.symreader = SymReader(vmmap=self.vmmap,
                                   path=self.config.elfpath)
        # Output file path, defaults to overwriting the input graph.
        self._outfile = self.config.outfile or self.config.graph
        self._load_graph()

    def _load_graph(self):
        """Load the provenance graph from the configured path."""
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def run(self):
        """Resolve symbols in the graph and optionally save the result."""
        resolver = ResolveSymbolsGraphVisit(self.pgm, self.symreader, None)
        resolver(self.pgm.graph)
        if self.config.no_output:
            return
        with ProgressTimer("Write output graph", logger):
            self.pgm.save(self._outfile)
class ProvenanceGraphDumpDriver(BaseToolTaskDriver):
    """
    Dump and manually filter the nodes in the provenance graph.
    """

    range_format_help = "Accept a range in the form <start>-<end>, -<end>, "\
                        "<start>- or <single_value>"

    graph = Argument(help="Path to the provenance graph file")
    ignore_filter = Option(action="store_true",
                           help="Ignore any graph filter in place")
    layer = Option(help="Graph layer to dump.",
                   choices=("prov", "call", "all"),
                   default="all")
    # provenance layer filters
    origin = Option(help="Find vertices with specific origin.",
                    choices=("root", "csetbounds", "cfromptr", "ptrbounds",
                             "candperm", "partial", "call", "syscall"),
                    default=None)
    pc = Option(type=option_range_validator, default=None,
                help="Find vertices with PC in the given range. " +
                range_format_help)
    time = Option(type=option_range_validator,
                  help="Find all vertices created at given time. " +
                  range_format_help)
    lifetime = Option(
        type=option_range_validator,
        help="Find all vertices with a lifetime (t_free - t_alloc) "
        "in the given range. " + range_format_help)
    mem = Option(type=option_range_validator,
                 help="Show all vertices stored at a memory address. " +
                 range_format_help)
    deref = Option(
        type=option_range_validator,
        help="Show all vertices dereferenced at a memory address. " +
        range_format_help)
    size = Option(type=option_range_validator,
                  help="Show vertices with given length. " +
                  range_format_help)
    perms = Option(type=any_int_validator,
                   help="Find vertices with given permission bits set.")
    otype = Option(type=any_int_validator,
                   help="Find vertices with given object type.")
    match_any = Option(action="store_true",
                       help="Return a trace entry when matches any"
                       " of the conditions, otherwise all conditions"
                       " must be verified.")
    predecessors = Option(
        action="store_true",
        help="Show the predecessors of a matching capability.")
    successors = Option(action="store_true",
                        help="Show the successors of a matching capability.")
    full_info = Option(action="store_true",
                       help="Show the full vertex information")
    vmmap = NestedConfig(VMMapFileParser)
    elfpath = Option(nargs="+", type=file_path_validator, default=[],
                     help="Paths where to look for ELF files with symbols")
    # call layer filters
    target = Option(
        help="Show calls to the given target address or symbol name.")
    related = Option(
        action="store_true",
        help="Show vertices in the provenance layer related to each call.")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.pgm = ProvenanceGraphManager.load(self.config.graph)
        """Manager for the graph to dump."""

        self.match_origin = None
        """Search for nodes with this origin"""

        self._check_origin_arg(self.config.origin)

        # filters applied to provenance-layer vertices
        self.prov_filters = [
            self._match_origin,
            self._match_pc,
            self._match_mem,
            self._match_deref,
            self._match_perms,
            self._match_otype,
            self._match_alloc,
            self._match_len,
            self._match_lifetime,
        ]
        # filters applied to call-layer vertices
        self.call_filters = [
            self._match_call_type,
            self._match_call_target,
        ]

        # symbol resolution is optional, only enabled when both a vmmap
        # and ELF search paths are given
        if self.config.vmmap and self.config.elfpath:
            self.vmmap = VMMapFileParser(config=self.config.vmmap)
            self.vmmap.parse()
            self.symreader = SymReader(vmmap=self.vmmap,
                                       path=self.config.elfpath)
        else:
            self.vmmap = None
            self.symreader = None

    def _check_origin_arg(self, match_origin):
        """Translate the --origin option value into the enum to match."""
        if match_origin is None:
            return
        elif match_origin == "root":
            self.match_origin = CheriNodeOrigin.ROOT
        elif match_origin == "csetbounds":
            self.match_origin = CheriNodeOrigin.SETBOUNDS
        elif match_origin == "cfromptr":
            self.match_origin = CheriNodeOrigin.FROMPTR
        elif match_origin == "ptrbounds":
            self.match_origin = CheriNodeOrigin.PTR_SETBOUNDS
        elif match_origin == "candperm":
            # BUGFIX: was "andperm", which is not among the declared
            # Option choices ("candperm"), so the advertised choice
            # always raised ValueError.
            self.match_origin = CheriNodeOrigin.ANDPERM
        elif match_origin == "partial":
            self.match_origin = CheriNodeOrigin.PARTIAL
        elif match_origin == "call":
            self.match_origin = EdgeOperation.CALL
        elif match_origin == "syscall":
            self.match_origin = EdgeOperation.SYSCALL
        else:
            raise ValueError("Invalid match_origin parameter")

    def _update_match_result(self, match, value):
        """
        Combine the current match result with the value of
        a test according to the match mode.

        A None value means "filter not enabled" and leaves the result
        unchanged.
        """
        if value is None:
            return match
        if self.config.match_any:
            return match or value
        else:
            return match and value

    def _check_limits(self, start, end, value):
        """Return True when value falls in [start, end] (None = open)."""
        if start is None:
            start = 0
        if end is None:
            end = np.inf
        if start <= value and value <= end:
            return True
        return False

    def _match_lifetime(self, edge, vdata):
        if self.config.lifetime:
            start, end = self.config.lifetime
            if vdata.cap.t_free >= 0:
                # BUGFIX: was v_data.cap.t_alloc (NameError, the
                # parameter is named vdata).
                lifetime = vdata.cap.t_free - vdata.cap.t_alloc
            else:
                # never freed: treat the lifetime as unbounded
                lifetime = np.inf
            return self._check_limits(start, end, lifetime)
        return None

    def _match_origin(self, edge, vdata):
        if self.match_origin:
            return vdata.origin == self.match_origin
        return None

    def _match_pc(self, edge, vdata):
        if self.config.pc:
            start, end = self.config.pc
            return self._check_limits(start, end, vdata.pc)
        return None

    def _match_mem(self, edge, vdata):
        if self.config.mem:
            start, end = self.config.mem
            result = False
            for addr in vdata.address["addr"]:
                result |= self._check_limits(start, end, addr)
                if result:
                    break
            return result
        return None

    def _match_deref(self, edge, vdata):
        if self.config.deref:
            start, end = self.config.deref
            result = False
            for addr in vdata.deref["addr"]:
                result |= self._check_limits(start, end, addr)
                if result:
                    break
            return result
        return None

    def _match_perms(self, edge, vdata):
        if self.config.perms:
            return vdata.cap.has_perm(self.config.perms)
        return None

    def _match_otype(self, edge, vdata):
        if self.config.otype:
            return vdata.cap.objtype == self.config.otype
        return None

    def _match_alloc(self, edge, vdata):
        if self.config.time:
            start, end = self.config.time
            return self._check_limits(start, end, vdata.cap.t_alloc)
        return None

    def _match_len(self, edge, vdata):
        if self.config.size:
            start, end = self.config.size
            return self._check_limits(start, end, vdata.cap.length)
        return None

    def _match_call_type(self, edge, vdata):
        if self.config.origin and edge is not None:
            eop = self.pgm.edge_operation[edge]
            return eop == self.match_origin
        return None

    def _match_call_target(self, edge, vdata):
        if self.config.target:
            return (vdata.symbol == self.config.target)
        return None

    def _find_function_for_pc(self, pc):
        """Return "file:symbol" for the function containing pc, if known."""
        if self.symreader is not None:
            rt = self.symreader.find_function(pc)
            if rt is None:
                return None
            # return file:symbol
            return "{}:{}".format(rt[1], rt[0])
        return None

    def _find_symbol_at(self, addr):
        """Return "file:symbol" for the symbol at addr, if known."""
        if self.symreader is not None:
            rt = self.symreader.find_address(addr)
            if rt is None:
                return None
            # return file:symbol
            return "{}:{}".format(rt[1], rt[0])
        return None

    def _dump_prov_vertex(self, edge, v):
        """Build the printable description of a provenance-layer vertex."""
        vdata = self.pgm.data[v]
        str_vertex = StringIO()
        str_vertex.write("(provenance) {} ".format(vdata))

        # Display annotated_XXX properties
        str_vertex.write(" annotations: { ")
        for key in self.pgm.graph.vp.keys():
            if not key.startswith("annotated_"):
                continue
            name = key[len("annotated_"):]
            property_map = self.pgm.graph.vp[key]
            if property_map[v]:
                # vertex is in the property map
                str_vertex.write("{} ".format(name.upper()))
        str_vertex.write("} ")

        # Dump event table summary counts
        events = vdata.event_tbl
        n_load = (events["type"] & EventType.DEREF_LOAD).sum()
        n_store = (events["type"] & EventType.DEREF_STORE).sum()
        str_vertex.write("deref-load:{:d} deref-store:{:d} ".format(
            n_load, n_store))
        n_loaded = (events["type"] & EventType.LOAD).sum()
        n_stored = (events["type"] & EventType.STORE).sum()
        str_vertex.write("load:{:d} store:{:d}".format(n_loaded, n_stored))

        # Display symbol name
        symbol = self._find_symbol_at(vdata.cap.base)
        if symbol:
            str_vertex.write(" to:{}".format(symbol))
        # Display function at PC
        fn_sym = self._find_function_for_pc(vdata.pc)
        if fn_sym:
            str_vertex.write(" {}".format(fn_sym))

        # Dump event table details
        if self.config.full_info:
            str_vertex.write("\n")
            frame_str = vdata.event_tbl.to_string(
                formatters={
                    "addr": "0x{0:x}".format,
                    "type": lambda t: str(EventType(t))
                })
            str_vertex.write("Event table:\n{}\n".format(frame_str))
        return str_vertex.getvalue()

    def _dump_call_vertex(self, edge, v):
        """Build the printable description of a call-layer vertex."""
        vdata = self.pgm.data[v]
        str_vertex = StringIO()
        if edge is not None:
            eop = EdgeOperation(self.pgm.edge_operation[edge])
            eaddr = self.pgm.edge_addr[edge]
            etime = self.pgm.edge_time[edge]
        else:
            # root call vertex, no incoming edge information
            eop = None
            eaddr = etime = 0
        str_vertex.write(
            "(call) op:{!s} caller:0x{:x} t_call:{:d} {!s}\n".format(
                eop, eaddr, etime, vdata))
        return str_vertex.getvalue()

    def _dump_vertex(self, edge, v):
        """Dispatch vertex dumping based on the layer it belongs to."""
        if self.pgm.layer_prov[v]:
            return self._dump_prov_vertex(edge, v)
        elif self.pgm.layer_call[v]:
            return self._dump_call_vertex(edge, v)
        else:
            logger.warning("dump_vertex: invalid layer %s",
                           self.pgm.data[v])

    def _dump_predecessors(self, view, v):
        """Print the chain of predecessors of v, root first."""
        if not self.config.predecessors or v is None:
            return
        predecessors = []
        current = v
        while True:
            parent, edge = self._get_parent(view, current)
            predecessors.insert(0, (current, edge))
            if parent is None:
                break
            current = parent
        for pred, edge in predecessors:
            print("+- {}".format(self._dump_vertex(edge, pred)))
            print("^")

    def _dump_successors(self, v):
        """Print the subtree of successors of v, depth-first."""
        if not self.config.successors:
            return
        vertices = list(zip(repeat(1), repeat(v), v.out_neighbours()))
        # list of tuples (depth, parent, vertex)
        while len(vertices):
            depth, parent, s = vertices.pop(0)
            edge = self.pgm.graph.edge(parent, s)
            # prepend the children so traversal is depth-first
            successors = list(
                zip(repeat(depth + 1), repeat(s), s.out_neighbours()))
            successors.extend(vertices)
            vertices = successors
            space = "  " * depth
            print("{}+- {}".format(space, self._dump_vertex(edge, s)))

    def _dump_related(self, v):
        """Print cross-layer relations of v (call <-> provenance)."""
        if not self.config.related:
            return
        if self.pgm.layer_prov[v]:
            # dump call vertices where this vertex is visible either
            # at CALL or RETURN time
            u = self.pgm.graph.vertex(v)
            for edge in u.out_edges():
                if not self.pgm.layer_call[edge.target()]:
                    continue
                eop = EdgeOperation(self.pgm.edge_operation[edge])
                regno = ", ".join(map(str, self.pgm.edge_regs[edge]))
                dst = self.pgm.data[edge.target()]
                print("[{}] @ c{} +-> {}".format(eop.name, regno, dst))
        if self.pgm.layer_call[v]:
            # dump visible vertices at CALL and RETURN time
            u = self.pgm.graph.vertex(v)
            for edge in u.in_edges():
                if not self.pgm.layer_prov[edge.source()]:
                    continue
                eop = EdgeOperation(self.pgm.edge_operation[edge])
                regno = ", ".join(map(str, self.pgm.edge_regs[edge]))
                src = self.pgm.data[edge.source()]
                print("[{}] @ c{} +-> {}".format(eop.name, regno, src))

    def _get_parent(self, view, v):
        """
        Get the parent vertex in the given layer and the connecting edge.
        """
        parents = list(v.in_neighbours())
        if len(parents) == 0:
            return None, None
        return parents[0], view.edge(parents[0], v)

    def _dump_layer(self, view):
        """Dump every vertex in the view that matches the filters."""
        for v in view.vertices():
            vdata = self.pgm.data[v]
            parent, edge = self._get_parent(view, v)
            # initial match value, if match_any is true
            # we OR the match results so start with false
            # else we AND them, so start with true
            match = not self.config.match_any
            if self.pgm.layer_prov[v]:
                filters = self.prov_filters
            elif self.pgm.layer_call[v]:
                filters = self.call_filters
            else:
                # BUGFIX: previously fell through with filters either
                # unbound (NameError) or stale from a prior iteration.
                logger.warning("dump_layer: invalid layer %s", vdata)
                continue
            for checker in filters:
                result = checker(edge, vdata)
                match = self._update_match_result(match, result)
            if match:
                self._dump_predecessors(view, parent)
                print("+- {}".format(self._dump_vertex(edge, v)))
                self._dump_related(v)
                self._dump_successors(v)
                print("######")

    def run(self):
        """Dump the requested graph layer(s)."""
        if self.config.ignore_filter:
            self.pgm.graph.clear_filters()
        if self.config.layer == "all" or self.config.layer == "prov":
            self._dump_layer(self.pgm.prov_view())
        if self.config.layer == "all" or self.config.layer == "call":
            self._dump_layer(self.pgm.call_view())
class VMMapFileParser(ConfigurableComponent):
    """
    Parse a vmmap file created by procstat or libprocstat-based
    vmmap_dump tool
    """

    vmmap_file = Option(
        default=None,
        type=file_path_validator,
        help="File that specify the VM mappings for the traced process")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Model that describes vmmap entries.
        self.vmmap = VMMapModel()
        # File where the entries are specified.
        # NOTE(review): the handle is kept open for the component's
        # lifetime and never explicitly closed.
        self.map_file = None
        # The map file can be csv or space-separated.
        self.csv_style = False
        if self.config.vmmap_file is None:
            logger.debug("No vmmap file, empty vmmap")
            return
        try:
            self.map_file = open(self.config.vmmap_file, "r")
        except IOError:
            logger.error("Can not open %s", self.config.vmmap_file)
            raise
        # try to guess the format of the file: a comma in the first
        # line means vmmap_dump csv, otherwise procstat output
        line = self.map_file.readline()
        self.map_file.seek(0)
        try:
            line.index(",")
            has_csv_delim = True
        except ValueError:
            has_csv_delim = False
        self.csv_style = has_csv_delim

    def get_model(self):
        """Return the :class:`VMMapModel` holding the parsed entries."""
        return self.vmmap

    def parse(self):
        """
        Parse the memory map file into the vmmap model dataframe.

        Does nothing when no vmmap file was given.
        """
        if not self.map_file:
            # nothing to parse
            return
        if self.csv_style:
            logger.info("Try to load vmmap_dump memory map file")
            vmmap_dump_cols = [
                "start", "end", "offset", "perm", "res", "pres", "ref",
                "shd", "flag", "tp", "path"
            ]
            # addresses may be written either as 0x-prefixed hex or
            # as plain decimal
            maybe_b16_int = lambda x: int(x, 16) if str(x).strip().startswith(
                "0x") else int(x)
            col_converters = {"start": maybe_b16_int, "end": maybe_b16_int}
            vmmap = pd.read_csv(self.map_file, names=vmmap_dump_cols,
                                converters=col_converters)
        else:
            logger.info("Try to load procstat memory map file")
            procstat_cols = [
                "pid", "start", "end", "perm", "res", "pres", "ref", "shd",
                "flag", "tp", "path"
            ]
            col_types = {
                "pid": np.int_,
                "start": np.uint64,
                "end": np.uint64,
                "perm": str,
                "res": np.int_,
                "pres": np.int_,
                "ref": np.int_,
                "shd": np.int_,
                "flag": str,
                "tp": str,
                "path": str
            }
            # procstat prints addresses as bare hexadecimal
            from_b16_int = lambda x: int(x, 16)
            col_converters = {"start": from_b16_int, "end": from_b16_int}
            # BUGFIX: pd.read_table was deprecated and then removed from
            # pandas; read_csv with an explicit separator is equivalent.
            # The separator is a raw string to avoid the invalid escape
            # sequence warning for "\s".
            vmmap = pd.read_csv(self.map_file, names=procstat_cols,
                                sep=r"\s+", dtype=col_types,
                                converters=col_converters)
        vmmap = vmmap.fillna("")
        logger.debug("Parsed vmmap")
        # BUGFIX: DataFrame.ix was removed from pandas; .loc performs the
        # same label-based column selection here.
        self.vmmap.vmmap = vmmap.loc[:, ["start", "end", "perm", "flag",
                                         "path"]]
class GraphFilterDriver(BaseToolTaskDriver):
    """Driver that implements the top-level filtering tool."""
    description = """
    Graph filtering tool.
    This tool processes a cheriplot graph to produce a filtered version.
    The filtered graph still includes all the vertices but carries a mask
    that removes some of the vertices when used.
    """
    graph = Argument(type=file_path_validator,
                     help="Path to the cheriplot graph")
    outfile = Option(default=None,
                     type=file_path_validator,
                     help="Path to the output file")
    display_name = Option(default=None,
                          help="New display-name for the graph")
    purge = Option(action="store_true",
                   help="Purge filtered elements in the output graph. "
                   "This is not reversible.")
    incremental = Option(action="store_true",
                         help="Do not remove existing graph filters.")
    no_output = Option(
        action="store_true",
        help="Do not store output graph, useful for cheriplot-runner")
    vmmap = NestedConfig(VMMapFileParser)
    no_null = Option(action="store_true", help="Filter null vertices")
    no_kernel = Option(action="store_true", help="Filter kernel vertices")
    no_cfromptr = Option(action="store_true", help="Filter cfromptr vertices")
    no_andperm = Option(action="store_true", help="Filter candperm vertices")
    no_stack = Option(action="store_true",
                      help="Filter vertices pointing to the stack")
    no_roots = Option(action="store_true", help="Filter root vertices")
    annotate_stack = Option(action="store_true",
                            help="Mark vertices pointing to the stack")
    annotate_malloc = Option(action="store_true",
                             help="Mark vertices derived from malloc")
    annotate_malloc_ancestors = Option(
        action="store_true",
        help="Mark vertices that are used to derive malloc capabilities.")
    annotate_mmap = Option(action="store_true",
                           help="Mark vertices derived from mmap")
    annotate_xnx = Option(action="store_true",
                          help="Mark executable and non executable vertices")
    remove_qtrace_execve = Option(
        action="store_true",
        help="Mask out vertices that are created before the return from "
        "the last execve() issued in qtrace.")
    aggregate_ptrbounds = Option(
        action="store_true",
        help="Merge sequences of cfromptr+csetbounds. This is not reversible.")
    tslice = Option(action="store_true",
                    help="Filter a graph slice (see tslice parameters)")
    tslice_mode = Option(
        nargs="+",
        choices=("deref", "create", "access"),
        default=["create"],
        help="""tslice filter mode parameter:
        deref: cap dereference time (load/store/call via capability)
        create: cap create time
        access: cap access time (load/store of the capability)
        """)
    tslice_time = Option(
        nargs=2,
        type=int,
        metavar=("start", "end"),
        help="tslice filter start-time and end-time parameters")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Loaded graph manager.
        self.pgm = None
        # Process memory mapping CSV parser.
        self._vmmap_parser = VMMapFileParser(config=self.config.vmmap)
        # Output file path, defaults to the input file.
        self._outfile = self.config.outfile or self.config.graph
        self._load_graph()

    def _load_graph(self):
        """Load the provenance graph from the input file."""
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def _find_stack_entry(self, option_name):
        """
        Return the stack mapping (the entry that grows down) from the
        vmmap model.

        :param option_name: user-facing filter name used in the error
            message when no stack mapping is available.
        :raises RuntimeError: when the vmmap has no stack entry.
        """
        vmmap = self._vmmap_parser.get_model()
        for entry in vmmap:
            if entry.grows_down:
                return entry
        # BUGFIX: the original raised "np-stack filter ..." (typo) in the
        # no-stack case; the message now matches the logged one.
        logger.error("%s filter requires vmmap argument", option_name)
        raise RuntimeError(
            "{} filter requires vmmap argument".format(option_name))

    def _get_filter(self, pgm):
        """Get a combined filter for a given graph manager."""
        filters = ChainGraphVisit(pgm)
        if self.config.remove_qtrace_execve:
            filters += FilterBeforeExecve(pgm)
        if self.config.no_null:
            filters += FilterNullVertices(pgm)
        if self.config.no_roots:
            filters += FilterRootVertices(pgm)
        if self.config.aggregate_ptrbounds:
            filters += MergeCfromptr(pgm)
        if self.config.no_cfromptr:
            filters += FilterCfromptr(pgm)
        if self.config.no_andperm:
            filters += FilterCandperm(pgm)
        if self.config.no_stack:
            entry = self._find_stack_entry("no-stack")
            filters += FilterStackVertices(pgm, entry.start, entry.end)
        if self.config.no_kernel:
            filters += FilterKernelVertices(pgm)
        if self.config.tslice:
            start, end = self.config.tslice_time
            deref = "deref" in self.config.tslice_mode
            create = "create" in self.config.tslice_mode
            access = "access" in self.config.tslice_mode
            filters += ProvGraphTimeSlice(
                pgm, start, end, creation_time=create,
                deref_time=deref, access_time=access)
        if self.config.annotate_stack:
            entry = self._find_stack_entry("mark-stack")
            filters += DecorateStackStrict(pgm, entry.start, entry.end)
            filters += DecorateStackAll(pgm, entry.start, entry.end)
        if self.config.annotate_mmap:
            filters += DecorateMmap(pgm)
            filters += DecorateMmapReturn(pgm)
        if self.config.annotate_malloc:
            vmmap = self._vmmap_parser.get_model()
            min_addr = 2**64
            heap_entry = None
            # find the first entry in the memory map (lowest end address);
            # it is only used to check that a vmmap was actually given
            for entry in vmmap:
                if entry.end < min_addr:
                    min_addr = entry.end
                    heap_entry = entry
            if not heap_entry:
                logger.error("mark-malloc filter requires vmmap argument")
                raise RuntimeError(
                    "mark-malloc filter requires vmmap argument")
            # filters += DecorateHeap(pgm, heap_entry.start, heap_entry.end)
            filters += DecorateMalloc(pgm)
            # filters += DecorateMallocReturn(pgm)
        if self.config.annotate_xnx:
            filters += DecorateExecutable(pgm)
        return filters

    def run(self):
        """Apply the configured filters and save the output graph."""
        self._vmmap_parser.parse()
        if not self.config.incremental:
            self.pgm.graph.clear_filters()
        graph_filter = self._get_filter(self.pgm)
        filtered_graph = graph_filter(self.pgm.graph)
        # transfer the resulting vertex mask onto the main graph
        vfilt, _ = filtered_graph.get_vertex_filter()
        self.pgm.graph.set_vertex_filter(vfilt)
        if self.config.purge:
            with ProgressTimer("Purge filtered vertices", logger):
                self.pgm.graph.purge_vertices()
        if self.config.display_name:
            self.pgm.graph.gp.name = self.config.display_name
        if not self.config.no_output:
            with ProgressTimer("Write output graph", logger):
                self.pgm.save(self._outfile)
class TxtTraceCmpParser(CallbackTraceParser, TaskDriver):
    """
    Compare a text trace with a binary trace and
    report any difference.
    """
    description = "Scan two traces and inspect differences"
    cvtrace = Argument(help="Path to cvtrace file")
    txttrace = Argument(help="Path to the text trace file")
    pc_only = Option("-p", action="store_true",
                     help="Only check instruction PC")
    quiet = Option("-q", action="store_true",
                   help="Suppress warning messages")

    def __init__(self, config):
        # BUGFIX: the unbound base-class __init__ calls were missing the
        # explicit self argument and raised TypeError at construction.
        CallbackTraceParser.__init__(self, config.cvtrace)
        TaskDriver.__init__(self, config)
        if config.quiet:
            logging.basicConfig(level=logging.ERROR)
        # txt trace parser state machine
        self.txt_parse_state = State.S_INSTR
        self.txt_trace = open(config.txttrace, "r")
        # skip lines from the txt trace until the first instruction
        self._skiplines(inst_only=True)
        self.txt_parse_state = State.S_INSTR

    def run(self):
        self.parse()

    def _skiplines(self, inst_only=False):
        """
        Skip lines that are not used.

        Advances the text trace until a line of interest is found, sets
        the state-machine state accordingly, then rewinds so the next
        handler re-reads the matched line.

        :param inst_only: when True, only stop at instruction lines.
        """
        while True:
            saved_pos = self.txt_trace.tell()
            line = self.txt_trace.readline()
            # test all the pattern that should not be skipped
            if not inst_only:
                if re.search(r"Cap Memory Read", line) is not None:
                    self.txt_parse_state = State.S_CAP_MEM
                    break
                if re.search(r"Cap Memory Write", line) is not None:
                    self.txt_parse_state = State.S_CAP_MEM
                    break
                if re.search(r"Memory Read", line) is not None:
                    self.txt_parse_state = State.S_MEM
                    break
                if re.search(r"Memory Write", line) is not None:
                    self.txt_parse_state = State.S_MEM
                    break
                if re.search(r"Write [C\$]?[a-z0-9]+", line) is not None:
                    self.txt_parse_state = State.S_REG
                    break
            if re.match(r"[0-9xa-f]+:", line) is not None:
                # the next call to the parser function will
                # continue from here
                self.txt_parse_state = State.S_INSTR_END
                break
        self.txt_trace.seek(saved_pos)

    def _txt_instr(self, inst):
        """Parse an instruction line, filling inst["pc"] and inst["opcode"]."""
        line = self.txt_trace.readline()
        # line matches "[0-9xa-f]+:"
        # parse addr; split only on the first colon so that colons in
        # the disassembly text can not break the unpacking
        addr, rest = line.split(':', 1)
        _, addr = addr.split("x")
        intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
        inst["pc"] = intaddr
        rest = re.sub(r"[ \t]+", " ", rest.strip())
        opcode = rest.split(" ")[0]
        inst["opcode"] = opcode
        if len(rest.split(" ")) > 1:
            operands = rest.split(" ")[1]
            op0 = operands.split(",")[0]
        else:
            op0 = None
        # if we find a li zero, <something> is a canonical nop so
        # we need to skip until the next instruction is found
        if inst["opcode"] == "li" and op0 == "zero":
            self._skiplines(inst_only=True)
        else:
            # seek to next valid line and change state
            self._skiplines()

    def _txt_reg(self, inst):
        """Parse a register-write line into inst (gpr or capability)."""
        line = self.txt_trace.readline()
        m = re.search(r"Write \$?([a-z0-9]+) = ([a-f0-9]+)", line)
        if m:
            # write to gpr format
            # Write t4 = 0000000000008400
            reg = m.group(1)
            val = m.group(2)
            intval = struct.unpack(">Q", bytes.fromhex(val))[0]
            inst["reg"] = reg
            inst["data"] = intval
        else:
            # write to cap register format
            # Write C24|v:1 s:0 p:7fff807d b:0000007fffffdb20 l:0000000000000400
            # |o:0000000000000000 t:0
            m = re.search(
                r"Write C([0-9]+)\|v:([01]) s:([01]) p:([a-f0-9]+) "
                r"b:([a-f0-9]+) l:([a-f0-9]+)", line)
            if m is None:
                raise RuntimeError("Malformed cap reg write")
            # first line of a capability match
            # next line must match this
            line = self.txt_trace.readline()
            nxt = re.search(r"\|o:([a-f0-9]+) t:([a-f0-9]+)", line)
            if nxt is None:
                raise RuntimeError("Malformed cap reg write")
            v = m.group(2)
            s = m.group(3)
            p = m.group(4)
            b = m.group(5)
            l = m.group(6)
            o = nxt.group(1)
            t = nxt.group(2)
            try:
                if len(t) % 2:
                    # hotfix fromhex() that do not like odd num of digits
                    t = "0" + t
                t = bytes.fromhex(t)
                if len(t) < 4:
                    # zero-pad the otype to 4 bytes for struct.unpack
                    for i in range(4 - len(t)):
                        t = bytes.fromhex("00") + t
            except Exception:
                logger.error("Can not load type field %s %s",
                             m.groups(), nxt.groups())
                raise
            # take only 16bit for permissions, the upper 16bit
            # are stored in the trace but ignored by cheritrace
            # as we do not care about uperms apparently.
            intp = struct.unpack(">L", bytes.fromhex(p))[0] & 0xffff
            intb = struct.unpack(">Q", bytes.fromhex(b))[0]
            intl = struct.unpack(">Q", bytes.fromhex(l))[0]
            into = struct.unpack(">Q", bytes.fromhex(o))[0]
            intt = struct.unpack(">L", t)[0] & 0x00ffffff
            inst["cap"] = {
                "valid": int(v),
                "sealed": int(s),
                "perms": intp,
                "base": intb,
                "length": intl,
                "offset": into,
                "otype": intt,
            }
        # seek to next valid line and change state
        self._skiplines()

    def _txt_mem(self, inst):
        """Parse a memory access line into inst["load"] or inst["store"]."""
        line = self.txt_trace.readline()
        m = re.search(r"(Cap )?Memory Read +\[([0-9a-f]+)\]", line)
        if m:
            # data load
            is_cap = m.group(1)
            addr = m.group(2)
            intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
            inst["load"] = intaddr
            if is_cap:
                # capability loads span two lines, skip another line
                self.txt_trace.readline()
        else:
            m = re.search(r"(Cap )?Memory Write +\[([0-9a-f]+)\]", line)
            if m is None:
                raise RuntimeError("Mem not a read nor a write")
            # data store
            is_cap = m.group(1)
            addr = m.group(2)
            intaddr = struct.unpack(">Q", bytes.fromhex(addr))[0]
            inst["store"] = intaddr
            if is_cap:
                # skip another line
                self.txt_trace.readline()
        # seek to next valid line and change state
        self._skiplines()

    def _next_txt_instr(self):
        """
        Fetch the next instruction from the txt trace.
        This is the state machine main loop.
        """
        instr = {}
        while self.txt_parse_state != State.S_INSTR_END:
            if self.txt_parse_state == State.S_SKIP:
                self._skiplines()
            elif self.txt_parse_state == State.S_INSTR:
                self._txt_instr(instr)
            elif self.txt_parse_state == State.S_REG:
                self._txt_reg(instr)
            elif self.txt_parse_state == State.S_MEM:
                self._txt_mem(instr)
            elif self.txt_parse_state == State.S_CAP_MEM:
                self._txt_mem(instr)
        # next call starts always from an instruction
        self.txt_parse_state = State.S_INSTR
        return instr

    def _dump_txt_inst(self, txt_inst):
        """Format a parsed txt-trace instruction for log/error messages."""
        string = "pc:0x%x %s" % (txt_inst["pc"], txt_inst["opcode"])
        if "load" in txt_inst:
            string += " load:%x" % txt_inst["load"]
        if "store" in txt_inst:
            string += " store:%x" % txt_inst["store"]
        if "data" in txt_inst:
            string += " val:%x" % txt_inst["data"]
        if "cap" in txt_inst:
            txt_cap = txt_inst["cap"]
            string += " v:%d s:%d b:%x o:%x l:%x p:%x t:%x" % (
                txt_cap["valid"], txt_cap["sealed"], txt_cap["base"],
                txt_cap["offset"], txt_cap["length"], txt_cap["perms"],
                txt_cap["otype"])
        return string

    def _parse_exception(self, entry, regs, disasm, idx):
        super()._parse_exception(entry, regs, disasm, idx)
        # read entry from the text trace
        txt_inst = self._next_txt_instr()
        logger.debug("Scan txt:<%s>, bin:<unparsed>",
                     self._dump_txt_inst(txt_inst))
        # check only pc which must be valid anyway
        assert txt_inst["pc"] == entry.pc

    def scan_all(self, inst, entry, regs, last_regs, idx):
        # read entry from the text trace
        txt_inst = self._next_txt_instr()
        logger.debug("Scan txt:<%s>, bin:%s",
                     self._dump_txt_inst(txt_inst), inst)
        try:
            # check that the instruction matches
            assert txt_inst["pc"] == entry.pc
            # BUGFIX: was self.pc_only, which resolves to the class-level
            # Option object (always truthy); the configured flag lives on
            # self.config like in every other driver.
            if self.config.pc_only:
                # only check pc, skip everything else
                return False
            if inst.opcode in ["mfc0"]:
                # these have weird behaviour so just ignore for now
                return False
            if txt_inst["opcode"] != inst.opcode:
                # opcode check is not mandatory due to disassembly differences
                # issue a warning anyway for now
                logger.warning("Opcode differ {%d} txt:<%s> bin:%s",
                               entry.cycles, self._dump_txt_inst(txt_inst),
                               inst)
            if "load" in txt_inst:
                assert txt_inst["load"] == entry.memory_address
            if "store" in txt_inst:
                assert txt_inst["store"] == entry.memory_address
            if "data" in txt_inst:
                if inst.opcode not in ["mfc0"]:
                    reg_number = entry.gpr_number()
                    for op in inst.operands:
                        if op.is_register and op.gpr_index == reg_number:
                            # BUGFIX: the debug call had %-placeholders
                            # but no arguments
                            logger.debug("gpr:%d reg:%d",
                                         op.gpr_index, reg_number)
                            assert txt_inst["data"] == op.value, \
                                "reg data do not match %d != %d" % (
                                    txt_inst["data"], op.value)
                            break
            # # XXX we have a problem with extracting the jump target
            # # from jal/j the binary trace have an offset that does
            # # not make much sense..
            # assert txt_inst["data"] == inst.op0.value
            if "cap" in txt_inst:
                cap = CheriCap(inst.op0.value)
                txt_cap = txt_inst["cap"]
                assert txt_cap["valid"] == cap.valid, \
                    "tag do not match %d != %d" % (
                        txt_cap["valid"], cap.valid)
                assert txt_cap["sealed"] == cap.sealed, \
                    "seal do not match %d != %d" % (
                        txt_cap["sealed"], cap.sealed)
                assert txt_cap["base"] == cap.base, \
                    "base do not match %x != %x" % (
                        txt_cap["base"], cap.base)
                assert txt_cap["length"] == cap.length, \
                    "length do not match %x != %x" % (
                        txt_cap["length"], cap.length)
                assert txt_cap["offset"] == cap.offset, \
                    "offset do not match %x != %x" % (
                        txt_cap["offset"], cap.offset)
                assert txt_cap["perms"] == cap.permissions, \
                    "perms do not match %x != %x" % (
                        txt_cap["perms"], cap.permissions)
                assert txt_cap["otype"] == cap.objtype, \
                    "otype do not match %x != %x" % (
                        txt_cap["otype"], cap.objtype)
        except AssertionError:
            logger.error("Assertion failed at {%d} inst:%s txt:<%s>",
                         entry.cycles, inst, self._dump_txt_inst(txt_inst))
            raise
        self.progress.advance()
        return False
class PtrSizeCdfDriver(TaskDriver, ExternalLegendTopPlotBuilder):
    """Plot the CDF of the sizes of capabilities created in the graphs."""
    title = "CDF of the size of capabilities created"
    x_label = "Size"
    y_label = "Proportion of the total number of capabilities"
    outfile = Option(help="Output file", default="ptrsize_cdf.pdf")
    publish = Option(help="Adjust plot for publication", action="store_true")
    absolute = Option(help="Do not normalize y axis, show absolute count"
                      " of capabilities", action="store_true")
    filters = Option(
        default=[],
        action="append",
        nargs="+",
        choices=("stack", "mmap", "malloc"),
        help="set of possible elements to modify for the CDF, assume"
        "that the size of the given elements is the maximum possible.")
    split = Option(
        default=[],
        action="append",
        choices=("stack", "stack-all", "malloc", "exec", "glob", "kern",
                 "caprelocs", "caprelocs-only"),
        help="Separate the given vertices in a separate CDF")

    def __init__(self, pgm_list, vmmap, **kwargs):
        super().__init__(**kwargs)
        # List of graph managers to plot.
        self.pgm_list = pgm_list
        # VMmap model of the process memory mapping.
        self.vmmap = vmmap
        # CDF datasets built by _make_cdf_dataset.
        self.datasets = []
        if self.config.publish:
            self._style["font"] = FontProperties(size=25)
        if self.config.absolute:
            self.y_label = "Number of capabilities"
        self.title += " in {}".format(
            ",".join([pgm.name for pgm in self.pgm_list]))

    def _get_title_kwargs(self):
        kw = super()._get_title_kwargs()
        if self.config.publish:
            # suppress the title so we have more space
            kw.update({"visible": False})
        return kw

    def _get_savefig_kwargs(self):
        # NOTE(review): this calls super()._get_figure_kwargs(), not
        # _get_savefig_kwargs() — looks like a copy/paste slip but it is
        # kept as-is; confirm against the plot-builder base class.
        kw = super()._get_figure_kwargs()
        kw["dpi"] = 300
        return kw

    def _get_axes_rect(self):
        if self.config.publish:
            return [0.125, 0.08, 0.85, 0.8]
        return super()._get_axes_rect()

    def _get_legend_kwargs(self):
        """
        Change layout of the number of colums in the legend.

        BUGFIX: this method was defined twice in the class; the first
        definition (setting loc="lower right") was silently shadowed
        dead code and has been removed.
        """
        kw = super()._get_legend_kwargs()
        kw.update({
            "bbox_to_anchor": (-0.125, 1.009, 1.125, 0.102),
            "ncol": 6,
            "labelspacing": 0.5,
            "borderpad": 0.1
        })
        return kw

    def make_plot(self):
        super().make_plot()
        # NOTE(review): "basex" was renamed to "base" in matplotlib 3.3;
        # kept for compatibility with the version this project pins.
        self.ax.set_xscale("log", basex=2)

    def _make_cdf_dataset(self, pgm, vfilt, setname):
        """
        Build a PtrBoundCdf dataset for the (optionally vertex-filtered)
        graph and append it to self.datasets.
        """
        if vfilt is not None:
            view = GraphView(pgm.graph, vfilt=vfilt)
            cdf = PtrBoundCdf(pgm, self.config.absolute, graph=view)
        else:
            cdf = PtrBoundCdf(pgm, self.config.absolute)
        cdf.num_ignored = -1
        cdf.name = setname
        cdf.slice_name = None
        cdf.build_cdf()
        self.datasets.append(cdf)

    def run(self):
        for idx, pgm in enumerate(self.pgm_list):
            self._make_cdf_dataset(pgm, None, "all")
            for split_set in self.config.split:
                if split_set == "stack":
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_usr_stack,
                                           split_set)
                elif split_set == "stack-all":
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_usr_stack,
                                           "stack")
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_stack,
                                           "stack-deref")
                elif split_set == "malloc":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_malloc,
                                           split_set)
                elif split_set == "exec":
                    self._make_cdf_dataset(pgm, pgm.graph.vp.annotated_exec,
                                           split_set)
                elif split_set == "caprelocs":
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_capreloc,
                                           "relocs")
                elif split_set == "caprelocs-only":
                    # relocations that are not also global pointers
                    difference = pgm.graph.new_vertex_property("bool")
                    difference.a = (pgm.graph.vp.annotated_capreloc.a &
                                    ~pgm.graph.vp.annotated_globptr.a)
                    self._make_cdf_dataset(pgm, difference, "relocs-only")
                elif split_set == "glob":
                    # any global pointers or pointers derived from
                    # global pointers
                    combined = pgm.graph.new_vertex_property("bool")
                    combined.a = (pgm.graph.vp.annotated_globptr.a |
                                  pgm.graph.vp.annotated_globderived.a)
                    self._make_cdf_dataset(pgm, combined, split_set)
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_captblptr,
                                           "captbl")
                elif split_set == "kern":
                    # kernel originated and syscall originated vertices
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_ksyscall,
                                           "syscall")
                    self._make_cdf_dataset(pgm,
                                           pgm.graph.vp.annotated_korigin,
                                           "kern")
                else:
                    logger.error("Invalid --split option value %s", split_set)
                    raise ValueError("Invalid --split option value")
        self.register_patch_builder(self.datasets,
                                    CdfPatchBuilder(self.config.absolute))
        self.process(out_file=self.config.outfile)
class CallGraphTraceParser(CallbackTraceParser, TaskDriver):
    """
    Dump a stack trace given a cheri trace instruction.
    We also detect all the functions that have been called and returned
    during the backtrace.

    Keep 2 data structures:
    - a deque that holds the last return address found
    - a pandas dataframe that holds the function landing pad and return
      points for nested functions

    The deque is used during parsing to detect nested functions and
    call sites.
    The pandas dataframe holds:
    - the nested functions (symbol address, return address, parent symbol
      address, resolved symbol info)
    - the backtrace functions (symbol address, parent symbol address,
      resolved symbol info)

    Starting from the given instruction (cycle) iterate the trace
    backwards and detect all the (c)jalr and (c)jr instructions.
    When a <return> is found, it is added to the pandas dataset.
    """

    start = Option("-s", type=int,
                   help="Backtrace starting from trace position",
                   default=None)
    end = Option("-e", type=int,
                 help="Stop backtrace at given position", default=None)
    depth = Option("-d",
                   help="Build the call graph/backtrace for the "
                   "last <depth> function calls",
                   type=int, default=None)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Number of backtrace entries found so far.
        self._backtrace_num = 0
        # Stop parsing after the backtrace has length <depth>.
        self.backtrace_depth = self.config.depth
        # Stack of return instructions found.
        self.return_stack = deque()
        # Map call site addresses to vertices.
        self.call_site_map = {}
        # Handle the structure of the call graph.
        self.cgm = CallGraphManager()
        # Root is an empty node that is filled with the current function
        # info as the trace is parsed backwards.
        self.root = self.cgm.graph.add_vertex()

    def parse(self, start=None, end=None):
        # parse from the given start backwards (direction = 1)
        logger.info("Scan trace %s", self.path)
        super().parse(self.config.start, self.config.end, 1)

    def do_scan(self, inst, entry):
        """Decide whether we should scan this instruction or not."""
        if entry.is_kernel():
            return False
        return True

    def check_depth(self):
        """Check backtrace depth and decide whether to stop backtracing."""
        if self.backtrace_depth is not None:
            if self.backtrace_depth <= self._backtrace_num:
                return True
        return False

    def add_call(self, target, time, pc):
        """
        Register a call in the call graph when the call is not part
        of the backtrace.
        A new edge is added between the current "root" vertex and the
        call target vertex. If the target vertex does not exist it
        is created.
        """
        logger.debug("[%d] Call to 0x%x", time, target)
        # do we have a node for this call target?
        if target in self.call_site_map:
            target_vertex = self.call_site_map[target]
            # do we have already an edge towards that vertex?
            for e in self.root.out_edges():
                if e.target() == target_vertex:
                    # just increment the call count
                    call_edge = e
                    break
            else:
                # create the edge towards the target
                call_edge = self.cgm.graph.add_edge(self.root, target_vertex)
        else:
            # found a new call target, so create a vertex for it
            target_vertex = self.cgm.graph.add_vertex()
            self.cgm.addr[target_vertex] = target
            self.call_site_map[target] = target_vertex
            call_edge = self.cgm.graph.add_edge(self.root, target_vertex)
        self.cgm.t_call[call_edge].append(time)

    def add_backtrace(self, target, time, pc):
        """
        Register a call in the backtrace.
        Create a new "root" vertex and create an edge for the call
        between the new vertex and the current "root".
        If the current "root" vertex address exists in the graph,
        reroute all edges from the current root to the duplicate vertex.
        This is required to handle recursion.
        """
        logger.debug("[%d] Backtrace call to 0x%x", time, target)
        if target in self.call_site_map:
            # there is already a vertex for the current function
            # take all edges from the current "root" and make them
            # start at the existing vertex, the root is not changed because
            # it stays empty and can be reused.
            # BUGFIX: out_edges was referenced without calling it (missing
            # parentheses, TypeError); also materialize the edge list
            # because edges are removed while we walk them.
            target_vertex = self.call_site_map[target]
            for e in list(self.root.out_edges()):
                new_e = self.cgm.graph.add_edge(target_vertex, e.target())
                self.cgm.t_call[new_e] = self.cgm.t_call[e]
                self.cgm.backtrace[new_e] = self.cgm.backtrace[e]
                self.cgm.graph.remove_edge(e)
        else:
            self.cgm.addr[self.root] = target
            target_vertex = self.root
            self.root = self.cgm.graph.add_vertex()
        # connect the current root with the call target
        e = self.cgm.graph.add_edge(self.root, target_vertex)
        self.cgm.t_call[e].append(time)
        self.cgm.backtrace[e] = time
        self._backtrace_num += 1

    def scan_cjalr(self, inst, entry, regs, last_regs, idx):
        # check that the call matches the last return instruction
        # that we have
        if not self.do_scan(inst, entry):
            return False
        call = inst.op1.value
        ret = inst.op0.value
        if len(self.return_stack) > 0:
            # if there was a return, that must match this call
            cjr_addr, cjr_cycles, is_cap = self.return_stack.pop()
            if not is_cap:
                logger.error("cjalr matches a non-capability return %s", inst)
                raise RuntimeError("cjalr matches a non-capability return")
            # check return value
            if ret.base + ret.offset != cjr_addr:
                logger.error(
                    "cjalr specifies different return addr "
                    "0x%x != 0x%x, inst %s", ret.base + ret.offset,
                    cjr_addr, inst)
                raise RuntimeError("cjalr specifies different return addr")
            self.add_call(call.base + call.offset, entry.cycles, entry.pc)
        else:
            self.add_backtrace(call.base + call.offset, entry.cycles,
                               entry.pc)
        return self.check_depth()

    def scan_cjr(self, inst, entry, regs, last_regs, idx):
        if not self.do_scan(inst, entry):
            return False
        ret_cap = inst.op0.value
        # append (addr, cycles, is_cap)
        self.return_stack.append(
            (ret_cap.base + ret_cap.offset, entry.cycles, True))
        return self.check_depth()
class CallGraphDriver(TaskDriver):
    """
    Run component for the call-graph and backtrace generator.

    The scan config element holds the interactive-mode arguments,
    other options are fixed at instantiation.
    """
    description = "Generate call graph and stack traces from cvtrace files"
    trace = Argument(help="Path to cvtrace file")
    sym = Option(nargs="*", help="Binaries providing symbols", default=None)
    vmmap = Option("-m",
                   help="Memory map file that specifies base addresses for "
                   "the binaries in --sym",
                   default=None)
    scan = NestedConfig(CallGraphTraceParser)
    # BUGFIX: the help strings of the two subcommands were swapped
    # (backtrace said "Plot the call graph" and vice versa)
    backtrace = SubCommand(help="Show the backtrace")
    callgraph = SubCommand(CallGraphPlot, help="Plot the call graph")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.parser = CallGraphTraceParser(trace_path=self.config.trace,
                                           config=self.config.scan)

    def update_config(self, config):
        super().update_config(config)
        self.parser.update_config(config.scan)

    def run(self):
        """Parse the trace and run the requested subcommand."""
        self.parser.parse()
        # get the parsed model
        cgm = self.parser.cgm
        # if we have symbols and a vmmap, add symbols to the call graph
        if self.config.sym and self.config.vmmap:
            add_symbols = CallGraphAddSymbols(cgm, self.config.sym,
                                              self.config.vmmap)
            cgm.bfs_transform(add_symbols)
        # subcommand attributes only exist when the subcommand was given
        with suppress(AttributeError):
            if self.config.backtrace:
                self.call_graph_backtrace(cgm)
        with suppress(AttributeError):
            if self.config.callgraph:
                self.plot = CallGraphPlot(config=self.config.callgraph)
                self.plot.plot(cgm)

    def call_graph_backtrace(self, cgm):
        """
        Dump the backtrace from the call graph parsed in the parser.
        """
        # only keep edges that carry backtrace timing information
        has_backtrace_info = cgm.graph.new_edge_property("bool")
        map_property_values(cgm.backtrace, has_backtrace_info,
                            lambda b: b != 0)
        cgm.graph.set_edge_filter(has_backtrace_info)
        bt = sorted(cgm.graph.edges(), key=lambda e: cgm.backtrace[e])
        for e in bt:
            fn_time = cgm.backtrace[e]
            fn_addr = cgm.addr[e.source()]
            fn_name = cgm.name[e.source()]
            print("[%d] 0x%x %s" % (fn_time, fn_addr, fn_name))
        cgm.graph.clear_filters()
class UserGraphPreprocessDriver(BaseToolTaskDriver):
    description = """
    Prepare graph that represent a single user process trace captured
    with qtrace.
    This will perform the following:
    - Annotate graph with symbols
    - Find the last execve where we enter the process and find the
      stack pointer.
    - Annotate successors of the stack pointer (annotated_stack)
    - Annotate anything dereferenced in the stack map
    - Annotate vertices returned by malloc (annotated_malloc)
    - Annotate executable vertices (annotated_exec)
    - Annotate global pointers and pointers used to load from captable
    - Annotate pointers that are returned from syscalls
    - Annotate pointers that originated in the kernel and are loaded
      from memory
    - Mask out all vertices and calls that are created before the last
      call to execve() and have not been marked
    - Mask out NULL capabilities
    - Mask out kernel capabilities that have not been marked
    """
    elfpath = Option(
        required=True,
        nargs="+",
        type=file_path_validator,
        default=[],
        help="Paths where to look for ELF files with symbols")
    vmmap = NestedConfig(VMMapFileParser)
    graph = Argument(
        type=file_path_validator,
        help="Path to the cheriplot graph.")
    outfile = Option(
        default=None,
        type=file_path_validator,
        help="Path to the output file")
    display_name = Option(
        default=None,
        help="New display-name for the graph")
    # available plots
    ptrsize_cdf = SubCommand(PtrSizeCdfDriver)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Memory map file parser.
        self.vmmap = VMMapFileParser(config=self.config.vmmap)
        self.vmmap.parse()
        # Symbol reader.
        self.symreader = SymReader(vmmap=self.vmmap,
                                   path=self.config.elfpath)
        # Output file path, defaults to the current graph path.
        self.outfile = self.config.outfile or self.config.graph
        # Provenance graph manager.
        self.pgm = ProvenanceGraphManager.load(self.config.graph)

    def _get_stack_map(self):
        """
        Return the (start, end) range of the stack mapping, or None
        when the vmmap has no entry that grows down.
        """
        for vme in self.vmmap.get_model():
            if vme.grows_down:
                return (vme.start, vme.end)
        return None

    def _get_visit_chain(self):
        """Build the chain of graph visits (annotations + filters)."""
        vchain = ChainGraphVisit(self.pgm)
        vchain += ResolveSymbolsGraphVisit(self.pgm, self.symreader, None)
        vchain += FindLastExecve(self.pgm)
        # vchain += DetectStackCapability(self.pgm)
        stack_map = self._get_stack_map()
        if stack_map is None:
            # BUGFIX: _get_stack_map() returns None when the vmmap has no
            # stack entry; fail with a clear message instead of an opaque
            # TypeError on tuple unpacking.
            logger.error("No stack mapping in vmmap, the stack annotations "
                         "require a vmmap file with a stack entry")
            raise RuntimeError("No stack mapping found in vmmap")
        stack_begin, stack_end = stack_map
        vchain += DecorateStackCapabilities(self.pgm)
        vchain += DecorateStackAll(self.pgm, stack_begin, stack_end)
        vchain += DecorateMalloc(self.pgm)
        vchain += DecorateExecutable(self.pgm)
        vchain += DecorateGlobalPointers(self.pgm, self.symreader)
        vchain += DecorateCapRelocs(self.pgm, self.symreader)
        vchain += DecorateKernelCapabilities(self.pgm)
        vchain += DecorateAccessedInUserspace(self.pgm)
        vchain += FilterBeforeExecve(self.pgm)
        vchain += FilterNullVertices(self.pgm)
        vchain += FilterUnusedKernelVertices(self.pgm)
        return vchain

    def run(self):
        """
        Run the tool filtering stages and save the output graph.
        """
        vmmap = self.vmmap.get_model()
        # reset filters on the graph
        self.pgm.graph.clear_filters()
        # get the new filter chain
        visitor_chain = self._get_visit_chain()
        # apply all operations in order
        filtered_graph = visitor_chain(self.pgm.graph)
        # get the resulting vertex filter and apply it to the main graph
        vfilt, _ = filtered_graph.get_vertex_filter()
        self.pgm.graph.set_vertex_filter(vfilt)
        # if we have to change the display name, do it
        if self.config.display_name:
            self.pgm.graph.gp.name = self.config.display_name
        # write out the graph
        with ProgressTimer("Write output graph", logger):
            self.pgm.save(self.outfile)
        with suppress(AttributeError):
            if self.config.subcommand_class:
                # generate plot (would be nice to support more than 1 per run)
                plot = self.config.subcommand_class([self.pgm], self.vmmap,
                                                    config=self.config)
                plot.run()