def proc_write_capture_on_sys_write_enter(cpu, pc, fd, buf, cnt):
    """Append the bytes of a guest write to a per-file log on the host.

    Does nothing unless the current guest process name equals
    ``self._proc_name``. Otherwise the ``cnt`` bytes at guest address
    ``buf`` are read and appended to a file under ``self._log_dir`` whose
    name is derived from the guest path that ``fd`` resolves to. The log
    path is also recorded in ``self._files_written``.

    ``self`` is not a parameter; it is resolved from the surrounding scope.

    Args:
        cpu: CPU state handle forwarded to the PANDA APIs.
        pc: Unused.
        fd: Guest file descriptor being written to.
        buf: Guest virtual address of the data.
        cnt: Number of bytes to read from ``buf``.

    Raises:
        RuntimeError: If the guest buffer cannot be read.
    """
    curr_proc = panda.plugins['osi'].get_current_process(cpu)
    # ffi.string() raises on a NULL pointer; without a process name there is
    # nothing to compare against, so skip the event instead of crashing.
    if curr_proc == ffi.NULL or curr_proc.name == ffi.NULL:
        return
    curr_proc_name = ffi.string(curr_proc.name).decode()
    if self._proc_name != curr_proc_name:
        return

    try:
        data = panda.virtual_memory_read(cpu, buf, cnt)
    except ValueError as err:
        # Chain the original error so the underlying cause stays visible.
        raise RuntimeError(
            "Failed to read buffer: proc \'{}\', addr 0x{:016x}".format(
                curr_proc_name, buf)) from err

    file_name_ptr = panda.plugins['osi_linux'].osi_linux_fd_to_filename(
        cpu, curr_proc, fd)
    if file_name_ptr != ffi.NULL:
        file_path = ffi.string(file_name_ptr).decode()
    else:
        # Keep the captured data even when the fd cannot be resolved.
        file_path = "unknown_fd_{}".format(fd)

    # For informational purposes only, collection not reliant on this exact
    # mapping.
    if fd == 1:  # POSIX stdout
        file_path += ".stdout"
    elif fd == 2:  # POSIX stderr
        file_path += ".stderr"

    # Flatten the guest path into a single host file name.
    log_file = self._log_dir.joinpath(
        file_path.replace("//", "_").replace("/", "_"))
    with open(log_file, "ab") as f:
        f.write(data)
    self._files_written.add(str(log_file))
def syscall_enter(cpu, pc, call, ctx):
    """Record pointer/string syscall arguments of processes of interest.

    For every argument of ``call`` typed ``SYSCALL_ARG_PTR`` or
    ``SYSCALL_ARG_STR``, the argument value is taken as a buffer address.
    If the calling process (name cached per ASID in ``asid_to_procname``)
    is listed in ``procnames_of_interest``, a line is printed and
    ``(asid, proc_name, instruction count, syscall name)`` is appended to
    ``identified_buffers[address]``.
    """
    for idx in range(call.nargs):
        # Debug prints
        #type_str = ffi.string(ffi.cast("syscall_argtype_t", call.argt[idx]))
        #print(f"\tArg{idx}: size {call.argsz[idx]}, type {type_str}")

        # Only pointers passed to syscalls are logged: strings or pointers
        # to buffers.
        wanted_types = [
            argtypes['SYSCALL_ARG_PTR'], argtypes['SYSCALL_ARG_STR']
        ]
        if call.argt[idx] not in wanted_types:
            continue

        # The argument array is cast to uint64_t* _before_ indexing (TODO
        # from the original author: this is odd).
        pointer = int(ffi.cast('uint64_t*', ctx.args)[idx])
        asid = panda.current_asid(cpu)
        proc = panda.plugins['osi'].get_current_process(cpu)

        if call.name != ffi.NULL:
            syscall_name = ffi.string(call.name).decode('utf8')
        else:
            syscall_name = "unknown"

        # Resolve the process name once per ASID and reuse it afterwards.
        if asid not in asid_to_procname:
            if proc.name != ffi.NULL:
                asid_to_procname[asid] = ffi.string(proc.name).decode('utf8')
            else:
                asid_to_procname[asid] = "unknown"
        proc_name = asid_to_procname[asid]

        if proc_name not in procnames_of_interest:
            continue

        print(
            f"Process: {proc_name} ({ctx.asid}) syscall {syscall_name} with buffer at 0x{pointer:x}"
        )
        identified_buffers.setdefault(pointer, []).append(
            (asid, proc_name, panda.rr_get_guest_instr_count(), syscall_name))
def on_sys_read_return(cpu, pc, fd, buf, count):
    """Print the current process, the syscall return value and the file read.

    The process name and file name fall back to the string ``"error"`` when
    the corresponding pointer is NULL. Otherwise they are whatever
    ``ffi.string`` returns and are interpolated into the message as-is.
    """
    osi = panda.plugins['osi']
    current = osi.get_current_process(cpu)
    if current != ffi.NULL:
        reader = ffi.string(current.name)
    else:
        reader = "error"

    path_ptr = panda.plugins['osi_linux'].osi_linux_fd_to_filename(
        cpu, current, fd)
    if path_ptr != ffi.NULL:
        source = ffi.string(path_ptr)
    else:
        source = "error"

    # Return value of the syscall as reported by the syscalls2 plugin.
    retval = panda.plugins['syscalls2'].get_syscall_retval(cpu)
    print(f"[PANDA] {reader} read {retval} bytes from {source}")
def __init__(self, panda, cpu, fd, cmd, guest_ptr, use_osi_linux=False):
    """Decode an ioctl command word and snapshot its guest argument buffer.

    Args:
        panda: PANDA object used for guest memory reads and plugin access.
        cpu: CPU state handle forwarded to the PANDA APIs.
        fd: Guest file descriptor the ioctl was issued on.
        cmd: Raw 32-bit ioctl command value.
        guest_ptr: Guest address of the optional argument buffer.
        use_osi_linux: When true, also resolve the process and file names.

    Raises:
        RuntimeError: If the command declares an argument buffer and that
            buffer cannot be read from guest memory.
    """
    do_ioctl_init(panda.arch)
    self.cmd = ffi.new("union IoctlCmdUnion*")
    self.cmd.asUnsigned32 = cmd
    self.original_ret_code = None
    self.osi = use_osi_linux

    # Optional syscall argument: pointer to buffer
    if self.cmd.bits.arg_size > 0:
        self.has_buf = True
        self.guest_ptr = guest_ptr
        # Only the memory read can fail, so it alone sits in the try body.
        try:
            self.guest_buf = panda.virtual_memory_read(
                cpu, self.guest_ptr, self.cmd.bits.arg_size)
        except ValueError as err:
            # Chain the original error so the underlying cause stays visible.
            raise RuntimeError(
                "Failed to read guest buffer: ioctl({})".format(
                    str(self.cmd))) from err
    else:
        self.has_buf = False
        self.guest_ptr = None
        self.guest_buf = None

    # Optional OSI usage: process and file name
    if self.osi:
        proc = panda.plugins['osi'].get_current_process(cpu)
        proc_name_ptr = proc.name
        file_name_ptr = panda.plugins[
            'osi_linux'].osi_linux_fd_to_filename(cpu, proc, fd)
        self.proc_name = ffi.string(proc_name_ptr).decode()
        self.file_name = ffi.string(file_name_ptr).decode()
    else:
        self.proc_name = None
        self.file_name = None
def on_sys_read_return(cpu, pc, fd, buf, count):
    """Print which file the current process read from.

    When the process name contains ``b"cat"``, the memory map is pushed to
    Ghidra via ``populate_ghidra`` and an ipdb breakpoint is entered.

    The process name and file name fall back to the string ``"error"`` when
    the corresponding pointer is NULL.
    """
    proc = panda.plugins['osi'].get_current_process(cpu)
    have_proc = proc != ffi.NULL
    procname = ffi.string(proc.name) if have_proc else "error"
    fname_ptr = panda.plugins['osi_linux'].osi_linux_fd_to_filename(
        cpu, proc, fd)
    fname = ffi.string(fname_ptr) if fname_ptr != ffi.NULL else "error"
    print(f"[PANDA] {procname} read from {fname}")

    # The "error" placeholder is a str, and testing b"cat" against a str
    # raises TypeError, so only run the bytes check when a process was found.
    if have_proc and b"cat" in procname:
        populate_ghidra(cpu, pc)
        import ipdb
        ipdb.set_trace()
def virt_mem_after_read(cpustate, pc, addr, size, buf):
    """Print the contents of memory reads of at least five bytes.

    Nothing is printed when ``size`` is below 5 or when no current process
    can be determined.

    Args:
        cpustate: CPU state handle forwarded to the PANDA APIs.
        pc: Unused (see the note on the print below).
        addr: Address value that is printed in the message.
        size: Number of bytes that were read.
        buf: cdata pointer to the bytes that were read.
    """
    if size < 5:
        return
    current = panda.get_current_process(cpustate)
    if current == ffi.NULL:
        return

    curbuf = ffi.cast("char*", buf)
    buf_addr = hex(int(ffi.cast("uint64_t", buf)))
    # Bound the conversion by `size`: the buffer holds `size` bytes and is
    # not guaranteed to be NUL-terminated, so an unbounded ffi.string() could
    # read past its end.
    buf_str = ffi.string(curbuf, size).decode(errors='ignore')
    # NOTE(review): the message says "at pc" but the value printed is `addr`,
    # not `pc`; left unchanged, confirm which one is intended.
    print("Read buf: %s, size: %x, at pc: %x %s" %
          (buf_addr[2:], size, addr, buf_str))
def tainted_branch(addr, size):
    """Print the address of a tainted branch and the current process name."""
    cpu_state = panda.get_cpu()
    # The program counter is fetched but not used; the call is kept so the
    # sequence of PANDA calls is unchanged.
    panda.current_pc(cpu_state)
    osi = panda.plugins['osi']
    proc_name = ffi.string(osi.get_current_process(cpu_state).name)
    # Disabled filter from the original: if name == b'querystr.cgi':
    print(f'BRANCH at addr {addr} was tainted in proc {proc_name}')
def new_asid(cpu, oldasid, newasid):
    """Add the current process name to ``seen``; always returns 0.

    The name is only looked up when the global ``reverted`` is truthy and
    ``panda.arch`` is listed in ``osi_supported``.
    """
    global reverted
    print("ASID", reverted, panda.arch)

    # Bail out early before the revert or when OSI is unsupported.
    if not reverted or panda.arch not in osi_supported:
        return 0

    current = panda.plugins['osi'].get_current_process(cpu)
    proc_name = ffi.string(current.name)
    if proc_name not in seen:
        seen.add(proc_name)
    return 0
def __init__(self, proc_object):
    """Copy identifying fields out of ``proc_object``.

    Args:
        proc_object: Object exposing ``pid``, ``ppid``, ``create_time`` and
            ``name``; ``name`` is converted with ``ffi.string``.
    """
    self.pid = proc_object.pid
    self.ppid = proc_object.ppid
    self.start_time = proc_object.create_time
    try:
        self.name = ffi.string(proc_object.name).decode()
    except Exception:
        # Best effort: an unreadable or undecodable name becomes "?".
        # Unlike a bare `except:`, this lets KeyboardInterrupt and
        # SystemExit propagate.
        self.name = "?"
    self.children = set()
    self.parent = None
def get_calltree(cpu):
    """Return the ancestry of the current process as ``"a (1) -> b (2)"``.

    The oldest known ancestor comes first and the current process last.
    Prints an error and returns ``None`` when the current process cannot be
    determined.
    """
    current = panda.plugins['osi'].get_current_process(cpu)
    if current == ffi.NULL:
        print("Error determining current process")
        return

    by_pid = panda.get_processes_dict(cpu)
    lineage = [{
        'name': ffi.string(current.name).decode('utf8', 'ignore'),
        'pid': current.pid,
        'parent_pid': current.ppid,
    }]

    # Walk parent links until pid <= 1 or the parent is not in the table.
    while True:
        tail = lineage[-1]
        if tail['pid'] <= 1 or tail['parent_pid'] not in by_pid:
            break
        lineage.append(by_pid[tail['parent_pid']])

    return " -> ".join(
        f"{entry['name']} ({entry['pid']})" for entry in reversed(lineage))
def __str__(self):
    """Render the ioctl as text, with optional process/file and buffer."""
    fields = self.cmd.bits
    direction = ffi.string(ffi.cast("enum ioctl_direction", fields.direction))
    summary = f"dir={direction},arg_size={fields.arg_size:x},cmd={fields.cmd_num:x},type={fields.type_num:x}"

    # The process/file prefix is only present when OSI was enabled.
    prefix = ""
    if self.osi:
        prefix = "\'{}\' using \'{}\' - ".format(self.proc_name,
                                                 self.file_name)

    # Requests without an argument buffer omit the ptr/buf fields.
    if self.guest_ptr is None:
        return prefix + f"ioctl({summary}) -> {self.original_ret_code}"
    return prefix + f"ioctl({summary},ptr={self.guest_ptr:08x},buf={self.guest_buf}) -> {self.original_ret_code}"
def populate_ghidra(cpu, pc):
    """Mirror the guest memory map into Ghidra and decompile around ``pc``.

    All existing memory segments of ``currentProgram`` are deleted. One
    initialized block is then created per mapping reported by
    ``panda.get_mappings`` and filled with guest memory when it can be
    read. The program is analyzed, the function containing ``pc`` is
    decompiled and printed, and the Ghidra cursor is moved to ``pc``.

    Everything runs inside one Ghidra transaction. It is committed on
    success and rolled back if any step raises.
    """
    from random import randint

    tid = currentProgram.startTransaction("BRIDGE: Change Memory Sections")
    committed = False
    try:
        memory = currentProgram.getMemory()
        delete_all_memory_segments(memory, monitor)

        names = set()
        for mapping in panda.get_mappings(cpu):
            if mapping.file != ffi.NULL:
                name = ffi.string(mapping.file).decode()
            else:
                name = "[unknown]"
            # Append random suffixes until the block name is unique.
            while name in names:
                name += ":" + hex(randint(0, 100000000))
            names.add(name)

            memory.createInitializedBlock(name, toAddr(mapping.base),
                                          mapping.size, 0, monitor, False)
            # Read guest memory once and reuse the result.
            contents = read_memory(cpu, mapping.base, mapping.size)
            if contents:
                memory.setBytes(toAddr(mapping.base), contents)

        analyzeAll(currentProgram)

        #import ghidra.app.decompiler as decomp
        decomp = b.remote_import("ghidra.app.decompiler")
        # get the decompiler interface
        iface = decomp.DecompInterface()
        # decompile the function
        iface.openProgram(currentProgram)
        fn = getFunctionContaining(toAddr(pc))
        # Second argument: decompiler timeout in seconds, per Ghidra's
        # DecompInterface.decompileFunction API.
        d = iface.decompileFunction(fn, 5, monitor)
        # get the C code as string
        if not d.decompileCompleted():
            print(d.getErrorMessage())
        else:
            code = d.getDecompiledFunction()
            ccode = code.getC()
            print(ccode)

        setCurrentLocation(toAddr(pc))
        committed = True
    finally:
        # Always close the transaction so it is not left open on failure.
        currentProgram.endTransaction(tid, committed)
def on_sys_read_return(cpu, pc, fd, buf, count):
    """Taint selected bytes of HTTP data read from a tracked network fd.

    Nothing happens unless ``(process name, fd)`` is in ``net_fds``. The
    buffer is read from guest memory using the value in ``R_EAX`` as its
    length (presumably the syscall's return value; confirm for the target
    architecture). Buffers that do not contain ``b'HTTP/'`` are reported
    and left untainted.

    ``taint_selection`` is a comma-separated list of byte offsets into the
    buffer. Each entry is a single offset (``"3"``) or an inclusive range
    (``"0:5"``). The n-th selected offset is labelled with taint index n.
    """
    # XXX: taint labels are applied in main_loop_wait so this might be
    # completely broken depending on when that runs (hopefully at the
    # return?). This needs testing. See taint_mixins.py:37
    proc = panda.plugins['osi'].get_current_process(cpu)
    proc_name = ffi.string(proc.name)
    if (proc_name, fd) not in net_fds:
        return

    bytes_read = cpu.env_ptr.regs[R_EAX]
    data = panda.virtual_memory_read(cpu, buf, bytes_read)

    if b'HTTP/' not in data:
        print(f"Not tainting buffer: {repr(data)}")
        # Don't taint non HTTP. Issues if requests get buffered TODO
        return

    # Label tainted (physical) addresses
    taint_idx = 0
    for group in taint_selection.split(','):
        first, sep, last = group.partition(':')
        if sep:
            # Range of bytes (i.e. 0:5), inclusive on both ends.
            offsets = range(int(first), int(last) + 1)
        else:
            # One byte.
            offsets = (int(group),)

        for taint_offset in offsets:
            if 0 <= taint_offset < len(data):
                taint_paddr = panda.virt_to_phys(
                    cpu, buf + taint_offset)  # Physical address
                panda.taint_label_ram(taint_paddr, taint_idx)
                print(
                    f"tainted byte {data[taint_offset]} with index {taint_idx}"
                )
            else:
                # Memory outside the bytes actually read is not part of
                # the request, so it is not labelled.
                print(
                    f"Not tainting offset {taint_offset}: only {len(data)} bytes were read"
                )
            # The index follows the position in the selection, so a skipped
            # offset does not shift the labels of later ones.
            taint_idx += 1
def bbe(cpu, tb):
    """Print the current process name the first time it is observed.

    Names already present in ``printed`` are ignored; new ones are added to
    it and printed.
    """
    current = panda.plugins['osi'].get_current_process(cpu)
    proc_name = ffi.string(current.name)
    if proc_name in printed:
        return
    printed.add(proc_name)
    print(proc_name)
def on_sys_read_return(cpu, pc, fd, buf, count):
    """Print the name of the file that ``fd`` refers to.

    The name is resolved for the current process. ``"error"`` is printed in
    its place when the descriptor cannot be resolved to a file name.
    """
    proc = panda.get_current_process(cpu)
    fname_ptr = panda.plugins['osi_linux'].osi_linux_fd_to_filename(
        cpu, proc, fd)
    # ffi.string() raises on a NULL pointer, so use a placeholder instead.
    fname = ffi.string(fname_ptr) if fname_ptr != ffi.NULL else "error"
    print("Reading from", fname)
def before_write(cpu, pc, start_addr, size, buf):
    """Track the latest write to each identified buffer before its use.

    For every address in ``[start_addr, start_addr + size)`` that is a key
    of ``identified_buffers``, each recorded use of that address in the
    current ASID whose instruction count is not lower than the current one
    is considered. For such a use,
    ``last_write_before[(asid, addr, icount_use)]`` is set to
    ``(write_icount, pc, mod_name, mod_base, in_kernel)`` unless an entry
    with an equal or higher ``write_icount`` already exists.

    As a side effect, ``base_addresses`` is updated with the lowest mapping
    base seen for each module named in ``procnames_of_interest``.
    """
    for addr in range(start_addr, start_addr + size):
        if addr not in identified_buffers:
            continue
        buf_base = buf + (addr - start_addr)
        # NOTE(review): `data` is not used anywhere below.
        data = ffi.string(ffi.cast('char*', buf_base))
        # Find the last instruction (highest icount) that wrote to the buffer,
        # but before the syscall's icount
        write_icount = panda.rr_get_guest_instr_count()
        asid = panda.current_asid(cpu)
        for (old_asid, proc_name, icount_use, _) in identified_buffers[addr]:
            # Only uses recorded for the current address space are relevant.
            if old_asid != asid:
                continue
            # Skip uses whose instruction count is lower than this write's.
            if icount_use < write_icount:
                continue
            in_kernel = panda.in_kernel(cpu)
            # Identify what module we're currently in so we can get a relative offset
            for module in panda.get_mappings(cpu):
                mod_base = None
                mod_name = ffi.string(module.name).decode(
                    "utf8") if module.name != ffi.NULL else '(null)'
                # Remember the lowest base address seen for modules of interest.
                if mod_name in procnames_of_interest:
                    if mod_name not in base_addresses or module.base < base_addresses[
                            mod_name]:
                        base_addresses[mod_name] = module.base
                # Debug: print memory map at each write we care about
                #print(f"0x{module.base:012x} - 0x{module.base+module.size:012x}: {mod_name}")
                if addr >= module.base and addr < module.base + module.size:  # Then it's in this module
                    mod_name = ffi.string(module.name).decode(
                        "utf8") if module.name != ffi.NULL else '(null)'
                    mod_base = module.base
                    break
            else:
                # Runs only when the loop above finished without `break`,
                # i.e. no mapping contains `addr`.
                print(
                    f"Warning: No loaded module owns address 0x{addr:x}. \nSkipping"
                )
                continue
            # Identify where PC is at time of write
            # (Disabled code kept as a bare string literal; it has no runtime effect.)
            ''' for module in panda.get_mappings(cpu): if pc >= module.base and pc < module.base+module.size: # Then it's in this module name = ffi.string(module.name).decode("utf8") if module.name != ffi.NULL else '(null)' print(f"PC 0x{pc:x} is in {name} offset: 0x{pc-module.base:x}") '''
            if (asid, addr, icount_use) not in last_write_before.keys():
                last_write_before[(asid, addr,
                                   icount_use)] = (write_icount, pc, mod_name,
                                                   mod_base, in_kernel)
            else:
                last_write_icount = last_write_before[(asid, addr,
                                                       icount_use)][0]
                if write_icount > last_write_icount:  # Replace with new write
                    last_write_before[(asid, addr,
                                       icount_use)] = (write_icount, pc,
                                                       mod_name, mod_base,
                                                       in_kernel)
def on_sys_accept4_return(cpu, pc, sockfd, addr, addr_len, flags):
    """Add ``(process name, value of R_EAX)`` to ``net_fds``.

    The value in ``R_EAX`` is treated as the newly accepted descriptor.
    """
    accepted_fd = cpu.env_ptr.regs[R_EAX]
    current = panda.plugins['osi'].get_current_process(cpu)
    owner = ffi.string(current.name)
    # Each process has its own fd space, so the key includes the owner.
    net_fds.add((owner, accepted_fd))
def on_sys_close_enter(cpu, pc, fd):
    """Drop ``(process name, fd)`` from ``net_fds`` if it is present."""
    current = panda.plugins['osi'].get_current_process(cpu)
    owner = ffi.string(current.name)
    # discard() is a no-op for descriptors that were never tracked.
    net_fds.discard((owner, fd))