def analyzeCall(oid, features, address, block_address, insns, line):
    system_calls = api.get_field("map_calls", [oid], "system_calls")
    internal_calls = api.get_field("map_calls", [oid], "internal_functions")
    call = convert_rva_offset(oid, line["addr"])
    previousCalls = [instr for instr in insns
                     if instr["addr"] < line["addr"] and instr["mnem"] == "call"]
    lastCall = 0
    if previousCalls:
        lastCall = max(previousCalls, key=lambda x: x["addr"])["addr"]
    # Source operands loaded between the previous call and this call site
    variables = [set([(instr["s_ops"][0]["data"]
                       if type(instr["s_ops"][0]["data"]) is not dict
                       else instr["s_ops"][0]["data"]["base"])
                      for instr in insns
                      if "s_ops" in instr
                      and instr["addr"] < line["addr"]
                      and instr["addr"] > lastCall
                      and instr["group"] == "load"])]
    if call in system_calls:
        features[block_address]["functions"].append({system_calls[call]: variables})
        for callType, funcs in callTypes.iteritems():
            if system_calls[call].lower() in funcs:
                features[address].update({callType: True})
    elif call in internal_calls:
        features[block_address]["functions"].append({internal_calls[call]: variables})
        if line["d_op"]["data"] == address:
            features[block_address].update({"recursion": True})

def graph_stats(args, opts):
    """ Plugin: Prints a set of statistics about the call and control flow graphs
        Syntax: graph_stats <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
    args = api.expand_oids(valid)
    for o in args:
        cg = api.get_field("call_graph", o, "graph")
        if not cg:
            continue
        size = len(cg.nodes())
        in_histo = build_freq_histo(cg.in_degree())
        out_histo = build_freq_histo(cg.out_degree())
        cfgs = api.retrieve("cfg", o)
        size_dict = {i: cfgs[i].size() for i in cfgs if cfgs[i]}
        size_histo = build_freq_histo(size_dict)
        bbs = 0
        for s in size_histo:
            bbs += s * size_histo[s]
        name = api.get_field("file_meta", o, "names").pop()
        print "-----------------"
        print " Graph stats for ", name
        print
        print " Functions = ", size
        print " Basic Blocks = ", bbs
        print " Call graph in-degree: "
        pretty_print_dicts(in_histo)
        print " Call graph out-degree: "
        pretty_print_dicts(out_histo)
        print " CFG sizes: "
        pretty_print_dicts(size_histo)
    return []

def import_table(args, opts):
    """ Displays the import table
        Syntax: import_table <oid>
    """
    args, invalid = api.valid_oids(args)
    args = api.expand_oids(args)
    if not args:
        if current_file:
            args = [current_file]
        else:
            raise ShellSyntaxError("Must provide an oid")
    for oid in args:
        print " - Import Table for %s %s " % (name(oid), oid)
        header = api.get_field("object_header", [oid], "header")
        if not header:
            print " --------------------------"
            print " <EMPTY HEADER>"
            print " --------------------------"
            continue
        src_type = api.get_field("src_type", oid, "type")
        if src_type == "PE":
            pe_import_table(header, opts)
        elif src_type == "ELF":
            elf_import_table(header, opts)
        elif src_type == "MACHO":
            macho_import_table(header, opts)
        else:
            print " - Source type %s is unsupported" % (src_type)

def process(oid, opts):
    logger.debug("process()")
    src_type = api.retrieve("src_type", oid)
    source = src_type["source"]
    header_oid = oid
    if source != "files":
        return False
    file_data = api.get_field(source, oid, "data")
    header = api.get_field("object_header", [header_oid], "header")
    # do not disassemble under certain conditions
    if not header:
        logger.info("Not processing oid %s: unrecognized file type", oid)
        return False
    else:
        logger.info("calling (python-based) disassemble_first_run on %s", oid)
    entries = header.get_entries()
    if not entries:
        logger.info("No entry points found for %s", oid)
        return False
    entry_address = entries.pop()
    functions = api.retrieve("function_extract", oid)
    if functions and entry_address in functions:
        insns = functions[entry_address]["insns"]
    else:
        insns = disassemble_entry.disassemble_entry(file_data, header, entry_address, logger)
    data = {"type": src_type["type"],
            "insns": insns,
            "num_insns": len(insns),
            "insn_mode": header.insn_mode}
    api.store(name, oid, data, opts)
    return True

def elf_header(oid, opts):
    header = api.get_field("object_header", [oid], "header")
    src = api.source(oid)
    file_size = api.get_field("file_meta", oid, "size")
    names = api.get_names_from_oid(oid)
    print " ELF Header for %s %s" % (name(oid), oid)
    if not header:
        print " <EMPTY>"
        print " --------------------------"
        return
    addr_size = "32 bit"
    if header.is_64bit():
        addr_size = "64 bit"
    entry_string = ""
    for e in header.get_entries():
        entry_string += "%s (%s) " % (hex(e), (e))
    print " - File Size: %s" % (file_size)
    print " - Addr Size: %s" % (addr_size)
    print " - Image Base: %s (%s)" % (hex(header.image_base), header.image_base)
    print " - Image Size: %s " % (header.image_size)
    print " - Code Size: %s " % (header.code_size)
    print " - Code Base: %s (%s)" % (hex(header.code_base), header.code_base)
    print " - Machine: %s" % (header.machine)
    print " - OS Version: %s" % (header.os_version)
    print " - Entry points: %s" % (entry_string)
    print_sections(header, opts)
    elf_import_table(header, opts)

def build_imports_string(self, oid):
    header = api.get_field("object_header", oid, "header")
    t = api.get_field("src_type", oid, "type")
    if not header:
        return ["N/A"]
    str_list = []
    str_list.append("Import Address Table :")
    if not header.imports:
        str_list.append(" + No import table")
    else:
        entries = header.imports.keys()
        entries.sort()
        for entry in entries:
            if t == "MACHO":
                str_list.append(" - Lib: " + entry)
                names = header.imports[entry].keys()
                names.sort()
                for name in names:
                    value = header.imports[entry][name]["n_value"]
                    str_list.append(" - %s : %s (%s)" % (name, hex(value), value))
            elif t == "ELF":
                str_list.append(" - Lib: " + entry)
                names = header.imports[entry].keys()
                names.sort()
                for name in names:
                    value = header.imports[entry][name]["value"]
                    str_list.append(" - %s : %s (%s)" % (name, hex(value), value))
            elif t == "PE":
                str_list.append(" - DLL: " + entry)
                if header.imports[entry]["addresses"]:
                    for imp in header.imports[entry]["addresses"]:
                        str_list.append(" - %s : %s" % (header.imports[entry]["addresses"][imp], imp))
    return str_list

def print_internal_functions(names):
    oid = current_file
    functions = api.retrieve("function_extract", oid)
    if not functions:
        return
    fbreaks = get_fbreaks(functions)
    name_to_offset = {}
    for offset in functions:
        name_to_offset[functions[offset]["name"]] = offset
    system_calls = api.get_field("map_calls", oid, "system_calls")
    internal_functions = api.get_field("map_calls", oid, "internal_functions")
    if not system_calls:
        print " No system calls found for %s %s" % (name(oid), oid)
    if not internal_functions:
        print " No internal functions found for %s %s" % (name(oid), oid)
    # Merge whichever call maps are available
    function_calls = dict()
    if system_calls:
        function_calls.update(system_calls)
    if internal_functions:
        function_calls.update(internal_functions)
    for fn in names:
        if fn not in name_to_offset:
            print " %s not a function in %s %s" % (fn, name(oid), oid)
            continue
        print " Function %s for %s %s" % (fn, name(oid), oid)
        print " -------------------------------------"
        print_disassembly(oid, functions[name_to_offset[fn]]['insns'], function_calls,
                          fbreaks, start=0, stop=0, height=32)
        print " -------------------------------------"

def name_filter(args, opts):
    """ Use without args to find files with that name, use with args to filter
        Syntax: name_filter %<oid> --name=<file_name>
    """
    if "name" not in opts:
        raise ShellSyntaxError("name_filter requires a --name=<file_name> option")
    oids = []
    valid, invalid = api.valid_oids(args)
    valid = api.expand_oids(valid)
    name = opts["name"]
    terms = name.split("*")
    if not args:
        if len(terms) == 1:
            return api.get_oids_with_name(opts["name"]).keys()
        else:
            valid = api.retrieve_all_keys("file_meta")
    if len(terms) == 1:
        for oid in valid:
            names = api.get_field("file_meta", oid, "names")
            if names and opts["name"] in names:
                oids.append(oid)
    else:
        for oid in valid:
            names = api.get_field("file_meta", oid, "names")
            if names:
                for name in names:
                    if name.startswith(terms[0]) and name.endswith(terms[1]):
                        oids.append(oid)
    return oids

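# Illustration only (hypothetical file names, not part of the plugin): how the
# single-"*" wildcard above behaves once the pattern is split on "*". A name
# matches when it starts with the text before the "*" and ends with the text
# after it.
terms = "lib*.so".split("*")              # -> ["lib", ".so"]
matches = [n for n in ["libc.so", "libm.so", "readme.txt"]
           if n.startswith(terms[0]) and n.endswith(terms[1])]
print matches                             # ['libc.so', 'libm.so']
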
def disassembly(args, opts):
    """ Displays the disassembly for a file
        Syntax: disassembly <oid> [--slice=<beg>:<end>]
    """
    args, invalid = api.valid_oids(args)
    args = api.expand_oids(args)
    if not args:
        if current_file:
            args = [current_file]
        else:
            raise ShellSyntaxError("Must provide an oid")
    start = stop = 0
    height = default_height
    if "slice" in opts:
        start, stop = get_slice(opts)
    if "height" in opts:
        try:
            height = int(opts["height"])
        except ValueError:
            raise ShellSyntaxError("Invalid height")
    mod_opts = {}
    if "module" in opts:
        mod_opts["module"] = opts["module"]
    for oid in args:
        disasm = api.get_field("disassembly", [oid], "insns", mod_opts)
        # comments = api.get_field("disassembly", [oid], "comments", mod_opts)
        functions = api.retrieve("function_extract", oid)
        if not functions:
            print " No functions found for %s %s" % (name(oid), oid)
            continue
        fbreaks = get_fbreaks(functions)
        system_calls = api.get_field("map_calls", oid, "system_calls")
        internal_functions = api.get_field("map_calls", oid, "internal_functions")
        function_calls = dict()
        if system_calls is None:
            print " System calls could not be determined for %s %s" % (name(oid), oid)
        else:
            function_calls.update(system_calls.items())
        if internal_functions is None:
            print " Internal functions could not be determined for %s %s" % (name(oid), oid)
        else:
            function_calls.update(internal_functions.items())
        if disasm:
            print " Disassembly for %s %s" % (name(oid), oid)
            print " -------------------------------------"
            print_disassembly(oid, disasm, function_calls, fbreaks, start, stop, height)
            print " -------------------------------------"
        else:
            print " %s could not be disassembled." % name(oid)

def export_files(oids, opts):
    base_name = "export"
    if "name" in opts and opts["name"]:
        base_name = opts["name"]
    for oid in oids:
        data = api.get_field("files", oid, "data")
        if not data:
            print "Not able to process %s" % oid
            continue
        name = api.get_field("file_meta", oid, "names").pop()
        name = base_name + "_" + name
        write_file(name, data)

def print_relationships():
    no_relationships = True
    for e in family:
        if not family[e]:
            continue
        no_relationships = False
        name = api.get_field("file_meta", e, "names").pop()
        kid_names = []
        for kid in family[e]:
            kid_names.append(api.get_field("file_meta", kid, "names").pop())
        print " -", name, "->", ", ".join(kid_names)
    if no_relationships:
        print " < NONE >"
    print

def process(oid, opts):
    logger.debug("process()")
    src_type = api.get_field("src_type", oid, "type")
    if src_type != "DEX":
        return False
    src = api.source(oid)
    data = api.get_field(src, oid, "data")
    if not data:
        logger.debug("Not able to process %s", oid)
        return False
    result = parse_dex(data, oid)
    if result:
        api.store(name, oid, result, opts)
        return True
    return False

def export_tar_zip(oids, opts, type):
    name = "export"
    if "name" in opts and opts["name"]:
        name = opts["name"]
    if type == "tar" and not name.endswith(".tar"):
        name += ".tar"
    if type == "zip" and not name.endswith(".zip"):
        name += ".zip"
    mode = "w"
    if "bz2" in opts:
        mode += ":bz2"
        name += ".bz2"
    elif "gz" in opts:
        mode += ":gz"
        name += ".gz"
    fname = unique_scratch_file(name)
    xo = None
    if type == "tar":
        xo = tarfile.open(fname, mode=mode)
    if type == "zip":
        xo = zipfile.ZipFile(fname, mode=mode)
    tmp_files = []
    names = []
    for oid in oids:
        data = api.get_field("files", oid, "data")
        if not data:
            print "Not able to process %s" % oid
            continue
        name = api.get_field("file_meta", oid, "names").pop()
        names.append(name)
        t = tmp_file(name, data)
        tmp_files.append(t)
        if type == "tar":
            xo.add(t)
        if type == "zip":
            xo.write(t)
    xo.close()
    print " - File(s) %s exported to %s" % (", ".join(names), fname)
    for f in tmp_files:
        os.remove(f)

def display_file_data(self, oid, fnames):
    meta = api.retrieve('file_meta', oid)
    size = meta['size']
    if size > 1000000000:
        formatted_size = "(%dG)" % (size / 1000000000)
    elif size > 1000000:
        formatted_size = "(%dM)" % (size / 1000000)
    elif size > 1000:
        formatted_size = "(%dK)" % (size / 1000)
    else:
        formatted_size = ''
    file_type = api.get_field("src_type", oid, 'type')
    file_description = "File Names: " + fnames + '\n'
    file_description += "Size: %7d %s\n" % (size, formatted_size)
    file_description += "File Type: " + file_type + '\n'
    file_description += "Oxide ID: " + oid + '\n'
    self.metadata_display.delete(index1='1.0', index2=tk.END)
    self.metadata_display.insert(index='1.0', chars=file_description)
    # calculate and display the histogram
    display_option = self.file_display_type_str.get()
    if display_option == ' ':
        self.clear()
        return
    elif display_option == 'byte histogram':
        hist = self.get_byte_hist(oid)
    elif display_option == 'opcode histogram':
        hist = self.get_opcode_hist(oid, cutoff=20)
    self.display_hist(hist)

def display_file_data(self, oid, fnames):
    data_list = []
    self.file_display.delete(index1='1.0', index2=tk.END)
    meta = api.retrieve('file_meta', oid)
    size = meta['size']
    if size > 1000000000:
        formatted_size = "(%dG)" % (size / 1000000000)
    elif size > 1000000:
        formatted_size = "(%dM)" % (size / 1000000)
    elif size > 1000:
        formatted_size = "(%dK)" % (size / 1000)
    else:
        formatted_size = ''
    file_type = api.get_field("src_type", oid, 'type')
    file_description = "File Names: " + fnames + '\n'
    file_description += "Size: %7d %s\n" % (size, formatted_size)
    file_description += "File Type: " + file_type + '\n'
    file_description += "Oxide ID: " + oid + '\n'
    self.metadata_display.delete(index1='1.0', index2=tk.END)
    self.metadata_display.insert(index='1.0', chars=file_description)
    display_option = self.file_display_type_str.get()
    if display_option == 'hex ':
        data_list = self.build_hex_str(oid)
    elif display_option == 'header ':
        data_list = self.build_header_string(oid)
    elif display_option == 'imports ':
        data_list = self.build_imports_string(oid)
    elif display_option == 'byte histogram':
        data_list = self.build_hist_string('byte_histogram', oid)
    elif display_option == 'opcode histogram':
        data_list = self.build_hist_string('opcode_histogram', oid)
    for line in data_list:
        self.file_display.insert(index=tk.END, chars=line + "\n")

def convert_rva_offset(oid, rva):
    header = api.get_field("object_header", oid, "header")
    try:
        rva = int(rva)
    except (ValueError, TypeError):
        raise ShellSyntaxError("Unrecognized address %s" % rva)
    return header.get_offset(rva)

def get_val(args, opts):
    """ Get the value of bytes at offset, interpreted as little-endian
        Requires a default file to have been set.
        Format values are:
            c = 1 byte char
            b = 1 byte signed
            B = 1 byte unsigned
            h = 2 bytes signed
            H = 2 bytes unsigned
            i = 4 bytes signed
            I = 4 bytes unsigned
            q = 8 bytes signed
            Q = 8 bytes unsigned
        Each character may be preceded by a number of occurrences. For example
        4h2b is equivalent to hhhhbb
        Syntax: get_val <offset> <format_string>
    """
    if len(args) != 2:
        raise ShellSyntaxError("offset and format string required")
    offset = int(args[0])
    format = args[1]
    if not current_file:
        raise ShellSyntaxError("Must set default file using re_init")
    data = api.get_field("files", current_file, "data")
    if offset > len(data):
        raise ShellSyntaxError("Offset is beyond the end of the file")
    format = "<" + format  # little-endian, as documented above
    size = struct.calcsize(format)
    output = struct.unpack(format, data[offset:offset + size])
    for n in output:
        print n

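# Illustration only, separate from the plugin above: how a struct format string
# of this kind is interpreted. "<2H" reads two little-endian unsigned 2-byte
# values; the sample bytes are hypothetical.
import struct
sample = "\x01\x00\xff\x00"
print struct.unpack("<2H", sample)        # (1, 255)
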
def header(args, opts):
    """ Displays header info
        Syntax: header <oid> ... [--verbose]
    """
    args, invalid = api.valid_oids(args)
    args = api.expand_oids(args)
    if not args:
        if current_file:
            args = [current_file]
        else:
            raise ShellSyntaxError("Must provide an oid")
    for oid in args:
        src_type = api.get_field("src_type", oid, "type")
        if src_type == "PE":
            pe_header(oid, opts)
        elif src_type == "ELF":
            elf_header(oid, opts)
        elif src_type == "MACHO":
            macho_header(oid, opts)
        elif src_type == "OSX Universal Binary":
            osx_header(oid, opts)
        else:
            print " - Source type %s is unsupported" % (src_type)
    return []

def print_macho_header(header, oid, opts, embedded=False):
    src = api.source(oid)
    file_size = api.get_field("file_meta", oid, "size")
    names = api.get_names_from_oid(oid)
    indent = False
    tab = ""
    if embedded:
        indent = True
        tab = "\t"
    if embedded:
        print "%s Embedded Mach-O Header for %s %s" % (tab, name(oid), oid)
    else:
        print " Mach-O Header for %s %s" % (name(oid), oid)
    if not header:
        print " <EMPTY>"
        print " --------------------------"
        return
    entry_string = ""
    for e in header.get_entries():
        entry_string += "%s (%s) " % (hex(e), (e))
    print "%s - File Size: %s" % (tab, file_size)  # FIXME get embedded file size
    print "%s - Addr Size: %s" % (tab, header.insn_mode)
    print "%s - Magic: %s" % (tab, header.magic)
    print "%s - Big Endian: %s" % (tab, header.big_endian)
    print "%s - Machine: %s" % (tab, header.machine)
    print "%s - UUID: %s" % (tab, header.uuid)
    print "%s - Entry points: %s" % (tab, entry_string)
    print_sections(header, opts, indent)
    macho_import_table(header, opts)

def function_view(args, opts):
    """ Display the named function
        Syntax: function [<function_name>]
        Note: a default file must be set with re_init
    """
    if not current_file:
        raise ShellSyntaxError("Must set a file. Run re_init <oid>")
    oid = current_file
    function_names = args
    if not function_names:
        internal_functions = api.get_field("map_calls", oid, "internal_functions")
        if internal_functions is None:
            print " No internal functions found for %s %s" % (name(oid), oid)
            return
        else:
            names = list(set(internal_functions.values()))
            names.sort()
            print " Internal function calls for %s %s" % (name(oid), oid)
            print " -------------------------------------"
            for n, f in enumerate(names):
                print " %s. %s" % (n, f)
            print " -------------------------------------"
            res = raw_input(" Select the number of the function to view: ")
            if not res.isdigit() or int(res) >= len(names):
                print " - %s is not a valid selection" % res
                return
            function_names = [names[int(res)]]

def add_translations(f, callback):
    join_words = lambda cs: (words() ^ 'n'
                             | where(tag_is_not('ignored'))
                             | join(cs, f_note_pk, f_note_fk))
    checked_cards = lambda: (cards() ^ 'c'
                             | where(is_checked(), deck_is('unsorted'), is_not_suspended()))
    new_cards = lambda: checked_cards() | where(is_recent())
    data = execute(db(), join_words(new_cards()) | select('n.id', '@flds'))
    (nids, fss) = zip(*data) or ([], [])
    ws = map(lambda flds: splitFields(flds)[api.get_field('word', 'Words')], fss)
    map(lambda (nid, tr): api.upd_note(nid, {'Translation': tr},
                                       [] if len(tr) > 0 else ['ignored']),
        zip(nids, dmap(f, ws, callback, conf.feedback_time)))
    execute(db(), cards() ^ 'c2'
            | where(tmpl_is('word', 'filtered'))
            | join(join_words(new_cards()), '@nid', 'n.id')
            | update(set_deck('filtered'), set_recent(), set_learning())
            | with_pk('@id'))
    execute(db(), checked_cards() | update(set_suspended()))

def process(oid, opts):
    logger.debug("process()")
    insns = api.get_field("disassembly", oid, "insns")
    if insns is None:
        return False
    offsets = insns.keys()
    offsets.sort()
    count = 0
    start_offset = 0
    nop_run = {}
    for offset in offsets:
        if insns[offset]["mnem"] == "nop":
            if count == 0:
                start_offset = offset
            count += 1
        else:
            if count > 0:
                nop_run[start_offset] = count
            count = 0
            start_offset = 0
    if count > 0:
        nop_run[start_offset] = count
    api.store(name, oid, nop_run, opts)
    return True

def process(oid, opts):
    logger.debug("process()")
    disasm = api.get_field("disassembly", [oid], "insns")
    if disasm is None:
        return False
    opcodes = get_opcodes(disasm)
    api.store(name, oid, {"opcodes": opcodes}, opts)
    return True

def process(oid, opts):
    logger.debug("process()")
    src_type = api.get_field("src_type", oid, "type")
    if src_type != "PE":
        return False
    file_meta = api.retrieve('pe_parse', oid)
    if not file_meta:
        logger.debug("Not able to process %s", oid)
        return False
    data = api.get_field("files", oid, "data")
    if not data:
        return False
    result = detect_packer(file_meta, data)
    if result:
        api.store(name, oid, result, opts)
        return True
    return False

def summarize(args, opts):
    """ Gives a summary of a set of files, including types, extensions, etc.
        If no argument is passed, gives a summary for the entire datastore
        (may be very slow).
        Syntax: summarize %<oid>
    """
    valid, invalid = api.valid_oids(args)
    valid = set(api.expand_oids(valid))
    types = defaultdict(int)
    extensions = defaultdict(int)
    sizes = [0, 0, 0, 0, 0, 0]
    if not args:
        valid = set(api.retrieve_all_keys("file_meta"))
    for oid in valid:
        meta = api.retrieve("file_meta", oid)
        names = meta["names"]
        if names:
            for name in names:
                parts = name.split(".")
                if len(parts) > 1:
                    extensions[parts[-1]] += 1
                else:
                    extensions["None"] += 1
        t = api.get_field("src_type", oid, "type")
        if t:
            types[t] += 1
        size = meta["size"]
        if size < 1024:
            sizes[0] += 1
        elif size < 10*1024:
            sizes[1] += 1
        elif size < 100*1024:
            sizes[2] += 1
        elif size < 1024*1024:
            sizes[3] += 1
        elif size < 10*1024*1024:
            sizes[4] += 1
        else:
            sizes[5] += 1
    print "\nTotal files in set: ", len(valid)
    print "\nExtensions (files with multiple names counted more than once):"
    exts = extensions.keys()
    exts = sorted(exts, key=lambda val: extensions[val], reverse=True)
    for e in exts:
        print " ", e, " \t\t :\t\t ", extensions[e]
    print "\nTypes:"
    ts = types.keys()
    ts = sorted(ts, key=lambda val: types[val], reverse=True)
    for t in ts:
        print " ", t, " \t\t :\t\t ", types[t]
    print "\nSizes: "
    print " Under 1k :", sizes[0]
    print " 1k - 10k :", sizes[1]
    print " 10k - 100k :", sizes[2]
    print " 100k - 1MB :", sizes[3]
    print " 1MB - 10MB :", sizes[4]
    print " over 10 MB :", sizes[5]
    return None

def process(oid, opts):
    logger.debug("process()")
    functions = api.retrieve("function_extract", oid)
    if not functions:
        return False
    header = api.get_field("object_header", oid, "header")
    if not header:
        return False
    data = api.get_field(api.source(oid), oid, "data")
    if not data:
        return False
    g = build_basic_blocks(functions, header, data)
    api.store(name, oid, g, opts)
    return True

def process(oid, opts):
    logger.debug("Processing oid %s", oid)
    src_type = api.get_field("src_type", oid, "type")
    if src_type != "ELF":
        return False
    header = api.retrieve("elf_parse", oid)
    if not header:
        return False
    api.store(name, oid, {"header": elf_repr(header)}, opts)
    return True

def osx_header(oid, opts):
    header = api.get_field("object_header", [oid], "header")
    src = api.source(oid)
    file_size = api.get_field("file_meta", oid, "size")
    names = api.get_names_from_oid(oid)
    print " OSX Universal Header for %s %s" % (name(oid), oid)
    if not header:
        print " <EMPTY>"
        print " --------------------------"
        return
    print " - File Size: %s" % (file_size)
    print " - Magic: %s" % (header.magic)
    print " - Big Endian: %s" % (header.big_endian)
    print " - Embedded Files: %s" % (header.num_embedded)
    for embedded_header in header.embedded:
        print " -------------------------------------"
        print_macho_header(embedded_header, oid, opts, embedded=True)
        print " -------------------------------------"
        print

def process(oid, opts):
    logger.debug("process()")
    src_type = api.get_field("src_type", oid, "type")
    if src_type != "PE":
        return False
    header = api.retrieve("pe_parse", oid)
    if not header:
        return False
    api.store(name, oid, {"header": pe_repr(header)}, opts)
    return True

def mapper(oid, opts, jobid=False):
    logger.debug("mapper()")
    src = api.source(oid)
    if api.exists(name, oid, opts):
        return oid
    data = api.get_field(src, oid, "data")
    if not data:
        return None
    out_histo = build_histo(data)
    api.store(name, oid, out_histo, opts)
    return oid

def mapper(oid, opts, jobid=False):
    logger.debug("mapper()")
    src = api.source(oid)
    if api.documentation(src)["set"]:
        return None
    if api.exists(name, oid, opts):
        return oid
    opcodes = api.get_field("opcodes", oid, "opcodes")
    if not opcodes:
        return None
    out_histo = build_histo(opcodes)
    api.store(name, oid, out_histo, opts)
    return oid