def process(oid, opts):
    """Disassemble the entry-point function of ``oid`` and store the result.

    Only objects whose ``src_type`` source is ``"files"`` and that have an
    ``object_header`` with at least one entry point are handled.  The
    instructions come from ``function_extract`` when it already contains the
    entry address; otherwise ``disassemble_entry`` is run on the file data.

    Args:
        oid: Identifier of the object to process.
        opts: Options passed through unchanged to ``api.store``.

    Returns:
        True when a result was stored, False when the object was skipped.
    """
    logger.debug("process()")

    src_type = api.retrieve("src_type", oid)
    if not src_type:
        logger.info("Not processing oid %s: no src_type available", oid)
        return False

    source = src_type["source"]
    if source != "files":
        return False

    # do not disassemble under certain conditions
    header = api.get_field("object_header", [oid], "header")
    if not header:
        # Use the module logger like every other message in this function.
        logger.info("Not processing oid %s: unrecognized file type", oid)
        return False
    logger.info("calling (python-based) disassemble_first_run on %s", oid)

    entries = header.get_entries()
    if not entries:
        logger.info("No entry points found for %s", oid)
        return False
    entry_address = entries.pop()

    functions = api.retrieve("function_extract", oid)
    if functions and entry_address in functions:
        insns = functions[entry_address]["insns"]
    else:
        # The raw file data is only needed for this fallback, so it is
        # fetched here instead of up front.
        file_data = api.get_field(source, oid, "data")
        insns = disassemble_entry.disassemble_entry(
            file_data, header, entry_address, logger)

    data = {
        "type": src_type["type"],
        "insns": insns,
        "num_insns": len(insns),
        "insn_mode": header.insn_mode,
    }
    api.store(name, oid, data, opts)
    return True
def process(oid, opts):
    """Collect per-function structural features for ``oid`` and store them.

    For every function reported by ``basic_blocks`` an entry keyed by function
    address is created, holding one sub-entry per basic block (keyed by the
    block's ``first_insn``).  Each ``call`` instruction is handed to
    ``analyzeCall``; afterwards the counts of conditionals, loops (conditionals
    whose next jump is not forward), ifs (conditionals whose next jump is
    forward) and arithmetic instructions are added to the function's entry.
    Finally ``checkDynamicCalls`` is run over the whole feature dictionary.

    Args:
        oid: Identifier of the object to process.
        opts: Options passed through unchanged to ``api.store``.

    Returns:
        True when features were stored, False when ``function_extract`` or
        ``basic_blocks`` produced nothing for this object.
    """
    logger.debug("process()")

    asm = api.retrieve("function_extract", [oid])
    if not asm:
        return False
    basic_blocks = api.retrieve("basic_blocks", [oid])
    if not basic_blocks:
        # Without block boundaries no instruction can be mapped to a block.
        return False

    features = {}
    for func_address, blocks in sorted(basic_blocks.items()):
        func_features = features[func_address] = {}
        for block in blocks:
            func_features[block["first_insn"]] = {
                "functions": [],
                "conditional_structure": [],
            }

    for address, func in sorted(asm.items()):
        insns = func["insns"]
        func_features = features[address]

        for line in insns:
            if line["mnem"] != "call":
                continue
            # The enclosing block is the one with the greatest start address
            # that does not exceed the instruction address.
            block_address = max(
                [first for first in func_features if first <= line["addr"]])
            analyzeCall(oid, func_features, address, block_address, insns, line)
        # NOTE: per-instruction conditional analysis (analyzeConditionals on
        # instructions of group 'cond') is currently disabled.

        conditionals = [instr for instr in insns
                        if instr["group"] == "cond" and correctCMP(instr)]
        # Determine the jump direction once per conditional; a forward jump
        # counts as an "if", anything else as a loop.
        forward = [forwardJump(nextJump(insns, instr["addr"]), instr["addr"])
                   for instr in conditionals]
        ifs = len([flag for flag in forward if flag])
        loops = len(conditionals) - ifs
        maths = len([instr for instr in insns if instr["group"] == "arith"])

        func_features.update({
            "conditionals": len(conditionals),
            "loops": loops,
            "ifs": ifs,
            "math": maths,
        })

    checkDynamicCalls(oid, features)
    api.store(name, oid, features, opts)
    return True
def caseRetrieve():
    """Prompt for a transaction hash and pass the answer to ``api.retrieve``."""
    hash_question = {
        'type': 'input',
        'name': 'hash',
        'message': 'Please input the transaction hash',
    }
    reply = prompt([hash_question])
    api.retrieve(reply['hash'])
def extract_osx(args, opts):
    """ Imports objects from an OSX Universal Binary
        Syntax: extract_osx <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")

    # Oids of every embedded file imported during this call.  Initialised
    # before the loop so the final return is defined even when every input
    # is skipped, and so results from all inputs are reported.
    extracted = []

    for oid in api.expand_oids(valid):
        meta = api.retrieve("file_meta", oid)
        file_name = meta["names"].pop()
        src_type = api.retrieve("src_type", oid)
        if src_type["type"] != "OSX Universal Binary":
            print(" - %s (%s) is not an OSX Universal binary file, skipping" % (file_name, oid))
            continue

        data = api.retrieve("files", oid)["data"]
        if not data:
            print(" - No data for this file %s (%s) " % (file_name, oid))
            continue

        header = api.retrieve("object_header", oid)["header"]
        print(" - Found %s files embedded in file %s (%s)" % (header.num_embedded, file_name, oid))

        imported = []
        newfiles = 0
        for f in header.embedded:
            beg = f.header_offset
            end = f.file_end
            print(" + Extracting bytes %s:%s of file type %s" % (beg, end, f.machine))

            fname = file_name + "_" + f.machine
            fpath = os.path.join(api.scratch_dir, fname)
            print(" + Writing temp file to %s" % (fpath))
            try:
                with open(fpath, 'wb') as fd:
                    fd.write(data[beg:end])
                print(" + Importing file %s" % (fpath))
                # A separate name keeps the outer loop variable intact.
                new_oid, newfile = api.import_file(fpath)
            finally:
                # Never leave the slice behind in the scratch directory,
                # even if writing or importing fails.
                if os.path.exists(fpath):
                    print(" + Removing temp file from the scratch directory")
                    os.remove(fpath)

            imported.append(new_oid)
            if newfile:
                newfiles += 1
            print("")

        print(" - Extracted and imported %s files, %s were new" % (len(imported), newfiles))
        extracted.extend(imported)

    # Return a list of the oids corresponding to the files extracted
    return extracted
def process(oid, opts):
    """Compute opcode collocations for ``oid`` and store them.

    Both the opcode histogram and the opcode 2-grams are retrieved first;
    if either is empty nothing is stored and False is returned.
    """
    logger.debug("process()")

    histogram = api.retrieve("opcode_histogram", oid, {})
    bigrams = api.retrieve("opcode_ngrams", oid, {"n": 2})
    if not (histogram and bigrams):
        return False

    api.store(name, oid, collocations(histogram, bigrams), opts)
    return True
def process(oid, opts):
    """Store the SHA-1 hashes of every basic block's mnemonic sequence.

    For each function and each of its basic blocks, the mnemonics of the
    instructions whose address lies within [first_insn, last_insn] are
    concatenated and hashed.  The set of hex digests is stored under the
    key ``"hashes"``.

    Returns:
        True when hashes were stored, False when ``basic_blocks`` or
        ``function_extract`` produced nothing.
    """
    logger.debug("process()")

    basic_blocks = api.retrieve("basic_blocks", oid)
    functions = api.retrieve("function_extract", oid)
    if not (basic_blocks and functions):
        return False

    digests = set()
    for func_addr in functions:
        for block in basic_blocks[func_addr]:
            block_mnems = "".join(
                insn["mnem"]
                for insn in functions[func_addr]["insns"]
                if block["first_insn"] <= insn["addr"] <= block["last_insn"]
            )
            digests.add(hashlib.sha1(block_mnems).hexdigest())

    api.store(name, oid, {"hashes": digests}, opts)
    return True
def size_filter(args, opts):
    """ Filter files by size in bytes
        Syntax: size_filter %<oid> --min=<size> --max=<size>

        Both bounds are exclusive: a file is kept when min < size < max.
        Without --min the lower bound is 0; without --max there is no
        upper bound.
    """
    if not args:
        raise ShellSyntaxError("File name not specified")

    min_size = int(opts["min"]) if "min" in opts else 0
    # None (not 0) means "no upper bound", so an explicit --max=0 is honoured
    # instead of being silently ignored.
    max_size = int(opts["max"]) if "max" in opts else None

    valid, invalid = api.valid_oids(args)
    oids = api.expand_oids(valid)

    filtered_oids = []
    for oid in oids:
        size = api.retrieve("file_meta", oid)["size"]
        if size > min_size and (max_size is None or size < max_size):
            filtered_oids.append(oid)
    return filtered_oids
def graph_stats(args, opts):
    """ Plugin: Prints a set of statistics about the call and control flow graphs
        Syntax: graph_stats <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")

    for oid in api.expand_oids(valid):
        call_graph = api.get_field("call_graph", oid, "graph")
        if not call_graph:
            continue

        num_functions = len(call_graph.nodes())
        in_histo = build_freq_histo(call_graph.in_degree())
        out_histo = build_freq_histo(call_graph.out_degree())

        # Size of every non-empty control flow graph, keyed like the cfg result.
        cfgs = api.retrieve("cfg", oid)
        cfg_sizes = {}
        for key in cfgs:
            if cfgs[key]:
                cfg_sizes[key] = cfgs[key].size()
        size_histo = build_freq_histo(cfg_sizes)

        # Each histogram entry maps a size to how often it occurs.
        bbs = sum(s * size_histo[s] for s in size_histo)

        file_name = api.get_field("file_meta", oid, "names").pop()
        print("-----------------")
        print(" Graph stats for  %s" % (file_name,))
        print("")
        print(" Functions =  %s" % (num_functions,))
        print(" Basic Blocks =  %s" % (bbs,))
        print(" Call graph in-degree: ")
        pretty_print_dicts(in_histo)
        print(" Call graph out-degree: ")
        pretty_print_dicts(out_histo)
        print(" CFG sizes: ")
        pretty_print_dicts(size_histo)

    return []
def handle(self):
    """Serve one paste-retrieval request on an already established connection.

    Reads the requested uid from ``self.request`` until a line feed arrives
    or the peer stops sending, looks the uid up with ``api.retrieve`` and
    sends the content back.  When the uid is unknown a "not found" message
    is sent instead.
    """
    # handle the get request
    chunks = []
    while True:
        chunk = self.request.recv(GeneralHandler.BUF_SIZE)
        logging.debug("Uid buffer is :|%s|", chunk.decode("UTF-8", "replace"))
        if not chunk:
            # An empty read means the peer closed its side; without this
            # check the loop would spin forever waiting for a newline.
            break
        chunks.append(chunk)
        if b'\x0a' in chunk:  # netcat support
            break

    # Decode once over the complete request so a multi-byte character split
    # across two reads is handled; undecodable bytes are replaced and simply
    # lead to a "not found" answer.
    decoded_uid = b"".join(chunks).decode("UTF-8", "replace").rstrip()
    logging.debug("Uid decoded is |%s|", decoded_uid)

    try:
        data = api.retrieve(decoded_uid)
    except db.NonExistentUID:
        data = "Uid %s not found" % decoded_uid

    # NOTE(review): the standard socket sendall() returns None on success and
    # raises on failure, so this branch is only reachable with a non-standard
    # request object - confirm whether it is still needed.
    state = self.request.sendall(data.encode("UTF-8"))
    if state:
        logging.debug('Data not fully transmitted')
        logging.warning('The data sent have not been transmitted properly')
    logging.debug('Data retrieved')
def display_file_data(self, oid, fnames):
    """Show the metadata of ``oid`` and the histogram selected in the UI.

    The metadata pane is replaced with the file names, size, file type and
    oxide id.  Depending on the selected display type either a byte
    histogram or an opcode histogram (cutoff 20) is drawn; for the blank
    selection, or any selection this method does not know, the display is
    cleared instead.

    Args:
        oid: Identifier of the file to display.
        fnames: Text shown after "File Names: ".
    """
    meta = api.retrieve('file_meta', oid)
    size = meta['size']
    if size > 1000000000:
        formatted_size = "(%dG)" % (size // 1000000000)
    elif size > 1000000:
        formatted_size = "(%dM)" % (size // 1000000)
    elif size > 1000:
        formatted_size = "(%dK)" % (size // 1000)
    else:
        formatted_size = ''

    file_type = api.get_field("src_type", oid, 'type')
    file_description = "File Names: " + fnames + '\n'
    file_description += "Size: %7d %s\n" % (size, formatted_size)
    file_description += "File Type: " + file_type + '\n'
    file_description += "Oxide ID: " + oid + '\n'
    self.metadata_display.delete(index1='1.0', index2=tk.END)
    self.metadata_display.insert(index='1.0', chars=file_description)

    # calculate and display the histogram
    display_option = self.file_display_type_str.get()
    if display_option == 'byte histogram':
        hist = self.get_byte_hist(oid)
    elif display_option == 'opcode histogram':
        hist = self.get_opcode_hist(oid, cutoff=20)
    else:
        # Blank or unrecognised selection: there is nothing to draw.  An
        # unrecognised value previously fell through with `hist` unbound.
        self.clear()
        return
    self.display_hist(hist)
def process(oid, opts):
    """Record import metadata (name, stat, size, time) for a file.

    A new record is created when none exists; otherwise the import is added
    to the existing record.  The record maps import time -> import name ->
    stat under ``"metadata"``, and keeps the set of all names under
    ``"names"`` and the size under ``"size"``.  An ``import_time`` tag is
    applied to the oid afterwards.

    Args:
        oid: Identifier of the imported file.
        opts: Must contain ``"file_location"`` and ``"stat"`` (a mapping
            with a ``"size"`` entry); also passed on to the api calls.

    Returns:
        True.
    """
    logger.debug("Processing file %s", oid)
    import_time = int(time.time())
    import_name = os.path.basename(opts["file_location"])  # strip dir from name
    file_stat = opts["stat"]
    size = file_stat["size"]

    # Get the existing file info - if any
    data = None
    if api.exists(name, oid, opts):
        data = api.retrieve(name, oid, opts, True)

    if not data:
        # If file info doesn't exist create new
        data = {
            "metadata": {import_time: {import_name: file_stat}},
            "names": set([import_name]),
            "size": size,
        }
    else:
        # If data already exists append
        if "size" not in data:
            data["size"] = size
        # Merge into the entry for this second rather than replacing it, so
        # an import under another name within the same second is not lost.
        data["metadata"].setdefault(import_time, {})[import_name] = file_stat
        data["names"].add(import_name)

    api.store(name, oid, data, opts)

    # Add import time tag
    api.apply_tags(oid, {"import_time": import_time})
    return True
def print_internal_functions(names):
    """Print the disassembly of the named functions of the current file.

    Args:
        names: Function names to print.  Names that are not functions of the
            current file are reported and skipped.
    """
    oid = current_file
    functions = api.retrieve("function_extract", oid)
    if not functions:
        # Nothing can be looked up or printed without extracted functions.
        print(" No functions found for %s %s" % (name(oid), oid))
        return

    fbreaks = get_fbreaks(functions)
    name_to_offset = {}
    for offset in functions:
        name_to_offset[functions[offset]["name"]] = offset

    system_calls = api.get_field("map_calls", oid, "system_calls")
    internal_functions = api.get_field("map_calls", oid, "internal_functions")

    # Merge whichever call maps are available; a missing map is reported but
    # does not prevent the disassembly from being printed.
    function_calls = {}
    if not system_calls:
        print(" No system calls found for %s %s" % (name(oid), oid))
    else:
        function_calls.update(system_calls)
    if not internal_functions:
        print(" No internal functions found for %s %s" % (name(oid), oid))
    else:
        function_calls.update(internal_functions)

    for fn in names:
        if fn not in name_to_offset:
            print(" %s not a function in %s %s" % (fn, name(oid), oid))
            continue
        print(" Function %s for %s %s" % (fn, name(oid), oid))
        print(" -------------------------------------")
        print_disassembly(oid, functions[name_to_offset[fn]]['insns'],
                          function_calls, fbreaks, start=0, stop=0, height=32)
        print(" -------------------------------------")
def build_hist_string(self, mod_name, oid, cutoff=200):
    """Return display lines for the ``mod_name`` histogram of ``oid``.

    When the histogram exists, its ``cutoff`` highest-valued entries are
    formatted one per line (keys are hexlified for ``byte_histogram``).
    Otherwise the lines describe the state of the background thread that
    creates it, and such a thread is started on the first request.
    """
    lines = []
    thread_key = (mod_name, oid)

    if api.exists(mod_name=mod_name, oid=oid):
        hist = api.retrieve(mod_name=mod_name, oid_list=[oid,])
        ranked = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
        if not hist:
            lines.append("No keys were found.")
        for key, value in ranked[:cutoff]:
            label = hexlify(key) if mod_name == "byte_histogram" else key
            lines.append('%s\t%8d' % (label, value))
        return lines

    requested = thread_key in self.threads
    if requested and not self.threads[thread_key].is_alive():
        # Requested and returned NULL.
        lines.append("N/A")
        return lines

    lines.append("%s file does not exist for:" % mod_name)
    lines.append("\t%s" % oid)
    if requested and self.threads[thread_key].is_alive():
        # Requested and still waiting.
        lines.append("It is still being created.")
    else:
        # First time this has been requested.
        lines.append("It is being created.")
        worker = threading.Thread(target=api.process, args=thread_key)
        worker.start()
        self.threads[thread_key] = worker
    return lines
def display_file_data(self, oid, fnames):
    """Refresh the metadata pane and the file pane for ``oid``.

    The file pane is emptied, the metadata pane is rewritten with names,
    size, type and oxide id, and the file pane is then filled line by line
    with the view chosen in the display-type selector.  A selection without
    a matching view leaves the file pane empty.
    """
    self.file_display.delete(index1='1.0', index2=tk.END)

    size = api.retrieve('file_meta', oid)['size']
    formatted_size = ''
    for threshold, suffix in ((1000000000, 'G'), (1000000, 'M'), (1000, 'K')):
        if size > threshold:
            formatted_size = "(%d%s)" % (size / threshold, suffix)
            break

    file_type = api.get_field("src_type", oid, 'type')
    file_description = "".join([
        "File Names: " + fnames + '\n',
        "Size: %7d %s\n" % (size, formatted_size),
        "File Type: " + file_type + '\n',
        "Oxide ID: " + oid + '\n',
    ])
    self.metadata_display.delete(index1='1.0', index2=tk.END)
    self.metadata_display.insert(index='1.0', chars=file_description)

    # Each view is built lazily so only the selected one is computed.
    builders = {
        'hex ': lambda: self.build_hex_str(oid),
        'header ': lambda: self.build_header_string(oid),
        'imports ': lambda: self.build_imports_string(oid),
        'byte histogram': lambda: self.build_hist_string('byte_histogram', oid),
        'opcode histogram': lambda: self.build_hist_string('opcode_histogram', oid),
    }
    build = builders.get(self.file_display_type_str.get())
    data_list = build() if build is not None else []

    for line in data_list:
        self.file_display.insert(index=tk.END, chars=line + "\n")
def checkDynamicCalls(oid, features):
    """Flag call types whose function names occur in the strings of ``oid``.

    All strings of the object are concatenated in key order and lower-cased.
    For every call type in ``callTypes`` that has at least one function name
    contained in that text, ``features["dynamic_" + callType]`` is set to
    True.

    Args:
        oid: Identifier of the object whose strings are searched.
        features: Dictionary updated in place.
    """
    # A missing strings result is treated as "no strings" instead of crashing.
    strs = api.retrieve("strings", [oid]) or {}
    # Lower-case once; the text does not change while searching.
    haystack = "".join([strs[addr] for addr in sorted(strs)]).lower()

    for callType, funcs in callTypes.items():
        if any(func in haystack for func in funcs):
            features["dynamic_" + callType] = True
def test_preferred_uid_clash(self):
    """Posting twice with the same preferred uid must yield two different uids."""
    preferred = "rtt"

    first_uid = api.post(ApiTest.CONTENT, preferred_uid=preferred)
    self.assertEqual(first_uid, preferred)
    self.assertEqual(api.retrieve(preferred), ApiTest.CONTENT)

    # The preferred uid is taken now, so a second post has to get another one.
    second_uid = api.post(ApiTest.CONTENT, preferred_uid=preferred)
    self.assertNotEqual(second_uid, first_uid)
def get_tags(oid):
    """Return the stored tags of a single oid.

    Returns None when ``oid`` is not a ``str`` (an error is logged) or when
    no tags exist for it.
    """
    if not isinstance(oid, str):
        logger.error("get_tags must be called with a single OID")
        return None
    if api.exists("tags", oid):
        return api.retrieve("tags", oid)
    return None
def summarize(args, opts):
    """ Gives a summary of a set of files, including types, extensions, etc. If no argument
        is passed, gives a summary for the entire datastore (may be very slow).
        Syntax: summarize %<oid>
    """
    valid, invalid = api.valid_oids(args)
    valid = set(api.expand_oids(valid))
    types = defaultdict(int)        # file type -> number of files
    extensions = defaultdict(int)   # extension (or "None") -> number of names
    # Counters for the six size buckets printed at the end, smallest first.
    sizes = [0,0,0,0,0,0]

    # No arguments: summarize every key of file_meta.
    if not args:
        valid = set(api.retrieve_all_keys("file_meta"))

    for oid in valid:
        meta = api.retrieve("file_meta", oid)
        names = meta["names"]
        if names:
            # Every name of a file is counted, so a file with several names
            # contributes several times.
            for name in names:
                parts = name.split(".")
                if len(parts) > 1:
                    extensions[parts[-1]] += 1
                else:
                    extensions["None"] += 1
        t = api.get_field("src_type", oid, "type")
        if t:
            types[t] += 1
        size = meta["size"]
        if size < 1024:
            sizes[0] += 1
        elif size < 10*1024:
            sizes[1] += 1
        elif size < 100*1024:
            sizes[2] += 1
        elif size < 1024*1024:
            sizes[3] += 1
        elif size < 10*1024*1024:
            sizes[4] += 1
        else:
            sizes[5] += 1

    print "\nTotal files in set: ", len(valid)

    # Extensions and types are listed from most to least frequent.
    print "\nExtensions (files with multiple names counted more than once):"
    exts = extensions.keys()
    exts = sorted(exts, key=lambda val: extensions[val], reverse=True)
    for e in exts:
        print " ", e, " \t\t :\t\t ", extensions[e]

    print "\nTypes:"
    ts = types.keys()
    ts = sorted(ts, key=lambda val: types[val], reverse=True)
    for t in ts:
        print " ", t, " \t\t :\t\t ", types[t]

    print "\nSizes: "
    print " Under 1k :", sizes[0]
    print " 1k - 10k :", sizes[1]
    print " 10k - 100k :", sizes[2]
    print " 100k - 1MB :", sizes[3]
    print " 1MB - 10MB :", sizes[4]
    print " over 10 MB :", sizes[5]

    return None
def reducer(intermediate_output, opts, jobid):
    """Merge the per-oid histograms into one and store it under ``jobid``.

    Falsy entries of ``intermediate_output`` are ignored.

    Returns:
        The merged histogram.
    """
    logger.debug("reducer()")

    merged = defaultdict(int)
    for oid in intermediate_output:
        if not oid:
            continue
        merged = merge_histo(api.retrieve(name, oid, opts), merged)

    api.store(name, jobid, merged, opts)
    return merged
def process(oid, opts):
    """Store the ``elf_repr`` of the parsed header for ELF objects.

    Returns:
        True when a header was stored; False when the object is not of type
        "ELF" or ``elf_parse`` produced nothing.
    """
    logger.debug("Processing oid %s", oid)

    if api.get_field("src_type", oid, "type") != "ELF":
        return False

    header = api.retrieve("elf_parse", oid)
    if header:
        api.store(name, oid, {"header": elf_repr(header)}, opts)
        return True
    return False
def mapper(oid, opts, jobid=False):
    """Build and store the n-gram histogram of one oid.

    Returns:
        None when the documentation of the oid's source has a truthy
        ``"set"`` entry; otherwise ``oid``.  An already stored result is
        reused without recomputation.
    """
    logger.debug("mapper()")

    src = api.source(oid)
    if api.documentation(src)["set"]:
        return None

    if not api.exists(name, oid, opts):
        data = api.retrieve(src, oid, opts)["data"]
        api.store(name, oid, build_ngrams(data, opts["n"]), opts)
    return oid
def process(oid, opts):
    """Store the header representation of Mach-O and OSX universal binaries.

    "MACHO" objects are rendered with ``macho_repr`` and
    "OSX Universal Binary" objects with ``universal_repr``; both are parsed
    by ``macho_parse``.

    Returns:
        True when a header was stored; False for any other type or when the
        parser produced nothing.
    """
    logger.debug("Processing oid %s", oid)

    src_type = api.get_field("src_type", oid, "type")
    if src_type == "MACHO":
        to_repr = macho_repr
    elif src_type == "OSX Universal Binary":
        to_repr = universal_repr
    else:
        return False

    header = api.retrieve("macho_parse", oid)
    if not header:
        return False
    api.store(name, oid, {"header": to_repr(header)}, opts)
    return True
def disassembly(args, opts):
    """ Displays the disassembly for a file
        Syntax: disassembly <oid> [--slice=<beg>:<end>] [--height=<height>] [--module=<mod_name>]
    """
    args, invalid = api.valid_oids(args)
    args = api.expand_oids(args)
    if not args:
        # Fall back to the file currently selected in the shell, if any.
        if current_file:
            args = [current_file]
        else:
            raise ShellSyntaxError("Must provide an oid")

    start = stop = 0
    height = default_height
    if "slice" in opts:
        start, stop = get_slice(opts)
    if "height" in opts:
        try:
            # The parsed value used to be stored in an unused `width`
            # variable, so --height never had any effect.
            height = int(opts["height"])
        except ValueError:
            raise ShellSyntaxError("Invalid height")

    mod_opts = {}
    if "module" in opts:
        mod_opts["module"] = opts["module"]

    for oid in args:
        disasm = api.get_field("disassembly", [oid], "insns", mod_opts)
        functions = api.retrieve("function_extract", oid)
        if not functions:
            print(" No functions found for %s %s" % (name(oid), oid))
            continue
        fbreaks = get_fbreaks(functions)

        system_calls = api.get_field("map_calls", oid, "system_calls")
        internal_functions = api.get_field("map_calls", oid, "internal_functions")

        # Merge whichever call maps are available.
        function_calls = dict()
        if system_calls is None:
            print(" System calls could not be determined for %s %s" % (name(oid), oid))
        else:
            function_calls.update(system_calls.items())
        if internal_functions is None:
            print(" Internal functions could not be determined for %s %s" % (name(oid), oid))
        else:
            function_calls.update(internal_functions.items())

        if disasm:
            print(" Disassembly for %s %s" % (name(oid), oid))
            print(" -------------------------------------")
            print_disassembly(oid, disasm, function_calls, fbreaks, start, stop, height)
            print(" -------------------------------------")
        else:
            print(" %s could not be disassembled." % name(oid))
def get_heatmap(s, heatoid):
    """Map every 3-gram of ``s`` to its count in the ngram data of ``heatoid``.

    Args:
        s: Sequence of strings; consecutive items are joined with "," to form
            the lookup key.
        heatoid: Identifier whose ``byte_ngrams`` result (n=3) supplies the
            counts.

    Returns:
        A list as long as ``s`` where position ``i`` holds the count of the
        3-gram starting at ``i``, or 0 when that 3-gram is not present (or
        fewer than three items remain).
    """
    n = 3
    heatmap = [0] * len(s)

    histo = api.retrieve("byte_ngrams", heatoid, {"n": n})
    if not histo:
        # No n-gram data available: nothing can be "hot".
        return heatmap

    # The last complete n-gram starts at len(s) - n, so that index has to be
    # included in the range.
    for i in xrange(len(s) - n + 1):
        gram = ",".join(s[i:i + n])
        if gram in histo:
            heatmap[i] = histo[gram]
    return heatmap
def process(oid, opts):
    """Store the ``pe_repr`` of the parsed header for PE objects.

    Returns:
        True when a header was stored; False when the object is not of type
        "PE" or ``pe_parse`` produced nothing.
    """
    logger.debug("process()")

    if api.get_field("src_type", oid, "type") != "PE":
        return False

    header = api.retrieve("pe_parse", oid)
    if header:
        api.store(name, oid, {"header": pe_repr(header)}, opts)
        return True
    return False
def key_filter(args, opts):
    """ Use to match the results of a module (module name required). Specify key and optionally value.
        Syntax: key_filter %<oid> --module=<mod_name> --key=<key> [--value=<value>]
    """
    if "module" not in opts or "key" not in opts:
        raise ShellSyntaxError("key_filter requires a --module=<mod_name> and a --key=<key> option")

    valid, invalid = api.valid_oids(args)
    valid = api.expand_oids(valid)
    if not args:
        # No oids given: search across every key of "files".
        valid = api.retrieve_all_keys("files")

    # "key" is guaranteed by the check above; "value" selects the search type.
    if "value" in opts:
        search_opts = {"mod": opts["module"], "key": opts["key"], "value": opts["value"]}
        return api.retrieve("substring_search", valid, search_opts)
    return api.retrieve("key_search", valid, {"mod": opts["module"], "key": opts["key"]})
def launch_template_browser(self):
    """Let the user pick a template and annotate the current file with its matches.

    Nothing happens when the dialog yields no template.  Otherwise the
    annotation display is cleared, the ``alg_ident`` result for the template
    is converted with ``create_comments``, and every known offset that has a
    match gets the match text appended to its automatic annotation
    (separated by ", " when the annotation is not empty).
    """
    template_name = TemplateSelectDialog(self).result
    if not template_name:
        return

    self.clear_annotations_display()
    raw_match = api.retrieve('alg_ident', self.oid, {'template': template_name})
    comments = self.create_comments(raw_match)

    for offset in self.offsets:
        if offset not in comments[self.oid]:
            continue
        if self.auto_annotations[offset]:
            self.auto_annotations[offset] += ', '
        self.auto_annotations[offset] += comments[self.oid][offset]

    self.display_annotation_data(self.oid)
def apply_tags(oid_list, new_tags):
    """Add or overwrite tags on one oid or on every oid of a list.

    Args:
        oid_list: A single oid, or a list of oids (lists may be nested).
        new_tags: Mapping of tag name to value; existing tags with the same
            name are overwritten.
    """
    if isinstance(oid_list, list):
        for oid in oid_list:
            apply_tags(oid, new_tags)
        return

    oid = oid_list
    if api.exists("tags", oid):
        tags = api.retrieve("tags", oid, {}, True)
    else:
        tags = {}
    tags.update(new_tags)
    api.store("tags", oid, tags)
def process(oid, opts):
    """Store the SHA-1 hashes of every function's mnemonic sequence.

    The mnemonics of all instructions of a function are concatenated and
    hashed; the set of hex digests is stored under the key ``"hashes"``.

    Returns:
        True when hashes were stored, False when ``function_extract``
        produced nothing.
    """
    logger.debug("process()")

    functions = api.retrieve("function_extract", oid)
    if not functions:
        return False

    digests = set(
        hashlib.sha1("".join(insn["mnem"] for insn in functions[addr]["insns"])).hexdigest()
        for addr in functions
    )
    api.store(name, oid, {"hashes": digests}, opts)
    return True
def results(oid_list, opts):
    """Retrieve the disassembly of the first oid with the chosen disassembler.

    Args:
        oid_list: List of oids; only the first element is used.
        opts: Options used to choose the disassembler and passed on to
            ``api.retrieve``.

    Returns:
        None when the chosen disassembler is not in ``choices`` or when the
        retrieval raises.
    """
    logger.debug('results()')

    disassembler = choose_disassembler(opts)
    if disassembler not in choices:
        # Pass `choices` as a logging argument: formatting it eagerly with
        # `%` fails when it is a tuple with more than one element.
        logger.warn('disassembly only accepts (%r)', choices)
        return None

    try:
        # NOTE(review): the retrieved value is not returned in the code
        # visible here - confirm whether a return of `disasm` was intended.
        disasm = api.retrieve(disassembler, oid_list[0], opts)
    except Exception as msg:
        logger.error("disassembly failed for %s: %s", oid_list[0], msg)
        return None
def process(oid, opts):
    """Build the control flow graphs of ``oid`` and store them.

    Four inputs are needed: the extracted functions, the object header, the
    basic blocks and the raw data of the object's source.  They are fetched
    in that order and processing stops at the first one that is missing.

    Returns:
        True when the result of ``build_cfg`` was stored, False when any
        input was missing.
    """
    logger.debug("process()")

    # Fetchers are lazy so that nothing after a missing input is requested.
    fetchers = (
        lambda: api.retrieve("function_extract", oid),
        lambda: api.get_field("object_header", oid, "header"),
        lambda: api.retrieve("basic_blocks", oid),
        lambda: api.get_field(api.source(oid), oid, "data"),
    )
    inputs = []
    for fetch in fetchers:
        value = fetch()
        if not value:
            return False
        inputs.append(value)

    functions, header, bbs, data = inputs
    api.store(name, oid, build_cfg(functions, header, data, bbs), opts)
    return True