def import_src(self, src_db):
    """Match the functions exported from the sources against this database.

    Loads the project hooks, remembers ``src_db`` in ``self.src_db``,
    attaches it to ``self.db`` under the schema name ``src`` and runs the
    matching steps (``find_initial_rows``, ``find_callgraph_matches`` and
    ``choose_best_matches``). When ``self.best_matches`` ends up non empty
    the results are shown in a ``CDiffChooser``; otherwise a "No matches
    found." warning is issued and logged.

    :param src_db: path of the database to attach as ``src``.
    """
    self.load_hooks()
    self.src_db = src_db

    try:
        # Bind the path as a parameter instead of formatting it into the
        # statement: a file name containing a quote would otherwise break
        # the SQL text.
        self.db.execute("attach ? as src", (src_db,))
    except Exception as e:
        # Attaching stays best effort (presumably it fails when 'src' is
        # already attached by an earlier import -- TODO confirm), but the
        # failure is reported instead of being silently discarded.
        log("Error attaching database %s: %s" % (src_db, str(e)))

    matches = False
    if self.find_initial_rows():
        self.find_callgraph_matches()
        self.choose_best_matches(is_final=True)
        matches = len(self.best_matches) > 0

    if not matches:
        Warning("No matches found.")
        log("No matches found.")
        return

    title = "Matched functions for %s" % os.path.basename(src_db)
    c = CDiffChooser(self, title, self.best_matches, self)
    c.show()

    if _DEBUG:
        # Debug builds also list the matches that were considered dubious.
        c = CDiffChooser(self, "Dubious matches", self.dubious_matches, self)
        c.show()
def open_or_create_database(self, force=False):
    """Export the current database to its "-src.sqlite" file when needed.

    ``self.db_filename`` is always set to ``self.db_path`` with its
    extension replaced by ``-src.sqlite``. The export runs when that file
    does not exist, when ``self.different_versions()`` is true, or when
    ``force`` is true.

    :param force: export even if an up to date file already exists.
    :raises Exception: if an export is required but ``from_ida`` is false.
    """
    base_name, _ext = os.path.splitext(self.db_path)
    self.db_filename = base_name + "-src.sqlite"

    # Same short-circuit order as before: the version check is only reached
    # when the file already exists.
    is_missing = not os.path.exists(self.db_filename)
    if not (is_missing or self.different_versions() or force):
        return

    if not from_ida:
        raise Exception("Export process can only be done from within IDA")

    # NOTE: a call to self.is_old_version(self.db_filename) is disabled here.
    log("Exporting current database...")
    CBinaryToSourceExporter().export(self.db_filename)
def _escape_angle_brackets(text):
    """Replace ``<`` and ``>`` in ``text`` with their HTML entities."""
    return text.replace("<", "&lt;").replace(">", "&gt;")


def indent_source(src):
    """Run ``src`` through the external indenter and escape angle brackets.

    The command line stored in the module level ``indent_cmd`` receives
    ``src`` on its standard input; its output (stdout and stderr merged) is
    decoded and returned with ``<`` and ``>`` escaped. When the command
    cannot be run, raises, or prints nothing, ``src`` itself is returned
    escaped, so the caller always gets a string back.

    NOTE(review): the previous code replaced "<" with "<" and ">" with ">",
    which does nothing; the entity form is assumed to be the intent (the
    result is presumably rendered as HTML -- confirm against the viewer).

    :param src: source code to indent.
    :returns: the indented (or, on failure, original) source, escaped.
    """
    try:
        p = Popen(indent_cmd, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
        output = p.communicate(input=src)[0]
        indented = output.decode()
        if indented:
            return _escape_angle_brackets(indented)
    except Exception:
        # Keep this best effort, but no longer trap KeyboardInterrupt or
        # SystemExit the way the bare "except:" did.
        log("Error indenting: %s" % (str(sys.exc_info()[1])))

    return _escape_angle_brackets(src)
def open_or_create_database(self, force=False):
    """Export the current database, with project hooks, when it is needed.

    ``self.db_filename`` is always set to ``self.db_path`` with its
    extension replaced by ``-src.sqlite``. If that file is missing, if
    ``self.different_versions()`` is true, or if ``force`` is true, the
    project hooks are loaded and a ``CBinaryToSourceExporter`` built with
    ``self.hooks`` writes the file.

    :param force: export even if an up to date file already exists.
    :raises Exception: if an export is required but ``from_ida`` is false.
    """
    stem = os.path.splitext(self.db_path)[0]
    self.db_filename = stem + "-src.sqlite"

    # Evaluated left to right with short-circuiting, exactly as before.
    export_needed = (
        not os.path.exists(self.db_filename)
        or self.different_versions()
        or force
    )
    if not export_needed:
        return

    if not from_ida:
        raise Exception("Export process can only be done from within IDA")

    # The project specific hooks must be in place before exporting.
    self.load_hooks()

    log("Exporting current database...")
    exporter = CBinaryToSourceExporter(hooks=self.hooks)
    exporter.export(self.db_filename)
def find_one_callgraph_match(self, src_id, bin_ea, min_level, call_type="callee", iteration=1):
    """Match the callers or callees of an already matched function pair.

    Looks up the binary function at ``bin_ea``, fetches the ``call_type``
    rows of the source function ``src_id`` and of the binary function, and
    compares every source row against every binary row. Each pair whose
    score reaches ``min_level`` is registered through ``self.add_match``.
    Nothing is compared when the number of pairs exceeds
    ``self.max_cartesian_product``; that case is only logged.

    :param src_id: id of the function in the source database.
    :param bin_ea: address of the matched function in the binary.
    :param min_level: minimum score a pair needs to be added as a match.
    :param call_type: key read from the call rows ("callee" by default).
    :param iteration: iteration number, only used in the match description.
    """
    cur = self.db.cursor()
    try:
        cur.execute("select * from functions where ea = ?", (str(bin_ea), ))
        row = cur.fetchone()
        if row is None:
            return

        src_rows = list(self.get_source_call_type(src_id, call_type))
        if not src_rows:
            return

        bin_rows = list(self.get_binary_call_type(bin_ea, call_type))
        if not bin_rows:
            return

        product_size = len(bin_rows) * len(src_rows)
        if product_size > self.max_cartesian_product:
            msg = "Cartesian product finding %ss for SRC=%d/BIN=0x%08x(%s) too big (%d)..."
            log(msg % (call_type, src_id, long(bin_ea), row["name"], product_size))
            return

        if _DEBUG:
            # A single parenthesised argument prints the same text under
            # both the print statement and the print function.
            print("Finding matches in a cartesian product of %d x %d row(s)" % (len(src_rows), len(bin_rows)))

        description = "Callgraph match (%s, iteration %d)" % (call_type, iteration)
        for src_row in src_rows:
            src_func = src_row[call_type]
            for bin_row in bin_rows:
                bin_func = bin_row[call_type]
                curr_bin_id = self.get_binary_func_id(bin_func)
                if not curr_bin_id:
                    continue

                score, reasons, ml = self.compare_functions(src_func, curr_bin_id, CALLGRAPH_MATCH)
                if score >= min_level:
                    func_name = self.get_source_func_name(src_func)
                    self.add_match(long(src_func), bin_func, func_name, description, score, reasons, ml)
    finally:
        # Release the cursor even when one of the helpers above raises.
        cur.close()
def import_src(self, src_db):
    """Match the functions exported from the sources against this database.

    Attaches ``src_db`` to ``self.db`` under the schema name ``src`` and
    runs the matching steps (``find_initial_rows``,
    ``find_callgraph_matches`` and ``choose_best_matches``). When
    ``self.best_matches`` ends up non empty the results are shown in a
    ``CDiffChooser``; otherwise a "No matches found." warning is issued and
    logged. Errors raised while attaching propagate to the caller.

    :param src_db: path of the database to attach as ``src``.
    """
    # Bind the path as a parameter instead of formatting it into the
    # statement: a file name containing a quote would otherwise break the
    # SQL text.
    self.db.execute("attach ? as src", (src_db,))

    matches = False
    if self.find_initial_rows():
        self.find_callgraph_matches()
        self.choose_best_matches(is_final=True)
        matches = len(self.best_matches) > 0

    if not matches:
        Warning("No matches found.")
        log("No matches found.")
        return

    c = CDiffChooser(self, "Matched functions", self.best_matches, self)
    c.show()

    if _DEBUG:
        # Debug builds also list the matches that were considered dubious.
        c = CDiffChooser(self, "Dubious matches", self.dubious_matches, self)
        c.show()
def load_hooks(self):
    """Load the project specific script and instantiate its hooks class.

    The script at ``self.project_script`` is loaded as the module
    ``pigaios_hooks``. It must define a ``HOOKS`` dictionary with a
    ``PigaiosHooks`` entry; that entry is called with ``self`` and the
    result is stored in ``self.hooks``.

    :returns: True when no script is configured or the hooks were
        installed; False when the script could not be loaded or does not
        export what is expected (the reason is logged).
    """
    if self.project_script is None or self.project_script == "":
        return True

    try:
        module = imp.load_source("pigaios_hooks", self.project_script)
    except Exception:
        # Only real errors are reported here; KeyboardInterrupt and
        # SystemExit are no longer swallowed as with a bare "except:".
        log("Error loading project specific Python script: %s" % str(sys.exc_info()[1]))
        return False

    if module is None:
        # How can it be?
        return False

    if not hasattr(module, "HOOKS"):
        log("Error: The project specific script doesn't export the HOOKS dictionary")
        return False

    hooks = module.HOOKS
    if "PigaiosHooks" not in hooks:
        log("Error: The project specific script exports the HOOK dictionary but it doesn't contain a 'PigaiosHooks' entry.")
        return False

    hook_class = hooks["PigaiosHooks"]
    self.hooks = hook_class(self)
    return True
def find_callgraph_matches(self):
    """Iteratively extend ``self.best_matches`` starting from known matches.

    Every pass walks a snapshot of the current best matches and, for each
    one not handled before, calls ``find_nearby_functions`` and
    ``find_one_callgraph_match`` for both callees and callers. After the
    pass ``choose_best_matches`` is run; the loop stops as soon as a pass
    leaves the number of best matches unchanged.

    NOTE(review): the indentation was reconstructed from a flattened copy of
    this function; the nesting level of the time limit check inside the
    ``for`` loop should be confirmed against the original file.
    """
    log("Finding callgraph matches...")
    i = 0
    # Source function ids and binary addresses that were already expanded,
    # kept across passes so no entry is processed twice.
    dones = set()
    ea_dones = set()
    while 1:
        # Start time of this pass, used by the time limit below.
        t = time.time()
        i += 1
        log("Iteration %d, discovered a total of %d row(s)..." %
            (i, len(self.best_matches)))
        total = len(self.best_matches)
        # Iterate through the best matches we first found.
        # NOTES: The 'match_id' is the id of the function in the source code.
        # The keys are copied with list() before iterating.
        for match_id in list(self.best_matches):
            if match_id in dones:
                continue
            dones.add(match_id)
            # Re-check membership: the loop runs over a copy of the keys.
            if match_id in self.best_matches:
                ea, bin_caller, heur, score, reasons, ml = self.best_matches[
                    match_id]
                if ea in ea_dones:
                    continue
                ea_dones.add(ea)
                # On the first pass every match is expanded; afterwards only
                # those reaching self.min_level or with ml equal to 1.0.
                if i == 1 or score >= self.min_level or ml == 1.0:
                    # The third argument grows by 0.1 with every pass.
                    self.find_nearby_functions(match_id, ea, 0.3 + (i * 0.1),
                                               i)
                    self.find_one_callgraph_match(match_id, ea,
                                                  self.min_level, "callee", i)
                    self.find_one_callgraph_match(match_id, ea,
                                                  self.min_level, "caller", i)
            # More than 5 minutes for a single iteration is too long...
            # (this only leaves the for loop; the while loop goes on)
            if time.time() - t >= 60 * 5:
                log("Iteration took too long, continuing...")
                break
        self.choose_best_matches()
        # No new best matches in this pass: nothing left to discover.
        if len(self.best_matches) == total:
            break
lexer = shlex.shlex(x.iIndentCommand.value) lexer.wordchars += "\:-." indent_cmd = list(lexer) importer = CIDABinaryToSourceImporter() importer.min_level = min_level importer.min_display_level = min_display_level importer.use_decompiler = x.rUseDecompiler.checked importer.import_src(database) finally: hide_wait_box() if __name__ == "__main__": try: try: if os.getenv("DIAPHORA_PROFILE") is not None: import cProfile profiler = cProfile.Profile() profiler.runcall(main) exported = True profiler.print_stats(sort="time") else: main() except: log("ERROR: %s" % str(sys.exc_info()[1])) traceback.print_exc() raise finally: hide_wait_box()
def find_initial_rows(self):
    """Seed the matching process with two SQL driven heuristics.

    First, binary and source functions whose constants are identical and
    rare in the sources are paired ("Attributes matching"); the scores of
    those pairs are used to derive ``self.min_level`` and
    ``self.min_display_level`` when they are still 0.0. Second, functions
    sharing a constant that appears in at most 3 rows of ``src.constants``
    are paired ("Same rare constant"). Every pair is scored with
    ``compare_functions`` and passed to ``add_match``.

    NOTE(review): the indentation was reconstructed from a flattened copy of
    this function; in particular, confirm whether the two "Minimum score"
    log calls belong inside the ``if size > 0`` block.

    :returns: True if at least one row was handled by either query.
    """
    cur = self.db.cursor()
    # The %d placeholder is filled in below with the maximum number of
    # source functions allowed to share the same constants_json value.
    sql = """ select bin.ea, src.name, src.id, bin.id from functions bin, src.functions src where (bin.conditions between src.conditions and src.conditions + 3 or bin.name = src.name) and bin.constants = src.constants and bin.constants_json = src.constants_json and (select count(*) from src.functions x where x.constants_json = src.constants_json) < %d and src.constants_json != '[]' and src.constants > 0 and src.conditions > 1 and bin.loops = src.loops """
    cur.execute("select count(*) from src.functions")
    row = cur.fetchone()
    total = row[0]
    if has_ml:
        log("Decision tree based system available")
    log("Finding best matches...")
    rows = []
    # Try thresholds of 1% to 5% and stop at the first one returning rows.
    for i in range(1, 6):
        # Constants must appear less than i% of the time in the sources
        # (the value is formatted with %d, so any fraction is dropped).
        val = (total * i / 100)
        cur.execute(sql % val)
        row = cur.fetchone()
        if row:
            # Put the row already fetched back in front of the rest.
            rows = cur.fetchall()
            rows.insert(0, row)
            break
    size = len(rows)
    if size > 0:
        # Count of rows per source function name; it is filled here but not
        # read again in this function.
        matches_count = {}
        for row in rows:
            try:
                matches_count[row[1]] += 1
            except:
                matches_count[row[1]] = 1
        max_score = 0
        min_score = 1
        for row in rows:
            # Column order follows the select: bin.ea, src.name, src.id,
            # bin.id.
            func_ea = long(row[0])
            match_name = row[1]
            match_id = row[2]
            bin_id = row[3]
            score, reasons, ml = self.compare_functions(
                match_id, bin_id, ATTRIBUTES_MATCHING)
            if score < min_score:
                min_score = score
            if score > max_score:
                max_score = score
            self.add_match(match_id, func_ea, match_name,
                           "Attributes matching", score, reasons, ml)
        log("Minimum score %f, maximum score %f" % (min_score, max_score))
        # We have had too good matches or too few, use a more relaxed minimum score
        if min_score > 0.5:
            min_score = 0.5
        # If the minimum ratios were set to '0', calculate them from the minimum
        # ratio we get from the initial best matches (which must be false positives
        # free).
        if self.min_level == 0.0:
            # min() with 0.01 means the result never exceeds 0.01.
            self.min_level = min(abs(min_score - 0.3), 0.01)
        if self.min_display_level == 0.0:
            # max() with 0.3 means the result is never below 0.3.
            self.min_display_level = max(abs(min_score - 0.3), 0.3)
        log("Minimum score for calculations: %f" % self.min_level)
        log("Minimum score to show results : %f" % self.min_display_level)
    # Second heuristic: pair functions through constants that appear in at
    # most 3 rows of src.constants.
    sql = """ select distinct bin_func.ea, src_func.name, src_func.id, bin_func.id from functions bin_func, constants bin_const, src.functions src_func, src.constants src_const where bin_const.constant = src_const.constant and bin_func.id = bin_const.func_id and src_func.id = src_const.func_id and (select count(*) from src.constants sc where sc.constant = src_const.constant ) <= 3"""
    cur.execute(sql)
    # Rows are fetched one at a time from the same cursor.
    while 1:
        row = cur.fetchone()
        if not row:
            break
        size += 1
        func_ea = long(row[0])
        match_name = row[1]
        match_id = row[2]
        bin_id = row[3]
        score, reasons, ml = self.compare_functions(
            match_id, bin_id, SAME_RARE_CONSTANT)
        self.add_match(match_id, func_ea, match_name, "Same rare constant",
                       score, reasons, ml)
    cur.close()
    # size counts the rows of both heuristics.
    return size != 0
def log(self, msg):
    """Pass ``msg`` on to the ``log`` function.

    NOTE(review): this assumes the def lives inside a class, so that the
    bare name ``log`` in the body resolves to a function defined outside the
    class and not to this method -- confirm against the enclosing scope.
    """
    log(msg)