def test_multiple_modules(self):
    """
    Test that we can disassemble and reassemble a
    binary with two modules.

    The executable and the shared library are each disassembled to their
    own GTIRB file, the library's first module is appended to the
    executable's IR, and the combined IR is handed to ``gtirb-pprinter``.
    """
    binary = "ex"
    library = "fun.so"
    with cd(ex_dir / "ex_dyn_library"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        self.assertTrue(
            disassemble(binary, False, format="--ir", extension="gtirb")
        )
        self.assertTrue(
            disassemble(library, False, format="--ir", extension="gtirb")
        )

        # Merge the library's module into the executable's IR.
        ir_binary = gtirb.IR.load_protobuf(binary + ".gtirb")
        ir_library = gtirb.IR.load_protobuf(library + ".gtirb")
        ir_binary.modules.append(ir_library.modules[0])
        ir_binary.save_protobuf("two_modules.gtirb")

        pprinter = subprocess.run(
            [
                "gtirb-pprinter",
                "--ir",
                "two_modules.gtirb",
                "-b",
                binary,
                "--skip-symbol",
                "_end",
            ]
        )
        # Use unittest assertions rather than bare `assert`: they are not
        # stripped under `python -O` and report the offending values.
        self.assertEqual(pprinter.returncode, 0)
        self.assertTrue(test())
def check_function_inference(
    self, make_dir, binary, c_compiler, cxx_compiler, optimization
):
    """
    Check that function inference finds the same functions in the
    stripped and the non-stripped build of a binary.

    The binary is first disassembled with `--skip-function-analysis`
    (functions found with only function symbols and calls in the
    non-stripped binary) and then again with the advanced analysis on the
    stripped binary; both runs must yield the same function addresses.
    """
    with cd(make_dir):
        built = compile(c_compiler, cxx_compiler, optimization, [])
        self.assertTrue(built)

        # Reference run: second positional argument False (presumably
        # "do not strip"), function analysis skipped.
        reference_ok = disassemble(
            binary,
            False,
            format="--ir",
            extension="gtirb",
            extra_args=["--skip-function-analysis"],
        )
        self.assertTrue(reference_ok)
        reference = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        # Second run overwrites the same GTIRB file.
        stripped_ok = disassemble(
            binary, True, format="--ir", extension="gtirb"
        )
        self.assertTrue(stripped_ok)
        stripped = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        expected_addresses = self.get_function_addresses(reference)
        inferred_addresses = self.get_function_addresses(stripped)
        self.assertEqual(expected_addresses, inferred_addresses)
def test_relative_jump_tables(self):
    """
    Test edges for relative jump tables are added.

    The block labelled ``jumping_block`` must have exactly four outgoing
    edges, one to each of the blocks labelled ``LBB5_4`` .. ``LBB5_7``.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_relative_switch"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        self.assertTrue(
            disassemble(
                binary,
                "strip",
                False,
                False,
                format="--ir",
                extension="gtirb",
            )
        )

        ir = gtirb.IR.load_protobuf(binary + ".gtirb")
        m = ir.modules[0]

        # check that the jumping_block has edges
        # to all the jump table entries
        jumping_block_symbol = [
            s for s in m.symbols if s.name == "jumping_block"
        ][0]
        # unittest assertion instead of a bare `assert`: it survives
        # `python -O` and reports the actual type on failure.
        self.assertIsInstance(
            jumping_block_symbol.referent, gtirb.CodeBlock
        )
        jumping_block = jumping_block_symbol.referent
        expected_dest_blocks = [
            s.referent
            for s in m.symbols
            if s.name in ["LBB5_4", "LBB5_5", "LBB5_6", "LBB5_7"]
        ]
        self.assertEqual(len(list(jumping_block.outgoing_edges)), 4)
        dest_blocks = [e.target for e in jumping_block.outgoing_edges]
        self.assertEqual(set(dest_blocks), set(expected_dest_blocks))
def test_symbol_selection(self):
    """
    Test that the right symbols are chosen for relocations
    and for functions.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_symbol_selection"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        disassembled = disassemble(binary, format="--ir")[0]
        self.assertTrue(disassembled)

        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        # (block label, symbol its first symbolic expression must use)
        self.check_first_sym_expr(module, "Block_hello", "hello_not_hidden")
        self.check_first_sym_expr(module, "Block_how", "how_global")
        self.check_first_sym_expr(module, "Block_bye", "bye_obj")

        # check symbols at the end of sections
        boundary_names = [
            "__init_array_end",
            "end_of_data_section",
            "edata",
            "_end",
        ]
        boundary_symbols = [
            symbol
            for symbol in module.symbols
            if symbol.name in boundary_names
        ]
        self.assertTrue(all(symbol.at_end for symbol in boundary_symbols))

        # check chosen function names
        function_names = set()
        for symbol in module.aux_data["functionNames"].data.values():
            function_names.add(symbol.name)
        self.assertIn("fun", function_names)
        self.assertNotIn("_fun", function_names)
def test_data_directories_in_code(self):
    """
    Test PE data directories that end up inside code sections.

    The example is built with `.rdata` merged into `.text`. Every
    non-empty data directory whose address lies in an executable section
    must be covered by a data block, and no code block may contain the
    start address of a data directory.
    """
    with cd(ex_dir / "ex1"):
        subprocess.run(make("clean"), stdout=subprocess.DEVNULL)

        # Compile with `.rdata' section merged to `.text'.
        proc = subprocess.run(
            ["cl", "/Od", "ex.c", "/link", "/merge:.rdata=.text"],
            stdout=subprocess.DEVNULL,
        )
        self.assertEqual(proc.returncode, 0)

        # Disassemble to GTIRB file.
        self.assertTrue(disassemble("ex.exe", format="--ir")[0])

        # Load the GTIRB file.
        ir = gtirb.IR.load_protobuf("ex.exe.gtirb")
        module = ir.modules[0]

        def is_code(section):
            return gtirb.ir.Section.Flag.Executable in section.flags

        pe_data_directories = module.aux_data["peDataDirectories"].data
        # Half-open [start, end) address ranges of all code blocks.
        code_blocks = [
            (b.address, b.address + b.size) for b in module.code_blocks
        ]
        for _, addr, size in pe_data_directories:
            # Check data directories in code sections are data blocks.
            if size > 0:
                if any(s for s in module.sections_on(addr) if is_code(s)):
                    data_block = next(module.data_blocks_on(addr), None)
                    self.assertIsNotNone(data_block)

            # Check no code blocks were created within data directories.
            # `end` is exclusive, so a code block that ends exactly at
            # `addr` does not contain it and must not fail the test.
            for start, end in code_blocks:
                self.assertFalse(start <= addr < end)
def test_moved_label(self):
    """
    Test that labels are correctly moved.

    The code block labelled ``point.1`` must contain exactly one symbolic
    expression, and it must refer to ``point.2`` with offset 22.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_moved_label"):
        self.assertTrue(compile("gcc", "g++", "-Os", []))
        self.assertTrue(disassemble(binary, format="--ir")[0])

        ir = gtirb.IR.load_protobuf(binary + ".gtirb")
        m = ir.modules[0]

        symbol_of_interest = [s for s in m.symbols if s.name == "point.1"][0]
        # unittest assertions instead of bare `assert`: they are not
        # stripped under `python -O` and report the actual type.
        self.assertIsInstance(symbol_of_interest.referent, gtirb.CodeBlock)
        block = symbol_of_interest.referent
        bi = block.byte_interval

        # All symbolic expressions located inside the block's address range.
        sexpr = set(
            bi.symbolic_expressions_at(
                range(block.address, block.address + block.size)
            )
        )
        self.assertEqual(len(sexpr), 1)

        # Each entry is a tuple whose third element is the expression.
        se1 = next(iter(sexpr))[2]
        self.assertIsInstance(se1, gtirb.SymAddrConst)
        self.assertEqual(se1.symbol.name, "point.2")
        self.assertEqual(se1.offset, 22)
def test_jumptable_call_edge(self):
    """
    Test that jump-table edges leading to functions are call edges.

    Runs once per configuration returned by ``self.configs()``.
    """
    for config in self.configs():
        with cd(ex_dir / "ex_call_array"), self.subTest(
            platform=config["platform"]
        ):
            self.assertTrue(compile(*config["args"]))

            binary = "ex"
            gtirb_path = binary + ".gtirb"
            disassembled = disassemble(binary, gtirb_path, format="--ir")[0]
            self.assertTrue(disassembled)

            module = gtirb.IR.load_protobuf(gtirb_path).modules[0]

            # Locate the jumptable where the functions are called: the
            # first CFG node whose edge targets include all four functions.
            funcs = {"one", "two", "three", "four"}
            jumptable = next(
                (
                    candidate
                    for candidate in module.cfg_nodes
                    if funcs.issubset(
                        {
                            lookup_sym(out_edge.target)
                            for out_edge in candidate.outgoing_edges
                        }
                    )
                ),
                None,
            )
            if jumptable is None:
                self.fail("Did not find jumptable")

            # The edges to the functions should be calls.
            for out_edge in jumptable.outgoing_edges:
                if lookup_sym(out_edge.target) in funcs:
                    self.assertEqual(
                        out_edge.label.type, gtirb.Edge.Type.Call
                    )
def test_symbols_through_plt(self):
    """
    Test a library that calls local methods through
    the plt table and locally defined symbols
    do not point to proxy blocks.
    """
    library = "ex.so"
    with cd(ex_dir / "ex_lib_symbols"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        self.assertTrue(
            disassemble(library, False, format="--ir", extension="gtirb")
        )

        ir_library = gtirb.IR.load_protobuf(library + ".gtirb")
        m = ir_library.modules[0]

        # foo is a symbol pointing to a code block
        foo = [s for s in m.symbols if s.name == "foo"][0]
        # unittest assertions instead of bare `assert`: they are not
        # stripped under `python -O` and show both values on failure.
        self.assertIsInstance(foo.referent, gtirb.CodeBlock)

        # bar calls through the plt
        bar = [s for s in m.symbols if s.name == "bar"][0]
        bar_block = bar.referent
        callee = [
            e.target
            for e in bar_block.outgoing_edges
            if e.label.type == gtirb.Edge.Type.Call
        ][0]
        self.assertEqual(
            [s.name for s in m.sections_on(callee.address)], [".plt"]
        )
def test_switch_overlap(self):
    """
    Test that with two overlapping jumptables, a coherent
    jump table is generated.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_switch_overlap"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        disassembled = disassemble(binary, format="--ir")[0]
        self.assertTrue(disassembled)

        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]
        table_section = next(
            section
            for section in module.sections
            if section.name == ".jumptable"
        )

        # Keep only the symbol-minus-symbol expressions of the section.
        section_range = range(
            table_section.address,
            table_section.address + table_section.size,
        )
        table_exprs = [
            expr
            for _, _, expr in table_section.symbolic_expressions_at(
                section_range
            )
            if isinstance(expr, gtirb.symbolicexpression.SymAddrAddr)
        ]

        # confirm all symexpr have the same ref
        for expr in table_exprs:
            self.assertEqual(
                expr.symbol2.name, table_exprs[0].symbol2.name
            )
        self.assertEqual(len(table_exprs), 4)
def test_cfi_table(self):
    """
    Test that cfi directives are correctly generated.

    Looks for the first ``cfiDirectives`` entry containing
    ``.cfi_remember_state`` and checks its position and the exact
    directive sequence recorded with it.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_cfi_directives"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        self.assertTrue(
            disassemble(binary, False, format="--ir", extension="gtirb")
        )

        ir = gtirb.IR.load_protobuf(binary + ".gtirb")
        m = ir.modules[0]
        cfi = m.aux_data["cfiDirectives"].data

        # we simplify directives to make queries easier
        found = False
        for offset, directives in cfi.items():
            directive_names = [elem[0] for elem in directives]
            if ".cfi_remember_state" in directive_names:
                found = True
                # the directive is at the end of the block
                # (unittest assertions instead of bare `assert`: not
                # stripped under `python -O`, and they print both sides)
                self.assertEqual(
                    offset.element_id.size, offset.displacement
                )
                self.assertEqual(
                    directive_names,
                    [
                        ".cfi_remember_state",
                        ".cfi_restore_state",
                        ".cfi_endproc",
                    ],
                )
                break
        self.assertTrue(found)
def test_read_gtirb(self):
    """
    Test disassembling from a GTIRB input file.

    A GTIRB file is first written with `--no-analysis`, then that file is
    used as input to produce assembly, which is reassembled and checked.
    """
    binary = "ex"
    with cd(ex_dir / "ex1"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))

        # Output GTIRB file without disassembling.
        wrote_gtirb = disassemble(
            binary, format="--ir", extra_args=["--no-analysis"]
        )[0]
        self.assertTrue(wrote_gtirb)

        # Disassemble GTIRB input file.
        wrote_asm = disassemble("ex.gtirb", format="--asm")[0]
        self.assertTrue(wrote_asm)

        rebuilt = reassemble("gcc", "ex.gtirb", extra_flags=[])
        self.assertTrue(rebuilt)
        self.assertTrue(test())
def test_boundary_sym_expr(self):
    """
    Test that symexpr that should be pointing
    to the end of a section indeed points to
    the symbol at the end of the section.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_boundary_sym_expr"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        disassembled = disassemble(binary, format="--ir")[0]
        self.assertTrue(disassembled)

        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]
        # The first symbolic expression of `load_end` must use `nums_end`.
        self.check_first_sym_expr(module, "load_end", "nums_end")
def check_main_inference(
    self, make_dir, binary, strip=False, strip_exe="strip", **compile_opts
):
    """
    Test that the main function is inferred in the same location for
    both stripped and non-stripped versions of the same binary.

    `compile_opts` are forwarded to `compile`; `strip` and `strip_exe`
    are forwarded to the first `disassemble` call, while the second call
    always passes `strip=True`.
    """

    def load_first_module():
        # Both disassembly runs write to the same GTIRB file.
        return gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

    with cd(make_dir):
        self.assertTrue(compile(**compile_opts), msg="Compilation failed")

        reference_ok = disassemble(
            binary,
            strip_exe=strip_exe,
            strip=strip,
            format="--ir",
            extra_args=["--skip-function-analysis"],
        )[0]
        self.assertTrue(reference_ok, msg="Disassembly failed")
        reference_module = load_first_module()

        stripped_ok = disassemble(
            binary, strip_exe=strip_exe, strip=True, format="--ir"
        )[0]
        self.assertTrue(stripped_ok, msg="Disassembly failed (stripped)")
        stripped_module = load_first_module()

        expected_main = self.get_main_address(reference_module)
        inferred_main = self.get_main_address(stripped_module)
        self.assertEqual(expected_main, inferred_main)
def test_switch_limited_by_cmp_x64(self):
    """
    Ensure jump table propagation is limited by comparisons of the index
    register.
    """
    binary = "ex"
    with cd(ex_asm_dir / "ex_switch_limited_by_cmp"):
        self.assertTrue(compile("gcc", "g++", "-O0", []))
        disassembled = disassemble(binary, format="--ir")[0]
        self.assertTrue(disassembled)

        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        # check that the .jump has edges to only the four jump table entries
        jump_symbol = next(
            symbol for symbol in module.symbols if symbol.name == ".jump"
        )
        jump_edges = list(jump_symbol.referent.outgoing_edges)
        self.assertEqual(len(jump_edges), 4)
def test_x86_64_object_cfg(self):
    """
    Test X86_64 object file relocation edges.
    """
    binary = "ex.o"
    with cd(ex_dir / "ex1"):
        self.assertTrue(compile("gcc", "g++", "-O0", ["--save-temps"]))
        disassembled = disassemble(binary, format="--ir")[0]
        self.assertTrue(disassembled)

        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        # Code blocks ending in an E8 opcode followed by four zero bytes
        # (presumably calls whose operand still awaits relocation).
        call = b"\xe8\x00\x00\x00\x00"
        call_blocks = [
            block
            for block in module.code_blocks
            if block.contents.endswith(call)
        ]

        # Each such block must have exactly two outgoing edges.
        edge_counts = [
            len(list(block.outgoing_edges)) for block in call_blocks
        ]
        self.assertTrue(all(count == 2 for count in edge_counts))
def test_stack_chk_fallthrough(self):
    """
    Test that calls to the `__stack_chk_fail` PLT entry have no
    fallthrough edge.

    Runs once per configuration returned by ``self.configs()``.
    """
    for config in self.configs():
        with cd(ex_dir / "ex1"), self.subTest(platform=config["platform"]):
            self.assertTrue(compile(*config["args"]))

            binary = "ex"
            gtirb_path = binary + ".gtirb"
            disassembled = disassemble(binary, gtirb_path, format="--ir")[0]
            self.assertTrue(disassembled)

            module = gtirb.IR.load_protobuf(gtirb_path).modules[0]

            # Locate the PLT block for __stack_chk_fail
            proxy = next(
                (
                    candidate
                    for candidate in module.cfg_nodes
                    if isinstance(candidate, gtirb.ProxyBlock)
                    and lookup_sym(candidate) == "__stack_chk_fail"
                ),
                None,
            )
            if proxy is None:
                self.fail("Did not find __stack_chk_fail PLT entry")

            # The ProxyBlock should have one incoming branch edge from
            # the PLT.
            self.assertEqual(len(list(proxy.incoming_edges)), 1)
            plt_edge = next(proxy.incoming_edges)
            self.assertEqual(plt_edge.label.type, gtirb.Edge.Type.Branch)
            plt_block = plt_edge.source

            # Ensure that calls to the PLT entry have no fallthrough.
            for call_edge in plt_block.incoming_edges:
                # All edges to the stack_chk_fail ProxyBlock should be
                # calls
                self.assertEqual(
                    call_edge.label.type, gtirb.Edge.Type.Call
                )

                # The calling block should not have a fallthrough.
                caller_edge_types = [
                    out_edge.label.type
                    for out_edge in call_edge.source.outgoing_edges
                ]
                self.assertTrue(
                    all(
                        edge_type != gtirb.Edge.Type.Fallthrough
                        for edge_type in caller_edge_types
                    )
                )
def test_souffle_relations(self):
    """Test `--with-souffle-relations' equivalence to `--debug-dir'."""
    with cd(ex_dir / "ex1"):
        # build
        self.assertTrue(compile("gcc", "g++", "-O0", []))

        # disassemble, writing debug relations to `dbg`
        if not os.path.exists("dbg"):
            os.mkdir("dbg")
        disassembled = disassemble(
            "ex",
            format="--ir",
            extra_args=[
                "-F",
                "--with-souffle-relations",
                "--debug-dir",
                "dbg",
            ],
        )[0]
        self.assertTrue(disassembled)

        # load the gtirb
        module = gtirb.IR.load_protobuf("ex.gtirb").modules[0]

        # dump relations to directory, one file per relation; the file
        # extension depends on which aux data table it came from
        if not os.path.exists("aux"):
            os.mkdir("aux")
        extension_by_table = {
            "souffleFacts": "facts",
            "souffleOutputs": "csv",
        }
        for table, ext in extension_by_table.items():
            relations = module.aux_data[table].data
            for name, (_, contents) in relations.items():
                with open("aux/{}.{}".format(name, ext), "w") as out:
                    out.write(contents)

        # compare the relations directories
        subprocess.check_call(["diff", "dbg", "aux"])
def test_generate_resources(self):
    """
    Test that a PE example with a resource file can be rebuilt from
    regenerated resources.

    The example is built, disassembled with `--generate-import-libs` and
    `--generate-resources`, reassembled linking `ex.res`, and finally the
    example's `check` target is run.
    """
    with cd(ex_dir / "ex_rsrc"):
        # Build example with PE resource file.
        proc = subprocess.run(make("clean"), stdout=subprocess.DEVNULL)
        self.assertEqual(proc.returncode, 0)
        proc = subprocess.run(make("all"), stdout=subprocess.DEVNULL)
        self.assertEqual(proc.returncode, 0)

        # Disassemble, regenerating import libraries and resources.
        # NOTE: index the result like the other keyword-style
        # `disassemble` calls in this file do; asserting on the whole
        # return value would always pass if it is a non-empty tuple.
        self.assertTrue(
            disassemble(
                "ex.exe",
                format="--asm",
                extra_args=[
                    "--generate-import-libs",
                    "--generate-resources",
                ],
            )[0]
        )

        # Reassemble with regenerated RES file. The assembler and entry
        # symbol differ when the VS target architecture is x86.
        ml, entry = "ml64", "__EntryPoint"
        if os.environ.get("VSCMD_ARG_TGT_ARCH") == "x86":
            ml, entry = "ml", "_EntryPoint"
        self.assertTrue(
            reassemble(
                ml,
                "ex.exe",
                extra_flags=[
                    "/link",
                    "ex.res",
                    "/entry:" + entry,
                    "/subsystem:console",
                ],
            )
        )

        proc = subprocess.run(make("check"), stdout=subprocess.DEVNULL)
        self.assertEqual(proc.returncode, 0)
def test_static_lib(self):
    """
    Test disassembling and rebuilding a static library.

    For every YAML config in ``self.configs`` and every ``ExecType``, the
    `ex_static_lib` example is built, `libmsg.a` is disassembled both to
    a GTIRB file (one module per archive member) and to a directory of
    assembly files, and the archive is rebuilt from the reassembled
    objects, re-linked and tested.
    """
    for path in self.configs:
        # Parse YAML config file.
        with open(path) as f:
            config = yaml.safe_load(f)

        default = config.get("default")
        wrapper = default.get("test").get("wrapper")
        base_flags = default.get("build").get("flags")
        test_dir = ex_dir / "ex_static_lib"

        for exec_type in ExecType:
            flags = get_flags(base_flags, exec_type)
            if (path.stem, exec_type) in (
                ("linux-elf-x86", ExecType.PIE),
                ("qemu-elf-arm", ExecType.NO_PIE),
            ):
                # TODO: fix and re-enable this.
                # See issue #330, #331
                continue

            with cd(test_dir), self.subTest(
                platform=path.stem, flags=flags
            ):
                self.assertTrue(
                    compile(
                        default.get("build").get("c")[0],
                        default.get("build").get("cpp")[0],
                        default.get("build").get("optimizations")[0],
                        flags,
                        exec_wrapper=wrapper,
                    )
                )

                binary = "libmsg.a"
                modules = [
                    "msg_one",
                    "msg_two",
                    "msg_three",
                    "msg_four_with_a_long_name",
                ]

                # Disassemble to a single GTIRB file: one module per
                # archive member is expected.
                gtirb_file = "libmsg.gtirb"
                self.assertTrue(
                    disassemble(binary, gtirb_file, format="--ir")[0]
                )
                self.assertEqual(
                    len(modules),
                    len(gtirb.IR.load_protobuf(gtirb_file).modules),
                )

                # Disassemble to a directory with one `.s` per member.
                asm_dir = Path("libmsg-tmp")
                self.assertTrue(
                    disassemble(binary, str(asm_dir), format="--asm")[0]
                )
                self.assertTrue(asm_dir.exists())
                self.assertTrue(asm_dir.is_dir())
                self.assertEqual(
                    {name + ".s" for name in modules},
                    set(os.listdir(asm_dir)),
                )

                # reassemble object files
                print("# Reassembling", binary + ".s", "into", binary)
                re_compiler = default.get("reassemble").get("compiler")
                re_flags = default.get("reassemble").get("flags")
                for obj in modules:
                    subprocess.run(
                        [
                            re_compiler,
                            "-c",
                            str(asm_dir / (obj + ".s")),
                            "-o",
                            obj + ".o",
                        ]
                        + re_flags,
                        check=True,
                    )

                # re-build static archive; a single `ar` invocation adds
                # every object, so it does not need to run once per module
                objects = [obj + ".o" for obj in modules]
                subprocess.run(["ar", "-rcs", binary] + objects, check=True)

                # re-link
                objects.append("ex.o")
                self.assertTrue(link(re_compiler, "ex", objects, re_flags))
                self.assertTrue(test(wrapper))