def create_ir_with_name(name: str) -> gtirb.IR:
    """
    Build an ELF / x86-64 test IR containing one function called ``name``.

    The function consists of a single one-byte code block (``0xC3``) placed
    in a freshly added text section. The enclosing IR is returned.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module)
    entry_block = add_code_block(text_bi, b"\xC3")
    add_function(module, name, entry_block)
    return ir
def test_windows_defs(self):
    """
    A PE import entry must produce a ``lib.exe`` invocation whose ``/DEF:``
    file names the library and exports the imported symbol.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.PE,
        gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_CUI"],
    )
    import_entry = (0, -1, "GetMessageW", "USER32.DLL")
    module.aux_data["peImportEntries"].data.append(import_entry)

    def_prefix = "/DEF:"
    for tool in run_binary_pprinter_mock(ir):
        # Only the first lib.exe invocation is inspected.
        if tool.name != "lib.exe":
            continue

        def_args = [arg for arg in tool.args if arg.startswith(def_prefix)]
        def_arg = def_args[0] if def_args else None
        self.assertIsNotNone(def_arg, "no /DEF in lib invocation")
        self.assertIn("/MACHINE:X64", tool.args)

        # Everything after the "/DEF:" prefix is the path of the .def file.
        def_path = def_arg[len(def_prefix):]
        with open(def_path, "r") as def_file:
            contents = def_file.read()

        expected = ['LIBRARY "USER32.DLL"', "EXPORTS", "GetMessageW"]
        self.assertEqual(interesting_lines(contents), expected)
        break
    else:
        self.fail("did not see a lib.exe execution")
def test_nonmoffset_mov(self):
    """
    A non-moffset ``mov`` must not trigger the compatibility warning.

    The same instruction is added twice: once with the symbolic expression
    at offset 0 (expected to print as ``[0]``) and once at offset 2
    (expected to print as ``[hello]``).
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.IA32,
    )
    _, text_bi = add_text_section(module, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(module, "hello"))

    # mov edi, hello
    mov_edi_hello = b"\x8B\x3D\x00\x00\x00\x00"
    # wrong symbolic offset
    add_code_block(text_bi, mov_edi_hello, {0: hello_expr})
    # correct symbolic offset
    add_code_block(text_bi, mov_edi_hello, {2: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    self.assertNotIn(self.COMPAT_WARNING_MESSAGE, output)
    expected = ("mov EDI,DWORD PTR [0]", "mov EDI,DWORD PTR [hello]")
    self.assertContains(asm_lines(asm), expected)
def test_moffset_mov_ia32_compat(self):
    """
    Moffset ``mov`` forms with the symbolic expression at offset 0 must
    still print ``[hello]`` and emit the compatibility warning exactly once.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.IA32,
    )
    _, text_bi = add_text_section(module, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(module, "hello"))

    # (encoding, expected Intel-syntax text), in the order they are emitted.
    cases = (
        (b"\xA0\x00\x00\x00\x00", "mov AL,BYTE PTR [hello]"),
        (b"\x66\xA1\x00\x00\x00\x00", "mov AX,WORD PTR [hello]"),
        (b"\xA1\x00\x00\x00\x00", "mov EAX,DWORD PTR [hello]"),
        (b"\xA2\x00\x00\x00\x00", "mov BYTE PTR [hello],AL"),
        (b"\x66\xA3\x00\x00\x00\x00", "mov WORD PTR [hello],AX"),
        (b"\xA3\x00\x00\x00\x00", "mov DWORD PTR [hello],EAX"),
    )
    for encoding, _ in cases:
        add_code_block(text_bi, encoding, {0: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    self.assertIn(self.COMPAT_WARNING_MESSAGE, output)
    self.assertEqual(output.count(self.COMPAT_WARNING_MESSAGE), 1)
    expected = tuple(text for _, text in cases)
    self.assertContains(asm_lines(asm), expected)
def test_data_block_alignment_via_symbol(self):
    """
    Data blocks carrying an exported symbol must be printed without any
    alignment directive.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, data_bi = add_data_section(module)
    add_data_block(data_bi, b"\x01\x02")
    exported_block = add_data_block(data_bi, b"\x03\x04")
    hello = add_symbol(module, "hello", exported_block)
    add_elf_symbol_info(module, hello, exported_block.size, "OBJECT")

    asm = run_asm_pprinter(ir)

    # The expected lines contain no .align between the end of the first
    # block and the symbol's directives.
    expected = [
        ".byte 0x2",
        ".globl hello",
        ".type hello, @object",
        "hello:",
        ".byte 0x3",
    ]
    self.assertContains(asm_lines(asm), expected)
def test_code_block_alignment_via_symbol(self):
    """
    Code blocks carrying an exported symbol must be aligned according to
    their address.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module)
    add_code_block(text_bi, b"\x90\x90")
    exported_block = add_code_block(text_bi, b"\xC3")
    hello = add_symbol(module, "hello", exported_block)
    add_elf_symbol_info(module, hello, exported_block.size, "FUNC")

    asm = run_asm_pprinter(ir)

    expected = [
        "nop",
        ".align 2",
        ".globl hello",
        ".type hello, @function",
        "hello:",
        "ret",
    ]
    self.assertContains(asm_lines(asm), expected)
def test_ambiguous_symbol_names(self):
    """
    Symbols sharing a name must be printed with ``_disambig_<addr>_<n>``
    suffixes that do not collide with an already existing symbol name.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module, 0x1000)
    first_block = add_code_block(text_bi, b"\xc3")
    second_block = add_code_block(text_bi, b"\xc3")
    _, data_bi = add_data_section(module, 0x500)
    data_block = add_data_block(data_bi, b"hello")

    # This pre-existing name occupies the suffix the printer would
    # otherwise choose first for "f1" at 0x1000.
    add_symbol(module, "f1_disambig_0x1000_0", data_block)

    duplicated_functions = (
        ("f1", first_block),
        ("f2", first_block),
        ("f2", first_block),
        ("f1", second_block),
    )
    for func_name, block in duplicated_functions:
        add_function(module, func_name, block)
    add_symbol(module, "f1", first_block)

    asm = pprinter_helpers.run_asm_pprinter(ir)
    print(asm)

    # f1 at 0x1000 has to start counting from 1 because index 0 is already
    # taken, whereas f2 at 0x1000 can start counting from 0.
    expected_fragments = (
        "f1_disambig_0x1000_1:",
        "f1_disambig_0x1000_2:",
        "f1_disambig_0x1001_0:",
        "f2_disambig_0x1000_0",
        "f2_disambig_0x1000_1",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, asm)
def test_unpack_instructions(self):
    """
    Print a selection of ARM64 instructions, one module per instruction,
    and check the expected text appears in the output.
    """
    cases = [
        (b"\x00\x08\x80\xD2", "mov x0,#64"),
        (b"\xE8\x0E\x04\x0E", "dup v8.2s,w23"),
        (b"\xC7\x04\x02\x4E", "dup v7.8h,v6.h[0]"),
        (b"\x00\x00\x01\x4E", "tbl v0.16b,{v0.16b},v1.16b"),
        (b"\x47\x90\x00\x0D", "st1 {v7.s}[1],[x2]"),
        (b"\x9D\x0E\x9E\x0D", "st1 {v29.b}[3],[x20],lr"),
        (b"\x40\x1E\xB2\x4E", "mov v0.16b,v18.16b"),
        # TODO: capstone bug, see
        # https://github.com/capstone-engine/capstone/issues/1842
        # (b"\xDD\x9F\x2D\x05", "splice z29.b,p7,{z30.b,z31.b}"),
        # (b"\xFD\x9F\x2C\x05", "splice z29.b,p7,z30.b,z31.b"),
    ]

    for encoding, expected_text in cases:
        # Each instruction gets its own sub-test and its own fresh IR.
        with self.subTest(instruction=expected_text):
            ir, module = create_test_module(
                gtirb.Module.FileFormat.ELF,
                gtirb.Module.ISA.ARM64,
            )
            _, text_bi = add_text_section(module)
            add_code_block(text_bi, encoding)

            asm = run_asm_pprinter(ir)

            self.assertIn(expected_text, asm)
def test_block_alignment_via_address_fallback(self):
    """
    A code block in a text section placed at 0x1004 must be printed with
    ``.align 4``.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module, address=0x1004)
    add_code_block(text_bi, b"\xC3")

    asm = run_asm_pprinter(ir)

    expected = [".align 4", "ret"]
    self.assertContains(asm_lines(asm), expected)
def test_local_got_reference(self):
    """
    .got references only work when they target global symbols, so a local
    symbol referenced through the .got must be rewritten as global.
    """
    symexpr = gtirb.symbolicexpression

    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.ARM64,
    )
    _, text_bi = add_text_section(module)

    # adrp x0, <label>  (bit 31 of the encoding is set, which selects ADRP)
    adrp_bytes = b"\x20\x00\x00\xb0"
    add_code_block(text_bi, adrp_bytes)

    # Data block standing in for the .got entry.
    _, got_bi = add_data_section(module)
    got_block = add_data_block(got_bi, b"\xff\xff\xff\xff")
    got_sym = gtirb.symbol.Symbol(
        "got_my_local", payload=got_block, module=module
    )

    # Data block the .got entry forwards to.
    _, target_bi = add_data_section(module)
    target_block = add_data_block(target_bi, b"\xff\xff\xff\xff")
    target_sym = gtirb.symbol.Symbol(
        "my_local", payload=target_block, module=module
    )

    add_symbol_forwarding(module, got_sym, target_sym)
    add_elf_symbol_info(
        module,
        target_sym,
        target_block.size,
        "OBJECT",
        binding="LOCAL",
        visibility="DEFAULT",
    )

    got_ref = symexpr.SymAddrConst(
        0,
        got_sym,
        attributes=[symexpr.SymbolicExpression.Attribute.GotRef],
    )
    text_bi.symbolic_expressions[0] = got_ref

    asm = run_asm_pprinter(ir)

    # The instruction must reference the forwarded symbol through :got:.
    self.assertIn("adrp x0, :got:my_local", asm)

    # The symbol must be printed with global and hidden attributes.
    for fragment in (
        ".type my_local, @object",
        ".globl my_local",
        ".hidden my_local",
        "my_local:",
    ):
        self.assertIn(fragment, asm)
def test_block_alignment_via_array_section_fallback_x64(self):
    """
    An eight-byte data block in ``.init_array`` must be printed with
    ``.align 8`` under the dynamic policy (covers the changes in MR 362).
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, array_bi = add_section(module, ".init_array")
    add_data_block(array_bi, b"\x00\x00\x00\x00\x00\x00\x00\x00")

    asm = run_asm_pprinter(ir, ["--policy=dynamic"])

    expected = [".align 8", ".zero 8"]
    self.assertContains(asm_lines(asm), expected)
def test_block_alignment_via_section_in_aux_data(self):
    """
    An ``alignment`` aux-data entry keyed by the section must be reflected
    in the ``.align`` directive printed before the section's code.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    text_section, text_bi = add_text_section(module)
    add_code_block(text_bi, b"\xC3")

    alignment_table = module.aux_data["alignment"].data
    alignment_table[text_section] = 32

    asm = run_asm_pprinter(ir)

    expected = [".align 32", "ret"]
    self.assertContains(asm_lines(asm), expected)
def test_windows_dll(self):
    """
    A PE module with binary type ``DLL`` must be built with a single
    ``ml64.exe`` invocation that receives ``/DLL``.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.PE,
        gtirb.Module.ISA.X64,
        binary_type=["EXEC", "DLL", "WINDOWS_CUI"],
    )
    _, text_bi = add_text_section(module)
    add_code_block(text_bi, b"\xC3")

    invocations = list(run_binary_pprinter_mock(ir))

    self.assertEqual(len(invocations), 1)
    assembler = invocations[0]
    self.assertEqual(assembler.name, "ml64.exe")
    self.assertIn("/DLL", assembler.args)
def test_unpack_dd(self):
    """
    Regression test for
    git.grammatech.com/rewriting/gtirb-pprinter/-/merge_requests/439:
    the memory operand of ``vpgatherdd`` must be printed as ``DWORD PTR``.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module)

    # vpgatherdd ymm1,DWORD PTR [r8+ymm5*4],ymm6
    vpgatherdd = b"\xC4\xC2\x4D\x90\x0c\xA8"
    add_code_block(text_bi, vpgatherdd)

    asm = run_asm_pprinter(ir, ["--syntax=intel"])

    # The point of interest is whether the middle operand is rendered as a
    # DWORD PTR or as a YMMWORD PTR.
    self.assertIn("DWORD PTR", asm)
def test_keep_function(self):
    """
    ``_start`` is absent from the default output of a ``DYN`` module and is
    printed only when requested through ``--keep-function``.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.X64,
        binary_type=["DYN"],
    )
    _, _ = add_section(module, ".dynamic")
    _, text_bi = add_text_section(module)
    start_block = add_code_block(text_bi, b"\xC3")
    add_function(module, "_start", start_block)

    start_lines = ["_start:", "ret"]

    default_asm = run_asm_pprinter(ir)
    self.assertNotContains(asm_lines(default_asm), start_lines)

    kept_asm = run_asm_pprinter(ir, ["--keep-function", "_start"])
    self.assertContains(asm_lines(kept_asm), start_lines)
def test_windows_subsystem_console(self):
    """
    A ``WINDOWS_CUI`` PE executable must be built with a single
    ``ml64.exe`` invocation that receives ``/SUBSYSTEM:console``
    (covers the changes in MR 346).
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.PE,
        gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_CUI"],
    )
    _, text_bi = add_text_section(module)
    module.entry_point = add_code_block(text_bi, b"\xC3")

    invocations = list(run_binary_pprinter_mock(ir))

    self.assertEqual(len(invocations), 1)
    assembler = invocations[0]
    self.assertEqual(assembler.name, "ml64.exe")
    self.assertIn("/SUBSYSTEM:console", assembler.args)
def test_ldlinux_dep(self):
    """
    A binary with a known dependence on ld-linux.so must not be linked
    against it explicitly, because that link is implicit.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
        binary_type=["DYN"],
    )
    add_section(module, ".dynamic")
    _, text_bi = add_text_section(module)
    main_block = add_code_block(text_bi, b"\xC3")
    add_function(module, "main", main_block)
    module.aux_data["libraries"].data.append("ld-linux-x86-64.so.2")

    completed = run_binary_pprinter_mock_out(ir, [], check_output=True)
    output = completed.stdout.decode(sys.stdout.encoding)

    self.assertIn("Compiler arguments:", output)
    self.assertNotIn("ld-linux", output)
def test_windows_includelib(self):
    """
    Libraries listed for a PE module must be printed as ``INCLUDELIB``
    lines with a ``.lib`` extension, never with their original extension.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.PE,
        gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_CUI"],
    )
    _, _ = add_text_section(module)

    libraries = ("WINSPOOL.DRV", "USER32.DLL")
    for library in libraries:
        module.aux_data["libraries"].data.append(library)

    asm = run_asm_pprinter(ir)

    for wanted in ("INCLUDELIB WINSPOOL.lib", "INCLUDELIB USER32.lib"):
        self.assertContains(asm_lines(asm), [wanted])
    for unwanted in ("INCLUDELIB WINSPOOL.DRV", "INCLUDELIB USER32.DLL"):
        self.assertNotContains(asm_lines(asm), [unwanted])
def test_moffset_mov_x64_correct(self):
    """
    64-bit moffset ``mov`` forms whose symbolic expression sits at offset 2
    must print ``[hello]`` without emitting the compatibility warning.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.X64,
    )
    _, text_bi = add_text_section(module, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(module, "hello"))

    # (encoding, expected Intel-syntax text), in the order they are emitted.
    cases = (
        (
            b"\x48\xA1\x00\x00\x00\x00\x00\x00\x00\x00",
            "mov RAX,QWORD PTR [hello]",
        ),
        (
            b"\x48\xA3\x00\x00\x00\x00\x00\x00\x00\x00",
            "mov QWORD PTR [hello],RAX",
        ),
    )
    for encoding, _ in cases:
        add_code_block(text_bi, encoding, {2: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    self.assertNotIn(self.COMPAT_WARNING_MESSAGE, output)
    expected = tuple(text for _, text in cases)
    self.assertContains(asm_lines(asm), expected)
def test_adr_substitution(self):
    """
    The assembler sometimes emits an adr instruction where the assembly
    contained adrp. Such an instruction cannot be assembled once a :got:
    attribute is applied to its symbolic expression, so the adrp -> adr
    substitution has to be reversed when printing.
    """
    symexpr = gtirb.symbolicexpression

    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF,
        gtirb.Module.ISA.ARM64,
    )
    _, text_bi = add_text_section(module)

    # adr x0, label
    add_code_block(text_bi, b"\x20\x00\x00\x10")
    # nop
    nop_block = add_code_block(text_bi, b"\x1f\x20\x03\xd5")

    guard_sym = gtirb.symbol.Symbol(
        "__stack_chk_guard", payload=nop_block, module=module
    )
    got_ref = symexpr.SymAddrConst(
        0,
        guard_sym,
        attributes=[symexpr.SymbolicExpression.Attribute.GotRef],
    )
    text_bi.symbolic_expressions[0] = got_ref

    asm = run_asm_pprinter(ir)

    # The instruction must come out as adrp with the :got: reference.
    self.assertIn("adrp x0, :got:__stack_chk_guard", asm)
    # A comment documenting the substitution must be present.
    self.assertIn("Instruction substituted", asm)
Build a minimal X86-64 hello world GTIRB file. """ import gtirb from gtirb_helpers import ( create_test_module, add_elf_symbol_info, add_text_section, add_data_section, add_symbol, add_code_block, add_data_block, ) ir, m = create_test_module(gtirb.Module.FileFormat.ELF, gtirb.Module.ISA.X64) # Add .data section. s, bi = add_data_section(m, 0x4000A8) block = add_data_block(bi, b"hello world\n") hello = add_symbol(m, "hello", block) # Add .text section. s, bi = add_text_section(m, 0x400080) # mov eax, 1 block = add_code_block(bi, b"\xB8\x01\x00\x00\x00") start = add_symbol(m, "_start", block) add_elf_symbol_info(m, start, block.size, "FUNC") # mov ebx, 1
def make_pe_resource_data(self) -> gtirb.IR:
    """
    Build a PE / x86-64 ``WINDOWS_GUI`` test IR carrying resource data.

    The module gets a ``.text`` section whose single code block is the
    entry point, a ``.rsrc`` section holding one data block with the raw
    resource bytes, and a ``peResources`` aux-data table with two entries
    pointing into that section.

    :returns: the IR that owns the constructed module.
    """
    ir, m = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_GUI"],
    )
    _, bi = add_section(m, ".text")
    entry = add_code_block(bi, b"\xC3")
    m.entry_point = entry
    # Raw contents of the .rsrc section. The backslash-newline pairs are
    # line continuations inside the literal, so the continuation lines must
    # stay unindented.
    # NOTE(review): the literal ends with the characters `')`, which looks
    # like a leftover from a pasted repr -- confirm they are intended.
    resource_data = b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x02\x00\x06\x00\x00\x00 \x00\x00\x80\
\x18\x00\x00\x008\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x00\x00\x01\x00\x07\x00\x00\x00P\x00\
\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x01\x00\x01\x00\x00\x00h\x00\x00\x80\x00\x00\x00\
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\t\x04\
\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x01\x00\t\x04\x00\x00\x90\x00\x00\x00\
\xa0`\x00\x00H\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\xe8`\x00\x00}\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x14\x00T\x00e\x00s\
\x00t\x00 \x00r\x00e\x00s\x00o\x00u\x00r\x00c\x00e\x00 \x00s\
\x00t\x00r\x00i\x00n\x00g\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\
\r\n<assembly xmlns='urn:schemas-microsoft-com:asm.v1\
' manifestVersion='1.0'>\r\n \
<trustInfo xmlns=\"urn:schemas-microsoft-com:asm.v3\">\r\n\
<security>\r\n <requestedPrivileges>\r\n \
<requestedExecutionLevel level='asInvoker' uiAccess='false' />\r\n\
</requestedPrivileges>\r\n </security>\r\n </trustInfo>\
\r\n</assembly>\r\n\x00\x00\x00')"
    _, bi = add_section(m, ".rsrc")
    _ = add_byte_block(bi, gtirb.block.DataBlock, resource_data)
    # Both offsets are relative to the .rsrc byte interval created above.
    off1 = gtirb.Offset(bi, 0)
    off2 = gtirb.Offset(bi, 72)
    # Each entry is a (byte list, Offset, integer) tuple, matching the
    # aux-data type string used below. In both entries the integer equals
    # the little-endian value of the first four listed bytes.
    # NOTE(review): presumably (resource header bytes, location of the
    # resource data, data size) -- confirm against the peResources schema.
    entry1 = (
        [
            72, 0, 0, 0, 32, 0, 0, 0, 255, 255, 6, 0, 255, 255, 7, 0,
            0, 0, 0, 0, 48, 16, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0,
        ],
        off1,
        72,
    )
    entry2 = (
        [
            125, 1, 0, 0, 32, 0, 0, 0, 255, 255, 24, 0, 255, 255, 1, 0,
            0, 0, 0, 0, 48, 16, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0,
        ],
        off2,
        381,
    )
    # Install the table wholesale under the "peResources" key.
    m.aux_data["peResources"] = gtirb.AuxData(
        [entry1, entry2],
        "sequence<tuple<sequence<uint8_t>,Offset,uint64_t>>",
    )
    return ir