def test_moffset_mov_ia32_compat(self):
    """
    IA32 moffset MOVs whose symbolic expression is stored at offset 0.

    Every block attaches the ``hello`` expression at offset 0 of the
    instruction. The printer is expected to emit the compatibility
    warning exactly once and to print the symbol in each memory operand.
    """
    ir, m = create_test_module(
        gtirb.Module.FileFormat.PE, gtirb.Module.ISA.IA32
    )
    _, bi = add_text_section(m, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(m, "hello"))

    # (instruction encoding, expected Intel-syntax line), in emission order.
    cases = (
        (b"\xA0\x00\x00\x00\x00", "mov AL,BYTE PTR [hello]"),
        (b"\x66\xA1\x00\x00\x00\x00", "mov AX,WORD PTR [hello]"),
        (b"\xA1\x00\x00\x00\x00", "mov EAX,DWORD PTR [hello]"),
        (b"\xA2\x00\x00\x00\x00", "mov BYTE PTR [hello],AL"),
        (b"\x66\xA3\x00\x00\x00\x00", "mov WORD PTR [hello],AX"),
        (b"\xA3\x00\x00\x00\x00", "mov DWORD PTR [hello],EAX"),
    )
    for encoding, _ in cases:
        add_code_block(bi, encoding, {0: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    warning = self.COMPAT_WARNING_MESSAGE
    self.assertIn(warning, output)
    self.assertEqual(output.count(warning), 1)
    self.assertContains(
        asm_lines(asm),
        tuple(expected for _, expected in cases),
    )
def test_unpack_instructions(self):
    """
    Print a selection of ARM64 instructions, one per fresh module, and
    check that the expected text appears in the assembly output.
    """
    cases = [
        (b"\x00\x08\x80\xD2", "mov x0,#64"),
        (b"\xE8\x0E\x04\x0E", "dup v8.2s,w23"),
        (b"\xC7\x04\x02\x4E", "dup v7.8h,v6.h[0]"),
        (b"\x00\x00\x01\x4E", "tbl v0.16b,{v0.16b},v1.16b"),
        (b"\x47\x90\x00\x0D", "st1 {v7.s}[1],[x2]"),
        (b"\x9D\x0E\x9E\x0D", "st1 {v29.b}[3],[x20],lr"),
        (b"\x40\x1E\xB2\x4E", "mov v0.16b,v18.16b"),
        # TODO: capstone bug, see
        # https://github.com/capstone-engine/capstone/issues/1842
        # (b"\xDD\x9F\x2D\x05", "splice z29.b,p7,{z30.b,z31.b}"),
        # (b"\xFD\x9F\x2C\x05", "splice z29.b,p7,z30.b,z31.b"),
    ]

    for encoding, expected in cases:
        # Each instruction is reported as its own sub-test.
        with self.subTest(instruction=expected):
            ir, module = create_test_module(
                file_format=gtirb.Module.FileFormat.ELF,
                isa=gtirb.Module.ISA.ARM64,
            )
            _, byte_interval = add_text_section(module)
            add_code_block(byte_interval, encoding)

            self.assertIn(expected, run_asm_pprinter(ir))
def test_ambiguous_symbol_names(self):
    """
    Several functions and symbols share the names ``f1`` and ``f2``; the
    printed assembly is expected to contain ``_disambig_<address>_<n>``
    names for them.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF, gtirb.Module.ISA.X64
    )

    _, text_bi = add_text_section(module, 0x1000)
    first_ret = add_code_block(text_bi, b"\xc3")
    second_ret = add_code_block(text_bi, b"\xc3")

    _, data_bi = add_data_section(module, 0x500)
    hello_data = add_data_block(data_bi, b"hello")

    # A data symbol that already occupies the "_0" disambiguated name.
    add_symbol(module, "f1_disambig_0x1000_0", hello_data)

    add_function(module, "f1", first_ret)
    add_function(module, "f2", first_ret)
    add_function(module, "f2", first_ret)
    add_function(module, "f1", second_ret)
    add_symbol(module, "f1", first_ret)

    asm = pprinter_helpers.run_asm_pprinter(ir)
    print(asm)

    # f1_0x1000 should start counting from 1,
    # since 0 produces a conflict,
    # but f2_0x1000 should start counting from 0
    self.assertIn("f1_disambig_0x1000_1:", asm)
    self.assertIn("f1_disambig_0x1000_2:", asm)
    self.assertIn("f1_disambig_0x1001_0:", asm)
    self.assertIn("f2_disambig_0x1000_0", asm)
    self.assertIn("f2_disambig_0x1000_1", asm)
def create_ir_with_name(name: str) -> gtirb.IR:
    """
    Build an ELF/x64 test IR whose text section holds a single ``ret``
    code block registered as a function called *name*.
    """
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF, gtirb.Module.ISA.X64
    )
    _, byte_interval = add_text_section(module)
    ret_block = add_code_block(byte_interval, b"\xC3")
    add_function(module, name, ret_block)
    return ir
def test_code_block_alignment_via_symbol(self):
    """
    Test that code blocks that have exported symbols are aligned by
    their address.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
    )
    _, byte_interval = add_text_section(module)

    # Two nops first, then the ret block that carries the symbol.
    add_code_block(byte_interval, b"\x90\x90")
    ret_block = add_code_block(byte_interval, b"\xC3")

    hello = add_symbol(module, "hello", ret_block)
    add_elf_symbol_info(module, hello, ret_block.size, "FUNC")

    expected_lines = [
        "nop",
        ".align 2",
        ".globl hello",
        ".type hello, @function",
        "hello:",
        "ret",
    ]
    asm = run_asm_pprinter(ir)
    self.assertContains(asm_lines(asm), expected_lines)
def test_nonmoffset_mov(self):
    """
    A MOV that is not a moffset form: the same encoding is added twice,
    once with the symbolic expression at offset 0 and once at offset 2.

    No compatibility warning is expected; the first block is expected to
    print a literal ``[0]`` operand and the second ``[hello]``.
    """
    ir, m = create_test_module(
        gtirb.Module.FileFormat.PE, gtirb.Module.ISA.IA32
    )
    _, bi = add_text_section(m, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(m, "hello"))

    # mov edi, dword ptr [hello]
    encoding = b"\x8B\x3D\x00\x00\x00\x00"

    # wrong symbolic offset
    add_code_block(bi, encoding, {0: hello_expr})
    # correct symbolic offset
    add_code_block(bi, encoding, {2: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    self.assertNotIn(self.COMPAT_WARNING_MESSAGE, output)
    expected_lines = ("mov EDI,DWORD PTR [0]", "mov EDI,DWORD PTR [hello]")
    self.assertContains(asm_lines(asm), expected_lines)
def test_block_alignment_via_address_fallback(self):
    """
    A single ``ret`` block in a text section placed at 0x1004, with no
    alignment supplied by this test, is expected to print ``.align 4``.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
    )
    _, byte_interval = add_text_section(module, address=0x1004)
    add_code_block(byte_interval, b"\xC3")

    lines = asm_lines(run_asm_pprinter(ir))
    self.assertContains(lines, [".align 4", "ret"])
def test_local_got_reference(self):
    """
    .got references are not generated correctly unless they refer to
    global symbols - we must rewrite symbols referenced in the .got as
    global.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.ARM64,
    )
    _, text_bi = add_text_section(module)

    # adrp x0, label
    add_code_block(text_bi, b"\x20\x00\x00\xb0")

    # Add .got section
    _, got_bi = add_data_section(module)
    got_block = add_data_block(got_bi, b"\xff\xff\xff\xff")
    got_symbol = gtirb.symbol.Symbol(
        "got_my_local", payload=got_block, module=module
    )

    # Add target data section
    _, target_bi = add_data_section(module)
    target_block = add_data_block(target_bi, b"\xff\xff\xff\xff")
    target_symbol = gtirb.symbol.Symbol(
        "my_local", payload=target_block, module=module
    )

    # The .got entry forwards to the LOCAL target symbol.
    add_symbol_forwarding(module, got_symbol, target_symbol)
    add_elf_symbol_info(
        module,
        target_symbol,
        target_block.size,
        "OBJECT",
        binding="LOCAL",
        visibility="DEFAULT",
    )

    got_ref = gtirb.symbolicexpression.SymbolicExpression.Attribute.GotRef
    text_bi.symbolic_expressions[0] = gtirb.symbolicexpression.SymAddrConst(
        0, got_symbol, attributes=[got_ref]
    )

    asm = run_asm_pprinter(ir)

    # Verify that the instruction is printed correctly.
    self.assertIn("adrp x0, :got:my_local", asm)

    # Verify that the symbol is printed with global and hidden attributes.
    for expected in (
        ".type my_local, @object",
        ".globl my_local",
        ".hidden my_local",
        "my_local:",
    ):
        self.assertIn(expected, asm)
def test_block_alignment_via_section_in_aux_data(self):
    """
    Recording an alignment of 32 for the text section in the
    ``alignment`` aux data table is expected to print ``.align 32``
    ahead of the section's ``ret`` block.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
    )
    text_section, byte_interval = add_text_section(module)
    add_code_block(byte_interval, b"\xC3")

    alignment_table = module.aux_data["alignment"].data
    alignment_table[text_section] = 32

    lines = asm_lines(run_asm_pprinter(ir))
    self.assertContains(lines, [".align 32", "ret"])
def test_windows_dll(self):
    """
    A PE/x64 module whose binary type includes ``DLL`` is expected to
    produce exactly one tool invocation, ``ml64.exe``, with ``/DLL``
    among its arguments.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.X64,
        binary_type=["EXEC", "DLL", "WINDOWS_CUI"],
    )
    _, byte_interval = add_text_section(module)
    add_code_block(byte_interval, b"\xC3")

    invocations = list(run_binary_pprinter_mock(ir))
    self.assertEqual(len(invocations), 1)

    assembler = invocations[0]
    self.assertEqual(assembler.name, "ml64.exe")
    self.assertIn("/DLL", assembler.args)
def test_unpack_dd(self):
    """
    Regression test for the operand size printed for ``vpgatherdd``.
    """
    # This test ensures that we do not regress on the following issue:
    # git.grammatech.com/rewriting/gtirb-pprinter/-/merge_requests/439
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
    )
    _, byte_interval = add_text_section(module)

    # vpgatherdd ymm1,DWORD PTR [r8+ymm5*4],ymm6
    vpgatherdd = b"\xC4\xC2\x4D\x90\x0c\xA8"
    add_code_block(byte_interval, vpgatherdd)

    # We're specifically trying to see if the middle operand is a
    # DWORD PTR or a YMMWORD PTR.
    intel_asm = run_asm_pprinter(ir, ["--syntax=intel"])
    self.assertIn("DWORD PTR", intel_asm)
def test_keep_function(self):
    """
    In a ``DYN`` ELF module with a ``.dynamic`` section, ``_start`` is
    expected to be absent from the default output and present when
    ``--keep-function _start`` is passed.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.X64,
        binary_type=["DYN"],
    )
    _, _ = add_section(module, ".dynamic")
    _, byte_interval = add_text_section(module)
    ret_block = add_code_block(byte_interval, b"\xC3")
    add_function(module, "_start", ret_block)

    start_lines = ["_start:", "ret"]

    # Default run: the function is not printed.
    default_asm = run_asm_pprinter(ir)
    self.assertNotContains(asm_lines(default_asm), start_lines)

    # Explicitly kept: the function is printed.
    kept_asm = run_asm_pprinter(ir, ["--keep-function", "_start"])
    self.assertContains(asm_lines(kept_asm), start_lines)
def test_windows_subsystem_console(self):
    """
    A PE/x64 ``WINDOWS_CUI`` executable with an entry point is expected
    to produce exactly one tool invocation, ``ml64.exe``, with
    ``/SUBSYSTEM:console`` among its arguments.
    """
    # This tests the changes in MR 346.
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_CUI"],
    )
    _, byte_interval = add_text_section(module)
    module.entry_point = add_code_block(byte_interval, b"\xC3")

    invocations = list(run_binary_pprinter_mock(ir))
    self.assertEqual(len(invocations), 1)

    assembler = invocations[0]
    self.assertEqual(assembler.name, "ml64.exe")
    self.assertIn("/SUBSYSTEM:console", assembler.args)
def create_multi_module_ir(self) -> gtirb.IR:
    """
    Build an IR holding two ELF/x64 modules: ``ex`` with a ``main``
    function and ``fun.so`` with a ``fun`` function. Each function is a
    single ``ret`` code block in its module's text section.
    """
    ir = gtirb.IR()

    # (module name, function name), created in this order.
    layout = (("ex", "main"), ("fun.so", "fun"))
    for module_name, function_name in layout:
        module = gtirb.Module(
            name=module_name,
            file_format=gtirb.Module.FileFormat.ELF,
            isa=gtirb.Module.ISA.X64,
        )
        module.ir = ir
        add_standard_aux_data_tables(module)

        _, byte_interval = add_text_section(module)
        add_function(
            module, function_name, add_code_block(byte_interval, b"\xC3")
        )

    return ir
def test_ldlinux_dep(self):
    """
    A module listing ``ld-linux-x86-64.so.2`` among its libraries must
    not have ``ld-linux`` appear in the mocked binary printer's output.
    """
    # Check that a binary with a known dependence on ld-linux.so does
    # not try to explicitly link with it, as the link should be implicit.
    ir, module = create_test_module(
        gtirb.Module.FileFormat.ELF, gtirb.Module.ISA.X64, ["DYN"]
    )
    add_section(module, ".dynamic")
    _, byte_interval = add_text_section(module)
    main_block = add_code_block(byte_interval, b"\xC3")
    add_function(module, "main", main_block)

    libraries = module.aux_data["libraries"].data
    libraries.append("ld-linux-x86-64.so.2")

    completed = run_binary_pprinter_mock_out(ir, [], check_output=True)
    output = completed.stdout.decode(sys.stdout.encoding)

    self.assertIn("Compiler arguments:", output)
    self.assertNotIn("ld-linux", output)
def test_windows_includelib(self):
    """
    Libraries recorded as ``WINSPOOL.DRV`` and ``USER32.DLL`` are
    expected to be printed as ``INCLUDELIB`` lines with a ``.lib``
    suffix, and not with their original suffixes.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.PE,
        isa=gtirb.Module.ISA.X64,
        binary_type=["EXEC", "EXE", "WINDOWS_CUI"],
    )
    add_text_section(module)

    libraries = module.aux_data["libraries"].data
    for library in ("WINSPOOL.DRV", "USER32.DLL"):
        libraries.append(library)

    asm = run_asm_pprinter(ir)

    # asm_lines is re-evaluated for every assertion, as in each check
    # being run against a fresh view of the output.
    for present in ("INCLUDELIB WINSPOOL.lib", "INCLUDELIB USER32.lib"):
        self.assertContains(asm_lines(asm), [present])
    for absent in ("INCLUDELIB WINSPOOL.DRV", "INCLUDELIB USER32.DLL"):
        self.assertNotContains(asm_lines(asm), [absent])
def test_moffset_mov_x64_correct(self):
    """
    x64 moffset MOVs with the symbolic expression at offset 2.

    No compatibility warning is expected, and both instructions are
    expected to print ``hello`` in their memory operand.
    """
    ir, m = create_test_module(
        gtirb.Module.FileFormat.PE, gtirb.Module.ISA.X64
    )
    _, bi = add_text_section(m, 0x1000)
    hello_expr = gtirb.SymAddrConst(0, add_symbol(m, "hello"))

    # mov rax, qword ptr [hello]
    load_rax = b"\x48\xA1\x00\x00\x00\x00\x00\x00\x00\x00"
    add_code_block(bi, load_rax, {2: hello_expr})

    # mov qword ptr [hello], rax
    store_rax = b"\x48\xA3\x00\x00\x00\x00\x00\x00\x00\x00"
    add_code_block(bi, store_rax, {2: hello_expr})

    asm, output = run_asm_pprinter_with_outputput(ir)

    self.assertNotIn(self.COMPAT_WARNING_MESSAGE, output)
    expected_lines = (
        "mov RAX,QWORD PTR [hello]",
        "mov QWORD PTR [hello],RAX",
    )
    self.assertContains(asm_lines(asm), expected_lines)
def test_adr_substitution(self):
    """
    In some cases, the assembler will substitute an adr instruction
    where the assembly contained an adrp instruction. If we apply a
    :got: attribute to that symbolic expression, the assembler won't
    assemble it. In that case, we must reverse the adrp -> adr
    substitution.
    """
    ir, module = create_test_module(
        file_format=gtirb.Module.FileFormat.ELF,
        isa=gtirb.Module.ISA.ARM64,
    )
    _, byte_interval = add_text_section(module)

    # adr x0, label
    add_code_block(byte_interval, b"\x20\x00\x00\x10")
    # nop
    nop_block = add_code_block(byte_interval, b"\x1f\x20\x03\xd5")

    guard = gtirb.symbol.Symbol(
        "__stack_chk_guard", payload=nop_block, module=module
    )

    # GotRef-attributed expression on the adr instruction at offset 0.
    got_ref = gtirb.symbolicexpression.SymbolicExpression.Attribute.GotRef
    byte_interval.symbolic_expressions[0] = (
        gtirb.symbolicexpression.SymAddrConst(0, guard, attributes=[got_ref])
    )

    asm = run_asm_pprinter(ir)

    # Verify that the instruction is printed correctly.
    self.assertIn("adrp x0, :got:__stack_chk_guard", asm)

    # Verify that a comment is added
    self.assertIn("Instruction substituted", asm)
add_text_section, add_data_section, add_symbol, add_code_block, add_data_block, ) ir, m = create_test_module(gtirb.Module.FileFormat.ELF, gtirb.Module.ISA.X64) # Add .data section. s, bi = add_data_section(m, 0x4000A8) block = add_data_block(bi, b"hello world\n") hello = add_symbol(m, "hello", block) # Add .text section. s, bi = add_text_section(m, 0x400080) # mov eax, 1 block = add_code_block(bi, b"\xB8\x01\x00\x00\x00") start = add_symbol(m, "_start", block) add_elf_symbol_info(m, start, block.size, "FUNC") # mov ebx, 1 add_code_block(bi, b"\xBB\x01\x00\x00\x00") # mov rsi, hello operand = gtirb.SymAddrConst(0, hello) add_code_block(bi, b"\x48\xBE\xA8\x00\x40\x00\x00\x00\x00\x00", {2: operand}) # mov rsi, 13 add_code_block(bi, b"\xBA\x0D\x00\x00\x00") # syscall add_code_block(bi, b"\x0F\x05")