def test_BytecodesAreEqual(tempdir: pathlib.Path):
    """Test bytecode difftesting.

    One C file is compiled to textual LLVM IR four times: twice with -O0
    (a, b) and twice with -O3 (a_opt, b_opt). Outputs built with the same
    optimization level must compare equal; outputs built with different
    levels must not.
    """
    src = tempdir / 'a.c'
    a, b = tempdir / 'a', tempdir / 'b'
    a_opt, b_opt = tempdir / 'a_opt', tempdir / 'b_opt'
    with open(src, 'w') as f:
        f.write(""" int DoFoo(int x) { return 2 * x; } int main(int argc, char** argv) { return DoFoo(10); } """)

    emit_bytecode_flags = ['-S', '-c', '-emit-llvm']
    outputs = ((a, '-O0'), (a_opt, '-O3'), (b, '-O0'), (b_opt, '-O3'))
    for out_path, opt_level in outputs:
        p = clang.Exec(
            [str(src), '-o', str(out_path), opt_level] + emit_bytecode_flags)
        # Sanity check that compilation succeeded, so a broken toolchain is
        # reported here rather than as a confusing comparison failure below.
        assert not p.returncode

    assert implementation.BytecodesAreEqual(a, b)
    assert not implementation.BytecodesAreEqual(a, a_opt)
    assert implementation.BytecodesAreEqual(a_opt, b_opt)
def test_BinariesAreEqual(tempdir: pathlib.Path):
    """Test binary difftesting.

    One C file is compiled to an executable four times: twice with -O0
    (a, b) and twice with -O3 (a_opt, b_opt). Binaries built with the same
    optimization level must compare equal; binaries built with different
    levels must not.
    """
    src = tempdir / "a.c"
    a, b = tempdir / "a", tempdir / "b"
    a_opt, b_opt = tempdir / "a_opt", tempdir / "b_opt"
    # The `//` comments in the C program must each end at a newline, otherwise
    # they comment out the remainder of the program. The doubled braces are
    # kept as-is: they are plain nested blocks in C.
    with open(src, "w") as f:
        f.write("""
int DoFoo(int x) {
  // Easily optimizable code: true branch is not reachable, therefore always
  // return 1, and probably inline calls to DoFoo with const 1.
  if (0) {{
    return 2 * x;
  }} else {{
    return 1;
  }}
}

int main(int argc, char** argv) {
  return DoFoo(10);
}
""")

    outputs = ((a, "-O0"), (a_opt, "-O3"), (b, "-O0"), (b_opt, "-O3"))
    for out_path, opt_level in outputs:
        p = clang.Exec([str(src), "-o", str(out_path), opt_level])
        assert not p.returncode  # Sanity check that compilation succeeded.

    assert implementation.BinariesAreEqual(a, b)
    assert not implementation.BinariesAreEqual(a, a_opt)
    assert implementation.BinariesAreEqual(a_opt, b_opt)
def test_OpenClCompileAndLinkFlags_smoke_test(
    flags_getter, flags_getter_args, c_program_src: str, tempdir: pathlib.Path
):
    """Smoke test: a C program builds and runs using the OpenCL flags.

    The flags returned by `flags_getter(**flags_getter_args)` are split into
    compile flags (used to emit LLVM IR from `c_program_src`) and link flags
    (used to turn that IR into an executable). The executable is then run
    and must exit with status 5.
    """
    cflags, ldflags = flags_getter(**flags_getter_args)

    # Step 1: C source on stdin -> LLVM IR file.
    bitcode_path = tempdir / "a.ll"
    emit_args = ["-x", "c", "-", "-S", "-emit-llvm", "-o", str(bitcode_path)]
    emit_proc = clang.Exec(
        emit_args + cflags, stdin=c_program_src, stdout=None, stderr=None
    )
    assert not emit_proc.returncode
    assert bitcode_path.is_file()

    # Step 2: LLVM IR file -> executable.
    bin_path = tempdir / "a.out"
    link_args = ["-o", str(bin_path), str(bitcode_path)]
    link_proc = clang.Exec(link_args + ldflags, stdout=None, stderr=None)
    assert not link_proc.returncode
    assert bin_path.is_file()

    # Step 3: run the executable; the C program should exit with returncode 5.
    run_proc = subprocess.Popen(
        [str(bin_path)],
        env=libcecl_compile.LibCeclExecutableEnvironmentVariables(),
    )
    run_proc.communicate()
    assert run_proc.returncode == 5
def test_LlvmPipeline(tempdir: pathlib.Path):
    """End-to-end test: C source -> LLVM IR -> optimized IR -> executable.

    Each tool invocation is required to succeed silently (empty stdout and
    stderr, zero return code) and to produce its output file. The final
    executable is run and must exit with status zero.
    """
    # The `// dead code` comment must end at a newline, otherwise it comments
    # out the `return` statement and the closing brace.
    with open(tempdir / "foo.c", "w") as f:
        f.write(
            """
int main() {
  int x = 0;
  if (x != 0)
    x = 5; // dead code
  return x;
}
"""
        )

    # Generate bytecode.
    p = clang.Exec(
        [
            str(tempdir / "foo.c"),
            "-o",
            str(tempdir / "foo.ll"),
            "-S",
            "-xc++",
            "-emit-llvm",
            "-c",
            "-O0",
        ]
    )
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / "foo.ll").is_file()

    # Run an optimization pass.
    p = opt.Exec(
        [str(tempdir / "foo.ll"), "-o", str(tempdir / "foo2.ll"), "-S", "-dce"]
    )
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / "foo2.ll").is_file()

    # Compile the optimized LLVM IR to an executable.
    p = clang.Exec([str(tempdir / "foo2.ll"), "-o", str(tempdir / "foo")])
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / "foo").is_file()

    # main() returns x == 0, so check_call must not raise.
    subprocess.check_call([str(tempdir / "foo")])
def BytecodeFromOpenClString(
    opencl_string: str, optimization_level: str
) -> typing.Tuple[str, typing.List[str]]:
    """Create bytecode from OpenCL source string.

    Args:
      opencl_string: A string of OpenCL code.
      optimization_level: The optimization level to use, one of
        {-O0,-O1,-O2,-O3,-Ofast,-Os,-Oz}.

    Returns:
      A <bytecode, clang_args> tuple: the bytecode as a string (clang's
      stdout), followed by the list of arguments that were passed to clang.

    Raises:
      ClangException: If compiling to bytecode fails.
    """
    clang_args = opencl.GetClangArgs(use_shim=False) + [
        clang.ValidateOptimizationLevel(optimization_level),
        "-S",
        "-emit-llvm",
        "-o",
        "-",
        "-i",
        "-",
        "-Wno-everything",  # No warnings please.
    ]
    # The OpenCL source is fed to clang on stdin; bytecode comes back on stdout.
    process = clang.Exec(clang_args, stdin=opencl_string)
    if process.returncode:
        raise clang.ClangException(
            "clang failed",
            returncode=process.returncode,
            stderr=process.stderr,
            command=clang_args,
        )
    return process.stdout, clang_args
def CSourceToBytecode(source: str) -> str:
    """Build LLVM bytecode for the given C code.

    Args:
      source: C source code, passed to clang on stdin.

    Returns:
      The textual LLVM IR that clang wrote to stdout, compiled with -O0.

    Raises:
      AssertionError: If clang exits with a non-zero return code.
    """
    process = clang.Exec(
        ["-xc", "-O0", "-S", "-emit-llvm", "-", "-o", "-"], stdin=source
    )
    # Raised explicitly rather than via `assert` so the check still runs under
    # `python -O`, and so the failure carries clang's diagnostics. The
    # exception type is unchanged for existing callers.
    if process.returncode:
        raise AssertionError(
            f"clang failed with returncode {process.returncode}:\n"
            f"{process.stderr}"
        )
    return process.stdout
def ExtractLlvmByteCodeOrDie(
    src_file_path: pathlib.Path, datafolder: pathlib.Path
) -> str:
    """Read and compile to bytecode or die.

    Args:
      src_file_path: Path of the source file to compile.
      datafolder: Directory whose "kernels_cl" subdirectory is added to the
        include path.

    Returns:
      The bytecode that clang wrote to stdout.

    On compilation failure the error is logged and app.Fatal() is called.
    """
    # Read the source file and strip any non-ascii characters. The bytes are
    # decoded as ASCII with errors ignored, which drops non-ASCII bytes while
    # leaving backslash sequences in the source (e.g. "\n" inside a string
    # literal) untouched. Decoding with "unicode_escape" would rewrite those
    # sequences and can raise on malformed escapes.
    with open(src_file_path, "rb") as f:
        src = f.read().decode("ascii", "ignore")

    # Compile src to bytecode.
    clang_args = opencl.GetClangArgs(use_shim=True) + [
        "-O0",
        "-S",
        "-emit-llvm",
        "-o",
        "-",
        "-i",
        "-",
        # No warnings, and fail immediately on error.
        "-Wno-everything",
        "-ferror-limit=1",
        # Kernels have headers.
        "-I",
        str(datafolder / "kernels_cl"),
        # We don't need the full shim header, just the common constants:
        "-DCLGEN_OPENCL_SHIM_NO_COMMON_TYPES",
        "-DCLGEN_OPENCL_SHIM_NO_UNSUPPORTED_STORAGE_CLASSES_AND_QUALIFIERS",
    ]
    process = clang.Exec(clang_args, stdin=src, log=False)
    if process.returncode:
        app.Error("Failed to compile %s", src_file_path)
        app.Error("stderr: %s", process.stderr)
        app.Fatal(f"clang failed with returncode {process.returncode}")
    return process.stdout
def CSourceToBytecode(source: str) -> str:
    """Build LLVM bytecode for the given C code.

    Args:
      source: C source code, passed to clang on stdin.

    Returns:
      The textual LLVM IR that clang wrote to stdout, compiled with -O1.

    Raises:
      AssertionError: If clang exits with a non-zero return code.
    """
    # NOTE: This has to be at least -O1 to obtain polly outputs
    process = clang.Exec(
        ["-xc", "-O1", "-S", "-emit-llvm", "-", "-o", "-"], stdin=source
    )
    # Raised explicitly rather than via `assert` so the check still runs under
    # `python -O`, and so the failure carries clang's diagnostics. The
    # exception type is unchanged for existing callers.
    if process.returncode:
        raise AssertionError(
            f"clang failed with returncode {process.returncode}:\n"
            f"{process.stderr}"
        )
    return process.stdout
def CompileDriver(libcecl_src: str, binary_path: pathlib.Path):
    """Compile driver source, passed on stdin as C99, to an executable.

    NOTE(review): `cflags` and `ldflags` are not parameters of this function;
    they are resolved from the surrounding scope -- confirm where they are
    defined.

    Args:
      libcecl_src: The C source code to compile.
      binary_path: Where clang is asked to write the executable.

    Raises:
      DriverCompilationFailed: If clang exits with a non-zero return code.
        The message is the first 1024 characters of clang's stderr.
    """
    base_args = ["-x", "c", "-std=c99", "-", "-o", str(binary_path)]
    clang_args = base_args + cflags + ldflags
    proc = clang.Exec(clang_args, stdin=libcecl_src)
    if proc.returncode:
        raise DriverCompilationFailed(proc.stderr[:1024])
    # A successful compile must have produced the binary.
    assert binary_path.is_file()
def test_LlvmPipeline(tempdir: pathlib.Path):
    """End-to-end test: C source -> LLVM IR -> optimized IR -> executable.

    Each tool invocation is required to succeed silently (empty stdout and
    stderr, zero return code) and to produce its output file. The final
    executable is run and its output is checked.
    """
    # The `#include` directive and the `// dead code` comment must each end at
    # a newline, otherwise they swallow the code that follows them.
    with open(tempdir / 'foo.c', 'w') as f:
        f.write("""
#include <stdio.h>

int main() {
  int x = 0;
  if (x != 0)
    x = 5; // dead code
  printf("Hello, world!\\n");
  return x;
}
""")

    # Generate bytecode.
    p = clang.Exec([
        str(tempdir / 'foo.c'), '-o',
        str(tempdir / 'foo.ll'), '-S', '-xc++', '-emit-llvm', '-c', '-O0'
    ])
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / 'foo.ll').is_file()

    # Run an optimization pass.
    p = opt.Exec([
        str(tempdir / 'foo.ll'), '-o',
        str(tempdir / 'foo2.ll'), '-S', '-dce'
    ])
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / 'foo2.ll').is_file()

    # Compile the optimized LLVM IR to an executable.
    p = clang.Exec([str(tempdir / 'foo2.ll'), '-o', str(tempdir / 'foo')])
    assert not p.stderr
    assert not p.stdout
    assert not p.returncode
    assert (tempdir / 'foo').is_file()

    # Run the executable and check what it printed.
    out = subprocess.check_output([str(tempdir / 'foo')],
                                  universal_newlines=True)
    assert out == 'Hello, world!\n'
def GetBytecodesFromContentFiles(
    source_name: str,
    language: str,
    content_files: typing.List[typing.Tuple[int, str]],
) -> typing.List[ml4pl_pb2.LlvmBytecode]:
    """Extract LLVM bytecodes from contentfiles.

    Swift and Haskell are delegated to their dedicated extractors. Every
    other language is compiled with clang, using the arguments registered
    for it in LANGUAGE_TO_CLANG_ARGS, with the content file text passed on
    stdin.

    Args:
      source_name: The name of the content file database. This is the same
        across all content files.
      language: The source code language. This is the same across all content
        files.
      content_files: A list of <id,text> tuples, where each tuple is the ID
        and text of a row in the content file database.

    Returns:
      A list of zero or more LlvmBytecode protos, one for each contentfile
      which was successfully processed. Content files that clang fails to
      compile are skipped.
    """
    if language == "swift":
        return GetSwiftBytecodesFromContentFiles(source_name, content_files)
    if language == "haskell":
        return GetHaskellBytecodesFromContentFiles(source_name, content_files)

    # Read source from stdin, write textual LLVM IR to stdout.
    stdin_to_stdout_args = ["-S", "-emit-llvm", "-", "-o", "-"]
    clang_args = LANGUAGE_TO_CLANG_ARGS[language] + stdin_to_stdout_args

    protos = []
    for content_file_id, text in content_files:
        process = clang.Exec(clang_args, stdin=text)
        if not process.returncode:
            proto = ml4pl_pb2.LlvmBytecode(
                source_name=source_name,
                relpath=str(content_file_id),
                lang=language,
                cflags=" ".join(clang_args),
                bytecode=process.stdout,
                clang_returncode=0,
                error_message="",
            )
            protos.append(proto)
    return protos
def test_BinariesAreEqual(tempdir: pathlib.Path):
    """Test binary difftesting.

    One C file is compiled to an executable twice with -O0 and twice with
    -O3. Same-level binaries must compare equal; different-level binaries
    must not.
    """
    src = tempdir / 'a.c'
    a = tempdir / 'a'
    b = tempdir / 'b'
    a_opt = tempdir / 'a_opt'
    b_opt = tempdir / 'b_opt'
    src.write_text(""" int DoFoo(int x) { return 2 * x; } int main(int argc, char** argv) { return DoFoo(10); } """)

    # Same compilation order as the outputs are listed: a, a_opt, b, b_opt.
    builds = [(a, '-O0'), (a_opt, '-O3'), (b, '-O0'), (b_opt, '-O3')]
    for binary, opt_level in builds:
        clang.Exec([str(src), '-o', str(binary), opt_level])

    assert implementation.BinariesAreEqual(a, b)
    assert not implementation.BinariesAreEqual(a, a_opt)
    assert implementation.BinariesAreEqual(a_opt, b_opt)
def GetSwiftBytecodesFromContentFiles(
    source_name: str, content_files: typing.List[typing.Tuple[int, str]]
) -> typing.List[ml4pl_pb2.LlvmBytecode]:
    """Extract LLVM bytecodes from swift contentfiles.

    The process is swift -> LLVM bitcode, clang -> LLVM bytecode.

    This requires that the `swift` binary is in the system path.

    Args:
      source_name: The name of the content file database.
      content_files: A list of <id,text> tuples.

    Returns:
      A list of LlvmBytecode protos, one for each content file for which
      both the swift and the clang step succeeded. Failures are skipped.
    """
    protos = []
    with tempfile.TemporaryDirectory(prefix="phd_import_swift_") as d:
        with fs.chdir(d) as d:
            # The same two scratch files are reused for every content file.
            swift_file = d / "file.swift"
            bc_file = d / "file.bc"
            for content_file_id, text in content_files:
                # Remove the bitcode left behind by a previous iteration.
                # Otherwise a swift run that exits zero without writing any
                # output would pass the is_file() check below, and the
                # previous file's bitcode would be attributed to this one.
                if bc_file.is_file():
                    bc_file.unlink()

                fs.Write(swift_file, text.encode("utf-8"))
                swift = subprocess.Popen(
                    ["swift", "-Xfrontend", "-emit-bc", swift_file.name],
                    stderr=subprocess.DEVNULL,
                )
                swift.communicate()
                if swift.returncode:
                    continue
                if not bc_file.is_file():
                    continue

                # Convert the bitcode to textual LLVM IR on stdout.
                process = clang.Exec(
                    ["-S", "-emit-llvm", str(bc_file), "-o", "-"]
                )
                if process.returncode:
                    continue

                protos.append(
                    ml4pl_pb2.LlvmBytecode(
                        source_name=source_name,
                        relpath=str(content_file_id),
                        lang="swift",
                        cflags="",
                        bytecode=process.stdout,
                        clang_returncode=0,
                        error_message="",
                    )
                )
    return protos
def test_Exec_compile_bytecode_stdin(tempdir: pathlib.Path):
    """Test bytecode generation from C++ source passed on stdin."""
    # The `#include` directive must sit on its own line, otherwise the rest of
    # the program becomes trailing tokens of the directive.
    p = clang.Exec(
        ['-xc++', '-S', '-emit-llvm', '-c', '-o',
         str(tempdir / 'foo.ll'), '-'],
        stdin="""
#include <iostream>

int main() {
  std::cout << "Hello, world!" << std::endl;
  return 0;
}
""")
    # Attach clang's diagnostics to the failure instead of printing them.
    assert not p.returncode, p.stderr
    assert not p.stderr
    assert not p.stdout
    assert (tempdir / 'foo.ll').is_file()
def test_Exec_compile_bytecode(tempdir: pathlib.Path):
    """Test bytecode generation from a C++ source file on disk."""
    # The `#include` directive must sit on its own line, otherwise the rest of
    # the program becomes trailing tokens of the directive.
    with open(tempdir / 'foo.cc', 'w') as f:
        f.write("""
#include <iostream>

int main() {
  std::cout << "Hello, world!" << std::endl;
  return 0;
}
""")
    p = clang.Exec([
        str(tempdir / 'foo.cc'), '-xc++', '-S', '-emit-llvm', '-c', '-o',
        str(tempdir / 'foo.ll')
    ])
    # Compilation must succeed silently and produce the output file.
    assert not p.returncode
    assert not p.stderr
    assert not p.stdout
    assert (tempdir / 'foo.ll').is_file()
def BytecodeFromOpenClString(opencl_string: str) -> str:
    """Compile a string of OpenCL code to bytecode.

    Args:
      opencl_string: A string of OpenCL code, passed to clang on stdin.

    Returns:
      The bytecode as a string (clang's stdout).

    Raises:
      ClangException: If compiling to bytecode fails.
    """
    base_args = opencl.GetClangArgs(use_shim=False)
    # Use -O3 to reduce CFGs.
    clang_args = base_args + [
        '-O3',
        '-S',
        '-emit-llvm',
        '-o',
        '-',
        '-i',
        '-',
    ]
    process = clang.Exec(clang_args, stdin=opencl_string)
    if not process.returncode:
        return process.stdout
    raise clang.ClangException(
        f"clang failed with returncode {process.returncode}:\n{process.stderr}")
def BytecodeFromLinuxSrc(path: pathlib.Path, optimization_level: str) -> str: """Create bytecode from a Linux source file. Args: path: The path of the source file. optimization_level: The clang optimization level to use, one of {-O0,-O1,-O2,-O3,-Ofast,-Os,-Oz}. Returns: The bytecode as a string. Raises: ClangException: If compiling to bytecode fails. """ root = linux.LinuxSourcesDataset().src_tree_root genroot = linux.LinuxSourcesDataset().generated_hdrs_root # A subset of the arguments found by running `make V=1` in the linux # build and grabbing a random C compile target. # The build I took this from: Wp,-MD,arch/x86/kernel/.asm-offsets.s.d -nostdinc -isystem /usr/lib/gcc/x86_64-linux-gnu/5/include -I./arch/x86/include -I./arch/x86/include/generated -I./include -I./arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h -include ./include/linux/compiler_types.h -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -Werror-implicit-function-declaration -Wno-format-security -std=gnu89 -fno-PIE -DCC_HAVE_ASM_GOTO -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -DCONFIG_AS_AVX512=1 -DCONFIG_AS_SHA1_NI=1 -DCONFIG_AS_SHA256_NI=1 -pipe -Wno-sign-compare -fno-asynchronous-unwind-tables -mindirect-branch=thunk-extern -mindirect-branch-register -DRETPOLINE -fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0 -Wframe-larger-than=1024 -fstack-protector-strong -Wno-unused-but-set-variable -fno-var-tracking-assignments -g -gdwarf-4 -pg -mrecord-mcount -mfentry 
-DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fno-stack-check -fconserve-stack -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time -Werror=incompatible-pointer-types -Werror=designated-init -DKBUILD_BASENAME='"asm_offsets"' -DKBUILD_MODNAME='"asm_offsets"' -fverbose-asm -S -o arch/x86/kernel/asm-offsets.s arch/x86/kernel/asm-offsets.c clang_args = [ "-S", "-emit-llvm", "-o", "-", clang.ValidateOptimizationLevel(optimization_level), "-Wno-everything", # No warnings please. "-I", str(root / "arch/x86/include"), "-I", str(genroot / "arch/x86/include/generated"), "-I", str(root / "include"), "-I", str(root / "arch/x86/include/uapi"), "-I", str(genroot / "arch/x86/include/generated/uapi"), "-I", str(root / "include/uapi"), "-I", str(genroot / "include/generated/uapi"), "-I", str(genroot / "arch/x86/include"), "-I", str(genroot / "arch/x86/include/generated"), "-I", str(genroot / "arch/x86/include/generated/uapi"), "-I", str(genroot / "include"), "-I", str(genroot / "include/generated"), "-include", str(genroot / "include/linux/kconfig.h"), "-include", str(genroot / "include/linux/compiler_types.h"), "-D__KERNEL__", "-m64", "-DCONFIG_X86_X32_ABI", "-DCONFIG_AS_CFI=1", "-DCONFIG_AS_CFI_SIGNAL_FRAME=1", "-DCONFIG_AS_CFI_SECTIONS=1", "-DCONFIG_AS_FXSAVEQ=1", "-DCONFIG_AS_SSSE3=1", "-DCONFIG_AS_CRC32=1", "-DCONFIG_AS_AVX=1", "-DCONFIG_AS_AVX2=1", "-DCONFIG_AS_AVX512=1", "-DCONFIG_AS_SHA1_NI=1", "-DCONFIG_AS_SHA256_NI=1", "-pipe", "-DRETPOLINE", "-DCC_USING_FENTRY", "-DKBUILD_BASENAME='\"asm_offsets\"'", "-DKBUILD_MODNAME='\"asm_offsets\"'", str(path), ] process = clang.Exec(clang_args) if process.returncode: