Example #1
0
def test_BytecodesAreEqual(tempdir: pathlib.Path):
    """Test bytecode difftesting.

    Compiles one C source file to LLVM IR four times (twice at -O0, twice at
    -O3) and asserts that implementation.BytecodesAreEqual() reports outputs
    built at the same optimization level as equal, and outputs built at
    different optimization levels as not equal.

    Args:
      tempdir: A scratch directory in which the source and outputs are written.
    """
    src = tempdir / 'a.c'
    a, b = tempdir / 'a', tempdir / 'b'
    a_opt, b_opt = tempdir / 'a_opt', tempdir / 'b_opt'
    with open(src, 'w') as f:
        f.write("""
int DoFoo(int x) {
  return 2 * x;
}

int main(int argc, char** argv) {
  return DoFoo(10);
}
""")

    # Same compilation order as before: a, a_opt, b, b_opt.
    builds = ((a, '-O0'), (a_opt, '-O3'), (b, '-O0'), (b_opt, '-O3'))
    for out_path, opt_level in builds:
        p = clang.Exec([
            str(src), '-o', str(out_path), opt_level, '-S', '-c', '-emit-llvm'
        ])
        # Sanity check that compilation succeeded, so that a clang failure is
        # not misreported as a bytecode (in)equality failure below.
        assert not p.returncode

    assert implementation.BytecodesAreEqual(a, b)
    assert not implementation.BytecodesAreEqual(a, a_opt)
    assert implementation.BytecodesAreEqual(a_opt, b_opt)
Example #2
0
def test_BinariesAreEqual(tempdir: pathlib.Path):
    """Test binary difftesting.

    Compiles one C source file to an executable four times (twice at -O0,
    twice at -O3) and asserts that implementation.BinariesAreEqual() reports
    binaries built at the same optimization level as equal, and binaries built
    at different optimization levels as not equal.

    Args:
      tempdir: A scratch directory in which the source and binaries are written.
    """
    src = tempdir / "a.c"
    a, b = tempdir / "a", tempdir / "b"
    a_opt, b_opt = tempdir / "a_opt", tempdir / "b_opt"
    # NOTE: This is a plain string, not a format string, so the doubled braces
    # below are written to the file literally. They are nested compound
    # statements in C, so the program is still valid.
    with open(src, "w") as f:
        f.write("""
int DoFoo(int x) {
  // Easily optimizable code: true branch is not reachable, therefore always
  // return 1, and probably inline calls to DoFoo with const 1.
  if (0) {{
    return 2 * x;
  }} else {{
    return 1;
  }}
}

int main(int argc, char** argv) {
  return DoFoo(10);
}
""")

    # Same compilation order as before: a, a_opt, b, b_opt.
    builds = ((a, "-O0"), (a_opt, "-O3"), (b, "-O0"), (b_opt, "-O3"))
    for out_path, opt_level in builds:
        p = clang.Exec([str(src), "-o", str(out_path), opt_level])
        # Sanity check that every compilation succeeded, not just the first.
        assert not p.returncode

    assert implementation.BinariesAreEqual(a, b)
    assert not implementation.BinariesAreEqual(a, a_opt)
    assert implementation.BinariesAreEqual(a_opt, b_opt)
Example #3
0
def test_OpenClCompileAndLinkFlags_smoke_test(
        flags_getter, flags_getter_args, c_program_src: str,
        tempdir: pathlib.Path):
    """Smoke test: a C program builds and runs using the OpenCL flags.

    The compile and link flags returned by flags_getter(**flags_getter_args)
    are used to first emit LLVM IR from c_program_src and then to link that IR
    into an executable. The executable is run and must exit with status 5.
    """
    compile_flags, link_flags = flags_getter(**flags_getter_args)

    # Step 1: C source (read from stdin) -> LLVM IR file.
    ir_file = tempdir / "a.ll"
    emit_ir_cmd = ["-x", "c", "-", "-S", "-emit-llvm", "-o", str(ir_file)]
    emit_ir = clang.Exec(
        emit_ir_cmd + compile_flags,
        stdin=c_program_src,
        stdout=None,
        stderr=None,
    )
    assert not emit_ir.returncode
    assert ir_file.is_file()

    # Step 2: LLVM IR file -> executable.
    exe_file = tempdir / "a.out"
    link_cmd = ["-o", str(exe_file), str(ir_file)]
    link = clang.Exec(link_cmd + link_flags, stdout=None, stderr=None)
    assert not link.returncode
    assert exe_file.is_file()

    # Step 3: run the executable. The C program should exit with returncode 5.
    exit_status = subprocess.call(
        [str(exe_file)],
        env=libcecl_compile.LibCeclExecutableEnvironmentVariables())
    assert exit_status == 5
Example #4
0
def test_LlvmPipeline(tempdir: pathlib.Path):
  """End-to-end test: C source -> LLVM IR -> opt pass -> executable -> run."""
  c_path = tempdir / "foo.c"
  ir_path = tempdir / "foo.ll"
  opt_ir_path = tempdir / "foo2.ll"
  exe_path = tempdir / "foo"

  def _AssertSilentSuccess(proc):
    """Check that a tool wrote nothing to stderr/stdout and exited with 0."""
    assert not proc.stderr
    assert not proc.stdout
    assert not proc.returncode

  with open(c_path, "w") as f:
    f.write("""
int main() {
  int x = 0;
  if (x != 0)
    x = 5; // dead code
  return x;
}
""")

  # Emit LLVM IR from the source file.
  emit_ir = clang.Exec([
    str(c_path), "-o", str(ir_path),
    "-S", "-xc++", "-emit-llvm", "-c", "-O0",
  ])
  _AssertSilentSuccess(emit_ir)
  assert ir_path.is_file()

  # Run an optimization pass (-dce) over the IR.
  run_pass = opt.Exec([str(ir_path), "-o", str(opt_ir_path), "-S", "-dce"])
  _AssertSilentSuccess(run_pass)
  assert opt_ir_path.is_file()

  # Compile the optimized IR to an executable.
  build_exe = clang.Exec([str(opt_ir_path), "-o", str(exe_path)])
  _AssertSilentSuccess(build_exe)
  assert exe_path.is_file()

  # The program returns 0, so check_call() must not raise.
  subprocess.check_call([str(exe_path)])
Example #5
0
def BytecodeFromOpenClString(opencl_string: str,
                             optimization_level: str) -> tuple:
    """Create bytecode from OpenCL source string.

    Args:
      opencl_string: A string of OpenCL code. This is passed to clang on stdin.
      optimization_level: The optimization level to use, one of
          {-O0,-O1,-O2,-O3,-Ofast,-Os,-Oz}. The value is checked with
          clang.ValidateOptimizationLevel() before use.

    Returns:
      A (bytecode, clang_args) tuple, where bytecode is the stdout of the
      clang process and clang_args is the list of arguments that clang was
      invoked with.

    Raises:
      ClangException: If compiling to bytecode fails, i.e. clang exits with a
        nonzero returncode.
    """
    clang_args = opencl.GetClangArgs(use_shim=False) + [
        clang.ValidateOptimizationLevel(optimization_level),
        "-S",
        "-emit-llvm",
        "-o",
        "-",
        "-i",
        "-",
        "-Wno-everything",  # No warnings please.
    ]
    process = clang.Exec(clang_args, stdin=opencl_string)
    if process.returncode:
        raise clang.ClangException(
            "clang failed",
            returncode=process.returncode,
            stderr=process.stderr,
            command=clang_args,
        )
    return process.stdout, clang_args
Example #6
0
def CSourceToBytecode(source: str) -> str:
  """Build LLVM bytecode for the given C code.

  Args:
    source: C source code. This is passed to clang on stdin.

  Returns:
    The stdout of the clang process, which was asked to emit LLVM IR at -O0.

  Raises:
    AssertionError: If clang exits with a nonzero returncode.
  """
  process = clang.Exec(
    ["-xc", "-O0", "-S", "-emit-llvm", "-", "-o", "-"], stdin=source
  )
  # Raise explicitly rather than using an `assert` statement: asserts are
  # stripped when python runs with -O, which would let a failed compilation
  # slip through. The exception type is unchanged for callers.
  if process.returncode:
    raise AssertionError(
      f"clang failed with returncode {process.returncode}:\n{process.stderr}"
    )
  return process.stdout
Example #7
0
def ExtractLlvmByteCodeOrDie(src_file_path: pathlib.Path,
                             datafolder: pathlib.Path):
    """Read a source file and compile it to bytecode, or die trying.

    Args:
      src_file_path: The path of the source file to compile.
      datafolder: A directory whose "kernels_cl" subdirectory is added to the
        include path.

    Returns:
      The stdout of the clang process.
    """
    with open(src_file_path, "rb") as src_file:
        raw_bytes = src_file.read()
    # NOTE(review): "unicode_escape" also interprets backslash escape sequences
    # found in the file (e.g. a backslash followed by 'n' becomes a newline),
    # which goes beyond stripping non-ascii characters. Confirm this is
    # intended before changing it.
    decoded = raw_bytes.decode("unicode_escape")
    # Drop anything that cannot be represented as ascii.
    ascii_src = decoded.encode("ascii", "ignore").decode("ascii")

    extra_args = [
        "-O0",
        "-S",
        "-emit-llvm",
        "-o",
        "-",
        "-i",
        "-",
        # No warnings, and fail immediately on error.
        "-Wno-everything",
        "-ferror-limit=1",
        # Kernels have headers.
        "-I",
        str(datafolder / "kernels_cl"),
        # We don't need the full shim header, just the common constants:
        "-DCLGEN_OPENCL_SHIM_NO_COMMON_TYPES",
        "-DCLGEN_OPENCL_SHIM_NO_UNSUPPORTED_STORAGE_CLASSES_AND_QUALIFIERS",
    ]
    clang_args = opencl.GetClangArgs(use_shim=True) + extra_args

    process = clang.Exec(clang_args, stdin=ascii_src, log=False)
    if not process.returncode:
        return process.stdout

    # Compilation failed: log what we know, then report the fatal error.
    app.Error("Failed to compile %s", src_file_path)
    app.Error("stderr: %s", process.stderr)
    app.Fatal(f"clang failed with returncode {process.returncode}")
    return process.stdout
Example #8
0
def CSourceToBytecode(source: str) -> str:
    """Build LLVM bytecode for the given C code.

    Args:
      source: C source code. This is passed to clang on stdin.

    Returns:
      The stdout of the clang process, which was asked to emit LLVM IR at -O1.

    Raises:
      AssertionError: If clang exits with a nonzero returncode.
    """
    # NOTE: This has to be at least -O1 to obtain polly outputs
    process = clang.Exec(["-xc", "-O1", "-S", "-emit-llvm", "-", "-o", "-"],
                         stdin=source)
    # Raise explicitly rather than using an `assert` statement: asserts are
    # stripped when python runs with -O, which would let a failed compilation
    # slip through. The exception type is unchanged for callers.
    if process.returncode:
        raise AssertionError(
            "clang failed with returncode "
            f"{process.returncode}:\n{process.stderr}")
    return process.stdout
Example #9
0
 def CompileDriver(libcecl_src: str, binary_path: pathlib.Path):
   """Compile a C99 source string into an executable at binary_path.

   The source is passed to clang on stdin, and the cflags and ldflags from the
   enclosing scope are appended to the command line.

   Raises:
     DriverCompilationFailed: If clang exits with a nonzero returncode. The
       message is the first 1024 characters of clang's stderr.
   """
   base_args = ["-x", "c", "-std=c99", "-", "-o", str(binary_path)]
   result = clang.Exec(base_args + cflags + ldflags, stdin=libcecl_src)
   failed = bool(result.returncode)
   if failed:
     # Truncate stderr so that the exception message stays readable.
     raise DriverCompilationFailed(result.stderr[:1024])
   assert binary_path.is_file()
Example #10
0
def test_LlvmPipeline(tempdir: pathlib.Path):
    """End-to-end test: C source -> LLVM IR -> opt pass -> executable -> run."""
    c_path = tempdir / 'foo.c'
    ir_path = tempdir / 'foo.ll'
    opt_ir_path = tempdir / 'foo2.ll'
    exe_path = tempdir / 'foo'

    def _AssertSilentSuccess(proc):
        """Check that a tool wrote nothing to stderr/stdout and exited with 0."""
        assert not proc.stderr
        assert not proc.stdout
        assert not proc.returncode

    program = """
#include <stdio.h>

int main() {
  int x = 0;
  if (x != 0)
    x = 5; // dead code
  printf("Hello, world!\\n");
  return x;
}
"""
    with open(c_path, 'w') as f:
        f.write(program)

    # Emit LLVM IR from the source file.
    emit_ir = clang.Exec([
        str(c_path), '-o', str(ir_path),
        '-S', '-xc++', '-emit-llvm', '-c', '-O0',
    ])
    _AssertSilentSuccess(emit_ir)
    assert ir_path.is_file()

    # Run an optimization pass (-dce) over the IR.
    run_pass = opt.Exec([str(ir_path), '-o', str(opt_ir_path), '-S', '-dce'])
    _AssertSilentSuccess(run_pass)
    assert opt_ir_path.is_file()

    # Compile the optimized IR to an executable.
    build_exe = clang.Exec([str(opt_ir_path), '-o', str(exe_path)])
    _AssertSilentSuccess(build_exe)
    assert exe_path.is_file()

    # Run the executable and check what it printed.
    printed = subprocess.check_output([str(exe_path)],
                                      universal_newlines=True)
    assert printed == 'Hello, world!\n'
Example #11
0
def GetBytecodesFromContentFiles(
  source_name: str,
  language: str,
  content_files: typing.List[typing.Tuple[int, str]],
) -> typing.List[ml4pl_pb2.LlvmBytecode]:
  """Extract LLVM bytecodes from contentfiles.

  Swift and haskell content files are handed off to their dedicated
  extractors. For every other language, each content file is piped through
  clang using the arguments registered for that language in
  LANGUAGE_TO_CLANG_ARGS.

  Args:
    source_name: The name of the content file database. This is the same across
      all content files.
    language: The source code language. This is the same across all content
      files.
    content_files: A list of <id,text> tuples, where each tuple is the ID and
      text of a row in the content file database.

  Returns:
    A list of zero or more LlvmBytecode protos, one for each contentfile which
    was successfully processed. Content files for which clang exits with a
    nonzero returncode are skipped.
  """
  # Languages which do not go through clang directly.
  if language == "swift":
    return GetSwiftBytecodesFromContentFiles(source_name, content_files)
  if language == "haskell":
    return GetHaskellBytecodesFromContentFiles(source_name, content_files)

  # Read the source from stdin and write the bytecode to stdout.
  stdin_to_stdout_args = ["-S", "-emit-llvm", "-", "-o", "-"]
  clang_args = LANGUAGE_TO_CLANG_ARGS[language] + stdin_to_stdout_args

  protos = []
  for content_file_id, text in content_files:
    result = clang.Exec(clang_args, stdin=text)
    if not result.returncode:
      proto = ml4pl_pb2.LlvmBytecode(
        source_name=source_name,
        relpath=str(content_file_id),
        lang=language,
        cflags=" ".join(clang_args),
        bytecode=result.stdout,
        clang_returncode=0,
        error_message="",
      )
      protos.append(proto)

  return protos
Example #12
0
def test_BinariesAreEqual(tempdir: pathlib.Path):
    """Test binary difftesting.

    Compiles one C source file to an executable four times (twice at -O0,
    twice at -O3) and asserts that implementation.BinariesAreEqual() reports
    binaries built at the same optimization level as equal, and binaries built
    at different optimization levels as not equal.

    Args:
      tempdir: A scratch directory in which the source and binaries are written.
    """
    src = tempdir / 'a.c'
    a, b = tempdir / 'a', tempdir / 'b'
    a_opt, b_opt = tempdir / 'a_opt', tempdir / 'b_opt'
    with open(src, 'w') as f:
        f.write("""
int DoFoo(int x) {
  return 2 * x;
}

int main(int argc, char** argv) {
  return DoFoo(10);
}
""")

    # Same compilation order as before: a, a_opt, b, b_opt.
    builds = ((a, '-O0'), (a_opt, '-O3'), (b, '-O0'), (b_opt, '-O3'))
    for out_path, opt_level in builds:
        p = clang.Exec([str(src), '-o', str(out_path), opt_level])
        # Sanity check that compilation succeeded, so that a clang failure is
        # not misreported as a binary (in)equality failure below.
        assert not p.returncode

    assert implementation.BinariesAreEqual(a, b)
    assert not implementation.BinariesAreEqual(a, a_opt)
    assert implementation.BinariesAreEqual(a_opt, b_opt)
Example #13
0
def GetSwiftBytecodesFromContentFiles(
  source_name: str, content_files: typing.List[typing.Tuple[int, str]]
) -> typing.List[ml4pl_pb2.LlvmBytecode]:
  """Extract LLVM bytecodes from swift contentfiles.

  The process is swift -> LLVM bitcode, clang -> LLVM bytecode.

  This requires that the `swift` binary is in the system path.

  Args:
    source_name: The name to record as the source of each bytecode.
    content_files: A list of <id,text> tuples, one for each swift file.

  Returns:
    A list of zero or more LlvmBytecode protos, one for each content file
    which was successfully processed. A content file is skipped if swift or
    clang exits with a nonzero returncode, or if swift produces no bitcode
    file.
  """
  protos = []

  with tempfile.TemporaryDirectory(prefix="phd_import_swift_") as tmpdir:
    with fs.chdir(tmpdir) as workdir:
      # The same pair of file names is reused for every content file.
      swift_file = workdir / "file.swift"
      bc_file = workdir / "file.bc"

      for content_file_id, text in content_files:
        # Remove the bitcode left over from a previous iteration. Otherwise, a
        # swift run which exits with 0 but writes no output would pass the
        # is_file() check below, and the previous file's bitcode would be
        # attributed to this content file.
        if bc_file.is_file():
          bc_file.unlink()

        fs.Write(swift_file, text.encode("utf-8"))
        swift = subprocess.Popen(
          ["swift", "-Xfrontend", "-emit-bc", swift_file.name],
          stderr=subprocess.DEVNULL,
        )
        swift.communicate()
        if swift.returncode:
          continue
        if not bc_file.is_file():
          continue

        process = clang.Exec(["-S", "-emit-llvm", str(bc_file), "-o", "-"])
        if process.returncode:
          continue

        protos.append(
          ml4pl_pb2.LlvmBytecode(
            source_name=source_name,
            relpath=str(content_file_id),
            lang="swift",
            cflags="",
            bytecode=process.stdout,
            clang_returncode=0,
            error_message="",
          )
        )

  return protos
Example #14
0
def test_Exec_compile_bytecode_stdin(tempdir: pathlib.Path):
    """Test bytecode generation from a C++ program passed on stdin."""
    ir_path = tempdir / 'foo.ll'
    cxx_program = """
#include <iostream>

int main() {
  std::cout << "Hello, world!" << std::endl;
  return 0;
}
"""
    # The trailing '-' tells clang to read the source from stdin.
    args = ['-xc++', '-S', '-emit-llvm', '-c', '-o', str(ir_path), '-']
    p = clang.Exec(args, stdin=cxx_program)
    # Print stderr so that it is visible if one of the checks below fails.
    print(p.stderr)
    assert not p.returncode
    assert not p.stderr
    assert not p.stdout
    assert ir_path.is_file()
Example #15
0
def test_Exec_compile_bytecode(tempdir: pathlib.Path):
    """Test bytecode generation from a C++ source file on disk."""
    cc_path = tempdir / 'foo.cc'
    ir_path = tempdir / 'foo.ll'
    cxx_program = """
#include <iostream>

int main() {
  std::cout << "Hello, world!" << std::endl;
  return 0;
}
"""
    with open(cc_path, 'w') as f:
        f.write(cxx_program)

    args = [str(cc_path), '-xc++', '-S', '-emit-llvm', '-c', '-o', str(ir_path)]
    p = clang.Exec(args)
    # clang must succeed silently and leave the output file behind.
    assert not p.returncode
    assert not p.stderr
    assert not p.stdout
    assert ir_path.is_file()
Example #16
0
def BytecodeFromOpenClString(opencl_string: str) -> str:
    """Create bytecode from OpenCL source string.

    Args:
      opencl_string: A string of OpenCL code. This is passed to clang on stdin.

    Returns:
      The bytecode as a string. This is the stdout of the clang process.

    Raises:
      ClangException: If compiling to bytecode fails, i.e. clang exits with a
        nonzero returncode.
    """
    # Use -O3 to reduce CFGs.
    emit_args = ['-O3', '-S', '-emit-llvm', '-o', '-', '-i', '-']
    clang_args = opencl.GetClangArgs(use_shim=False) + emit_args

    process = clang.Exec(clang_args, stdin=opencl_string)
    if not process.returncode:
        return process.stdout

    error = f"clang failed with returncode {process.returncode}:\n{process.stderr}"
    raise clang.ClangException(error)
Example #17
0
def BytecodeFromLinuxSrc(path: pathlib.Path, optimization_level: str) -> str:
  """Create bytecode from a Linux source file.

  Args:
    path: The path of the source file.
    optimization_level: The clang optimization level to use, one of
        {-O0,-O1,-O2,-O3,-Ofast,-Os,-Oz}.

  Returns:
    The bytecode as a string.

  Raises:
    ClangException: If compiling to bytecode fails.
  """
  root = linux.LinuxSourcesDataset().src_tree_root
  genroot = linux.LinuxSourcesDataset().generated_hdrs_root
  # A subset of the arguments found by running `make V=1` in the linux
  # build and grabbing a random C compile target.
  # The build I took this from: Wp,-MD,arch/x86/kernel/.asm-offsets.s.d  -nostdinc -isystem /usr/lib/gcc/x86_64-linux-gnu/5/include -I./arch/x86/include -I./arch/x86/include/generated  -I./include -I./arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h -include ./include/linux/compiler_types.h -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -Werror-implicit-function-declaration -Wno-format-security -std=gnu89 -fno-PIE -DCC_HAVE_ASM_GOTO -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -DCONFIG_AS_AVX512=1 -DCONFIG_AS_SHA1_NI=1 -DCONFIG_AS_SHA256_NI=1 -pipe -Wno-sign-compare -fno-asynchronous-unwind-tables -mindirect-branch=thunk-extern -mindirect-branch-register -DRETPOLINE -fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0 -Wframe-larger-than=1024 -fstack-protector-strong -Wno-unused-but-set-variable -fno-var-tracking-assignments -g -gdwarf-4 -pg -mrecord-mcount -mfentry -DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fno-stack-check -fconserve-stack -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time -Werror=incompatible-pointer-types -Werror=designated-init    -DKBUILD_BASENAME='"asm_offsets"' -DKBUILD_MODNAME='"asm_offsets"'  -fverbose-asm -S -o arch/x86/kernel/asm-offsets.s arch/x86/kernel/asm-offsets.c
  clang_args = [
    "-S",
    "-emit-llvm",
    "-o",
    "-",
    clang.ValidateOptimizationLevel(optimization_level),
    "-Wno-everything",  # No warnings please.
    "-I",
    str(root / "arch/x86/include"),
    "-I",
    str(genroot / "arch/x86/include/generated"),
    "-I",
    str(root / "include"),
    "-I",
    str(root / "arch/x86/include/uapi"),
    "-I",
    str(genroot / "arch/x86/include/generated/uapi"),
    "-I",
    str(root / "include/uapi"),
    "-I",
    str(genroot / "include/generated/uapi"),
    "-I",
    str(genroot / "arch/x86/include"),
    "-I",
    str(genroot / "arch/x86/include/generated"),
    "-I",
    str(genroot / "arch/x86/include/generated/uapi"),
    "-I",
    str(genroot / "include"),
    "-I",
    str(genroot / "include/generated"),
    "-include",
    str(genroot / "include/linux/kconfig.h"),
    "-include",
    str(genroot / "include/linux/compiler_types.h"),
    "-D__KERNEL__",
    "-m64",
    "-DCONFIG_X86_X32_ABI",
    "-DCONFIG_AS_CFI=1",
    "-DCONFIG_AS_CFI_SIGNAL_FRAME=1",
    "-DCONFIG_AS_CFI_SECTIONS=1",
    "-DCONFIG_AS_FXSAVEQ=1",
    "-DCONFIG_AS_SSSE3=1",
    "-DCONFIG_AS_CRC32=1",
    "-DCONFIG_AS_AVX=1",
    "-DCONFIG_AS_AVX2=1",
    "-DCONFIG_AS_AVX512=1",
    "-DCONFIG_AS_SHA1_NI=1",
    "-DCONFIG_AS_SHA256_NI=1",
    "-pipe",
    "-DRETPOLINE",
    "-DCC_USING_FENTRY",
    "-DKBUILD_BASENAME='\"asm_offsets\"'",
    "-DKBUILD_MODNAME='\"asm_offsets\"'",
    str(path),
  ]
  process = clang.Exec(clang_args)
  if process.returncode: