Ejemplo n.º 1
0
def clean(repo_folder: str) -> None:
    r"""Restore a Git working tree to a pristine state.

    Runs ``git reset --hard`` followed by ``git clean -xffd`` in the repository. If the repository contains a
    ``.gitmodules`` file afterwards, the same two commands are repeated inside every submodule via
    ``git submodule foreach --recursive``. Every command is invoked with ``ignore_errors=True``.

    :param repo_folder: Path to the Git repository.
    """
    # First discard modifications to tracked files, then delete everything that is not versioned. The `-f` flag is
    # given twice so that the clean-up really removes everything.
    git_steps = (["git", "reset", "--hard"], ["git", "clean", "-xffd"])
    for step in git_steps:
        run_command(list(step), cwd=repo_folder, ignore_errors=True)

    # Submodules are only looked for once the top-level tree has been reset and cleaned.
    if not os.path.exists(os.path.join(repo_folder, ".gitmodules")):
        return

    foreach_prefix = ["git", "submodule", "foreach", "--recursive"]
    for step in git_steps:
        run_command(foreach_prefix + step, cwd=repo_folder, ignore_errors=True)
Ejemplo n.º 2
0
def run_docker_command(command: Union[str, List[str]], cwd: Optional[str] = None,
                       user: Optional[Union[int, Tuple[int, int]]] = None,
                       directory_mapping: Optional[Dict[str, str]] = None,
                       timeout: Optional[float] = None, **kwargs) -> CommandResult:
    r"""Run a command inside a container based on the ``gcc-custom`` Docker image.

    The ``docker run`` invocation is assembled as a single string and executed through the host shell. The
    command, the working directory, and the bind-mount specifications are shell-quoted, so they may safely contain
    spaces or quote characters.

    :param command: The command to run. Should be either a `str` or a list of `str`. Note: they're treated the same way
        (a list is joined with spaces), because a shell is always spawned in the entry point.
    :param cwd: The working directory of the command to run. If None, uses the default (probably user home).
    :param user: The user ID to use inside the Docker container. Additionally, group ID can be specified by passing
        a tuple of two `int`\ s for this argument. If not specified, the current user and group IDs are used. As a
        special case, pass in ``0`` to run as root.
    :param directory_mapping: Mapping of host directories to container paths. Mapping is performed via "bind mount".
    :param timeout: Maximum running time for the command. If running time exceeds the specified limit,
        ``subprocess.TimeoutExpired`` is thrown.
    :param kwargs: Additional keyword arguments to pass to :meth:`ghcc.utils.run_command`.
    :return: The result object returned by ``run_command`` for the ``docker run`` invocation.
    :raises subprocess.TimeoutExpired: (wrapped by ``error_wrapper``) if a timeout was given and the return code
        is 124.
    """
    import shlex  # Local import: only needed here to quote arguments for the host shell.

    # A list is flattened into one string; the whole string is passed to the container as a single argument.
    if isinstance(command, list):
        command = ' '.join(command)

    # Construct the `docker run` command.
    docker_command = ["docker", "run", "--rm"]
    for host, container in (directory_mapping or {}).items():
        docker_command.extend(["-v", shlex.quote(f"{os.path.abspath(host)}:{container}")])
    if cwd is not None:
        docker_command.extend(["-w", shlex.quote(cwd)])

    # Assign user and group IDs based on `user` argument.
    if user != 0:
        # The defaults are deliberately left unquoted: the host shell must perform the command substitution.
        user_id: Union[str, int] = "`id -u $USER`"
        group_id: Union[str, int] = "`id -g $USER`"
        if user is not None:
            if isinstance(user, tuple):
                user_id, group_id = user
            else:
                user_id = user
        docker_command.extend(["-e", f"LOCAL_USER_ID={user_id}"])
        docker_command.extend(["-e", f"LOCAL_GROUP_ID={group_id}"])

    docker_command.append("gcc-custom")
    if timeout is not None:
        # Timeout is implemented by calling `timeout` inside Docker container.
        docker_command.extend(["timeout", f"{timeout}s"])
    # Quote the command so the host shell hands it over verbatim, even if it contains single quotes.
    docker_command.append(shlex.quote(command))
    ret = run_command(' '.join(docker_command), shell=True, **kwargs)

    # Return code 124 is how the `timeout` call above reports an exceeded limit. Only interpret it that way when a
    # timeout was actually requested; otherwise 124 is just the exit status of the command itself.
    if timeout is not None and ret.return_code == 124:
        raise error_wrapper(subprocess.TimeoutExpired(ret.command, timeout, output=ret.captured_output))
    return ret
Ejemplo n.º 3
0
 def try_clone():
     # Attempt a shallow single-branch clone first. Fall back to a shallow clone of all branches only when no
     # `default_branch` was given and Git reports that the 'master' branch is missing; any other Git error is
     # re-raised so the outer code can deal with it.
     branch = default_branch or "master"
     single_branch_cmd = [
         "git", "clone", "--depth=1", f"--branch={branch}", "--single-branch", url, clone_folder
     ]
     try:
         run_command(single_branch_cmd, env=env, timeout=timeout)
     except subprocess.CalledProcessError as err:
         missing_master_msg = b"fatal: Remote branch master not found in upstream origin"
         master_missing = (default_branch is None and err.output is not None
                           and missing_master_msg in err.output)
         if not master_missing:
             # Covers both a true Git error and any failure while `default_branch` is specified.
             raise err
     else:
         return
     # 'master' branch doesn't exist; do a shallow clone of all branches.
     run_command(["git", "clone", "--depth=1", url, clone_folder], env=env, timeout=timeout)
Ejemplo n.º 4
0
def _preprocess(input_path: str, output_path: str) -> str:
    r"""Run the C preprocessor on a file and return the result with line control directives stripped.

    Invokes ``gcc -E -nostdlib`` with ``FAKE_LIBC_PATH`` on the include path, writing to ``output_path``.

    :param input_path: Path of the source file to preprocess.
    :param output_path: Path where ``gcc`` writes the preprocessed code; the file is read back afterwards.
    :return: Contents of ``output_path`` with every match of ``LINE_CONTROL_REGEX`` removed.
    :raises PreprocessError: If ``gcc`` exits with a non-zero return code. The captured output, when available,
        becomes the error message.
    """
    compile_ret = run_command(
        ["gcc", "-E", "-nostdlib", "-I" + FAKE_LIBC_PATH, "-o", output_path, input_path],
        ignore_errors=True)

    if compile_ret.return_code != 0:
        if compile_ret.captured_output is not None:
            # Diagnostics may quote arbitrary bytes from the source file; decode leniently so that an undecodable
            # byte cannot replace the intended `PreprocessError` with a `UnicodeDecodeError`.
            raise PreprocessError(compile_ret.captured_output.decode("utf-8", errors="replace"))
        raise PreprocessError

    with open(output_path, "r") as f:
        preprocessed_code = f.read()
    # Remove line control macros so we can programmatically locate errors.
    preprocessed_code = LINE_CONTROL_REGEX.sub("", preprocessed_code)
    return preprocessed_code
Ejemplo n.º 5
0
def verify_docker_image(verbose: bool = False, print_checked_paths: bool = False) -> bool:
    r"""Checks whether the Docker image is up-to-date. This is done by verifying the modification dates for all library
    files are earlier than the Docker image build date.

    The build date is taken from ``docker image ls gcc-custom``. If that command lists no image at all, the image
    is reported as not up-to-date. If several lines are listed, only the first one is considered.

    :param verbose: If ``True``, prints out error message telling the user to rebuild Docker image.
    :param print_checked_paths: If ``True``, prints out paths of all checked files.
    :return: ``True`` if no checked file was modified after the image was created, ``False`` otherwise.
    """
    output = run_command(
        ["docker", "image", "ls", "gcc-custom", "--format", "{{.CreatedAt}}"], return_output=True).captured_output
    assert output is not None
    # A listed line is expected to look like "<date> <time> <UTC offset> <zone abbreviation>". Only the first three
    # fields are parsed: the numeric offset already pins down the instant, and `%Z` in `strptime` rejects zone
    # abbreviations other than UTC, GMT, and the local machine's own zone names.
    creation_fields = output.decode("utf-8").split()[:3]
    if creation_fields:
        image_creation_timestamp = datetime.strptime(" ".join(creation_fields), "%Y-%m-%d %H:%M:%S %z").timestamp()
    else:
        # No image listed (e.g. never built): treat it as older than every file.
        image_creation_timestamp = float("-inf")

    repo_root: Path = Path(__file__).parent.parent.parent
    paths_to_check = ["ghcc", "scripts", ".dockerignore", "Dockerfile", "requirements.txt"]
    paths_to_ignore = ["ghcc/parse", "ghcc/database.py", "scripts/fake_libc_include"]
    prefixes_to_ignore = tuple(str(repo_root / path) for path in paths_to_ignore)
    max_timestamp = 0.0
    for repo_path in paths_to_check:
        path = str(repo_root / repo_path)
        if os.path.isfile(path) and not path.startswith(prefixes_to_ignore):
            if print_checked_paths:
                print(path)
            max_timestamp = max(max_timestamp, os.path.getmtime(path))
        else:
            # Directories are walked recursively; for anything else `os.walk` simply yields nothing.
            for subdir, _, files in os.walk(path):
                if subdir.endswith("__pycache__"):
                    continue
                for f in files:
                    file_path = os.path.join(subdir, f)
                    if not file_path.startswith(prefixes_to_ignore):
                        if print_checked_paths:
                            print(file_path)
                        max_timestamp = max(max_timestamp, os.path.getmtime(file_path))
    up_to_date = max_timestamp <= image_creation_timestamp

    if not up_to_date and verbose:
        image_path = os.path.relpath(os.path.join(__file__, "..", "..", ".."), os.getcwd())
        log("ERROR: Your Docker image is out-of-date. Please rebuild the image by: "
            f"`docker build -t gcc-custom {image_path}`", "error", force_console=True)
    return up_to_date
Ejemplo n.º 6
0
def _make_skeleton(
    directory: str,
    timeout: Optional[float] = None,
    env: Optional[Dict[str, str]] = None,
    verbose: bool = True,
    *,
    make_fn,
    check_file_fn: Callable[[str, str],
                            bool] = _check_elf_fn) -> CompileResult:
    r"""A composable routine for different compilation methods. Different routines can be composed by specifying
    different ``make_fn``\ s and ``check_file_fn``\ s.

    The routine cleans the repository, calls ``make_fn``, and then collects every unversioned file accepted by
    ``check_file_fn``. Files are collected even when ``make_fn`` failed or timed out.

    :param directory: The directory containing the Makefile.
    :param timeout: Maximum compilation time.
    :param env: A dictionary of environment variables.
    :param verbose: If ``True``, print out executed commands and outputs.
    :param make_fn: The function to call for compilation. The function takes as input the positional argument
        ``directory`` and the keyword arguments ``timeout``, ``env``, and ``verbose``.
    :param check_file_fn: A function to determine whether a generated file should be collected, i.e., whether it is a
        binary file. The function takes as input variables ``directory`` and ``file``, where ``file`` is the path of the
        file to check, relative to ``directory``. Defaults to :meth:`_check_elf_fn`, which checks whether the file is an
        ELF file.
    :return: The result built by ``_create_result``. Its ``elf_files`` holds the collected file paths; on failure,
        ``error_type`` and ``captured_output`` describe what went wrong.
    """
    directory = os.path.abspath(directory)

    try:
        # Clean unversioned files by previous compilations.
        clean(directory)

        # Call the actual function for `make`.
        make_fn(directory, timeout=timeout, env=env, verbose=verbose)
        result = _create_result(True)

    except subprocess.TimeoutExpired as e:
        # Even if exceptions occur, we still check for ELF files, just in case.
        result = _create_result(error_type=CompileErrorType.Timeout,
                                captured_output=e.output)
    except subprocess.CalledProcessError as e:
        result = _create_result(error_type=CompileErrorType.CompileFailed,
                                captured_output=e.output)
    except OSError as e:
        result = _create_result(error_type=CompileErrorType.Unknown,
                                captured_output=str(e))

    try:
        # Use Git to find all unversioned files -- these would be the products of compilation.
        # `-z` makes Git emit NUL-terminated, unquoted paths. Without it, unusual names are quoted with octal
        # escapes of their raw bytes, which cannot be reliably turned back into the real file name.
        output = run_command(["git", "ls-files", "--others", "-z"],
                             cwd=directory,
                             timeout=timeout,
                             return_output=True).captured_output
        assert output is not None
        # `os.fsdecode` yields names that refer to the same files when passed back to filesystem functions.
        diff_files = [os.fsdecode(name) for name in output.split(b"\0") if name]

        # Inspect each file and find ELF files.
        for file in diff_files:
            if check_file_fn(directory, file):
                result.elf_files.append(file)
    except subprocess.TimeoutExpired as e:
        # Failures while collecting files replace the compilation result, keeping the files found so far.
        return _create_result(elf_files=result.elf_files,
                              error_type=CompileErrorType.Timeout,
                              captured_output=e.output)
    except subprocess.CalledProcessError as e:
        return _create_result(elf_files=result.elf_files,
                              error_type=CompileErrorType.Unknown,
                              captured_output=e.output)
    except OSError as e:
        return _create_result(elf_files=result.elf_files,
                              error_type=CompileErrorType.Unknown,
                              captured_output=str(e))

    return result
Ejemplo n.º 7
0
def _unsafe_make(directory: str,
                 timeout: Optional[float] = None,
                 env: Optional[Dict[str, str]] = None,
                 verbose: bool = False) -> None:
    r"""Run the Autotools / ``configure`` / ``make`` sequence inside ``directory``.

    Steps, in order:

    1. If ``contains_files`` reports ``configure.ac`` or ``configure.in``, run ``./autogen.sh`` when it exists,
       otherwise ``autoreconf --force --install``. Errors of these commands are ignored.
    2. If a ``configure`` file exists, run it with ``--disable-werror`` (errors ignored). If that fails within
       two seconds, run it again without the flag; errors of this second attempt are not ignored.
    3. Run ``make --keep-going -j1``. If it fails with "missing separator" in its output, retry with
       ``bmake -k -j1``; any other failure is re-raised.

    The time spent in steps 1 and 2 is deducted from ``timeout`` (never dropping below one second).

    :param directory: Directory in which all commands are run.
    :param timeout: Time budget passed to the long-running commands, or ``None`` for no limit.
    :param env: Extra environment variables; they override the ``PATH`` entry that is prefixed with ``MOCK_PATH``.
    :param verbose: Forwarded to every ``run_command`` call.
    """
    build_env = {"PATH": f"{MOCK_PATH}:{os.environ['PATH']}", **(env or {})}
    # Keyword arguments shared by every command below.
    shared = dict(env=build_env, cwd=directory, verbose=verbose)

    def remaining(limit: Optional[float], spent: float) -> Optional[float]:
        # Deduct whole seconds already spent, but always leave at least one second.
        if limit is None:
            return None
        return max(1.0, limit - int(spent))

    # Try GNU Automake first. Note that errors are ignored because it's possible that the original files still work.
    if contains_files(directory, ["configure.ac", "configure.in"]):
        phase_began = time.time()
        if os.path.isfile(os.path.join(directory, "autogen.sh")):
            # Some projects with non-trivial build instructions provide an "autogen.sh" script.
            run_command(["chmod", "+x", "./autogen.sh"], **shared)
            bootstrap = ["./autogen.sh"]
        else:
            bootstrap = ["autoreconf", "--force", "--install"]
        run_command(bootstrap, timeout=timeout, ignore_errors=True, **shared)
        timeout = remaining(timeout, time.time() - phase_began)

    # Try running `./configure` if it exists.
    if os.path.isfile(os.path.join(directory, "configure")):
        phase_began = time.time()
        run_command(["chmod", "+x", "./configure"], **shared)
        first_attempt = run_command(["./configure", "--disable-werror"],
                                    timeout=timeout, ignore_errors=True, **shared)
        phase_ended = time.time()
        died_instantly = first_attempt.return_code != 0 and phase_ended - phase_began <= 2
        if died_instantly:
            # The configure file might not support `--disable-werror` and died instantly. Try again without the flag.
            run_command(["./configure"], timeout=timeout, **shared)
            phase_ended = time.time()
        timeout = remaining(timeout, phase_ended - phase_began)

    # Make while ignoring errors.
    # `-B/--always-make` could give strange errors for certain Makefiles, e.g. ones containing "%:"
    try:
        run_command(["make", "--keep-going", "-j1"], timeout=timeout, **shared)
    except subprocess.CalledProcessError as err:
        if err.output is None or b"missing separator" not in err.output:
            raise err
        # Try again using BSD Make instead of GNU Make. Note BSD Make does not have a flag equivalent to
        # `-B/--always-make`.
        run_command(["bmake", "-k", "-j1"], timeout=timeout, **shared)
Ejemplo n.º 8
0
def clone(repo_owner: str,
          repo_name: str,
          clone_folder: str,
          folder_name: Optional[str] = None,
          *,
          default_branch: Optional[str] = None,
          timeout: Optional[float] = None,
          recursive: bool = False,
          skip_if_exists: bool = True) -> CloneResult:
    r"""Shallow-clone a GitHub repository, for instance, ``torvalds/linux``.

    :param repo_owner: Name of the repository owner, e.g., ``torvalds``.
    :param repo_name: Name of the repository, e.g., ``linux``.
    :param clone_folder: Path to the folder under which the repository will be stored.
    :param folder_name: Name of the folder of the cloned repository, relative to ``clone_folder``. If ``None``,
        ``repo_owner/repo_name`` is used.
    :param default_branch: Name of the default branch of the repository. If ``None``:

        1. A shallow clone of only the ``master`` branch is attempted.
        2. If Git reports that ``master`` does not exist, a shallow clone of all branches is attempted.
        3. Any other error, or an error in step 2, is reported as a failure.

        If not ``None``:

        1. A shallow clone of only that branch is attempted.
        2. Any error is reported as a failure.
    :param timeout: Maximum time allowed for cloning, in seconds. Defaults to ``None`` (unlimited time).
    :param recursive: If ``True``, ``git submodule update --init --recursive`` is run after a successful clone,
        using whatever is left of ``timeout``.
    :param skip_if_exists: Whether to skip cloning if the destination folder already exists. If ``False``, the folder
        will be deleted.

    :return: An instance of :class:`CloneResult` describing the outcome.

        - Destination exists and ``skip_if_exists`` is set: ``error_type`` is ``FolderExists``.
        - Clone succeeded: ``success=True`` and ``time`` is the elapsed time in seconds.
        - Clone succeeded but submodules failed: ``success=True``, ``time`` covers the clone only, ``error_type`` is
          ``SubmodulesFailed``, and ``captured_output`` holds the Git output.
        - Clone failed: ``error_type`` is ``PrivateOrNonexistent``, ``Timeout``, or ``Unknown``; the latter two also
          carry ``captured_output``.
    """
    began = time.time()
    remote_url = f"https://github.com/{repo_owner}/{repo_name}.git"
    target = os.path.join(clone_folder, f"{repo_owner}/{repo_name}" if folder_name is None else folder_name)

    if os.path.exists(target):
        if skip_if_exists:
            return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.FolderExists)
        shutil.rmtree(target)

    # Certain repos might have turned private or been deleted, and git prompts for username/password when it happens.
    # Setting the environment variable `GIT_TERMINAL_PROMPT` to 0 could disable such behavior and let git fail promptly.
    # See: https://askubuntu.com/questions/19442/what-is-the-waiting-channel-of-a-process
    git_env = {"GIT_TERMINAL_PROMPT": "0"}

    def shallow_clone() -> None:
        # Single-branch clone first; fall back to all branches only when the implicit 'master' branch is missing.
        branch = default_branch or "master"
        single_branch_cmd = [
            "git", "clone", "--depth=1", f"--branch={branch}", "--single-branch", remote_url, target
        ]
        try:
            run_command(single_branch_cmd, env=git_env, timeout=timeout)
        except subprocess.CalledProcessError as err:
            missing_master_msg = b"fatal: Remote branch master not found in upstream origin"
            master_missing = (default_branch is None and err.output is not None
                              and missing_master_msg in err.output)
            if not master_missing:
                # A true git error, or any failure while `default_branch` is specified: let the outer code decide.
                raise err
        else:
            return
        # 'master' branch doesn't exist; do a shallow clone of all branches.
        run_command(["git", "clone", "--depth=1", remote_url, target], env=git_env, timeout=timeout)

    try:
        shallow_clone()
    except subprocess.CalledProcessError as e:
        # Messages Git prints (without / with SSH credentials) for private or deleted repositories.
        inaccessible_msgs = (
            b"fatal: could not read Username for 'https://github.com': terminal prompts disabled",
            b"remote: Repository not found.",
        )
        if e.output is not None and any(msg in e.output for msg in inaccessible_msgs):
            return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.PrivateOrNonexistent)
        return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Unknown, captured_output=e.output)
    except subprocess.TimeoutExpired as e:
        return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Timeout, captured_output=e.output)
    elapsed_time = time.time() - began

    if not recursive:
        return CloneResult(repo_owner, repo_name, success=True, time=elapsed_time)

    # Submodules only get the part of the time budget that the clone itself did not use.
    submodule_timeout = None if timeout is None else timeout - elapsed_time
    try:
        run_command(["git", "submodule", "update", "--init", "--recursive"],
                    env=git_env, cwd=target, timeout=submodule_timeout)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
        # If this fails, still treat it as a success, but include a special error type.
        return CloneResult(repo_owner,
                           repo_name,
                           success=True,
                           time=elapsed_time,
                           error_type=CloneErrorType.SubmodulesFailed,
                           captured_output=e.output)
    return CloneResult(repo_owner, repo_name, success=True, time=time.time() - began)