Example #1
    def dep_fetcher(dep, *, initial_load=False):
        if dep not in loaded_deps and in_sandbox:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            if not dep.startswith(":"):
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # check that these deps are built, since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return dep_rule
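
A minimal, self-contained sketch (not from the source) of the determinism check dep_fetcher performs above: a dependency may be loaded during the initial pass, but a rerun that touches a dependency the first pass never loaded is rejected. The names toy_fetch and loaded_deps are illustrative stand-ins, and the sandbox handling is omitted.

loaded_deps = set()

def toy_fetch(dep, *, initial_load=False):
    # Simplified mirror of the check above: a never-loaded dep is only
    # acceptable during the initial load, not on a rerun.
    if dep not in loaded_deps and not initial_load:
        raise RuntimeError(f"New dep {dep} found when rerunning rule")
    loaded_deps.add(dep)

toy_fetch("src/a.txt", initial_load=True)   # first run: loads the dep
toy_fetch("src/a.txt")                      # rerun: already loaded, fine
try:
    toy_fetch("src/b.txt")                  # rerun touches a new dep
except RuntimeError as e:
    print(e)  # New dep src/b.txt found when rerunning rule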
Example #2
def cache_loader(cache_key: str, rule: Rule, dest_root: str) -> bool:
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key)
    if bucket:
        del cache_location
        if not aux_loader(cache_key, rule, dest_root):
            try:
                cache_fetcher(cache_key, ".touch")
            except CacheMiss:
                STATS["misses"] += delta
                return False
            for src_name, cache_path in zip(rule.outputs, cache_paths):
                cache_path = str(Path(cache_key).joinpath(cache_path))
                os.makedirs(dest_root, exist_ok=True)
                try:
                    if src_name.endswith("/"):
                        os.makedirs(Path(dest_root).joinpath(src_name),
                                    exist_ok=True)
                        blobs: Iterator[Blob] = list(
                            bucket.list_blobs(prefix=cache_path))
                        for blob in blobs:
                            target = str(
                                Path(dest_root).joinpath(
                                    src_name).joinpath(
                                        blob.name[len(cache_path) + 1:]))
                            os.makedirs(dirname(target), exist_ok=True)
                            blob.download_to_filename(target)
                            STATS["hits"] += delta
                    else:
                        target = str(Path(dest_root).joinpath(src_name))
                        os.makedirs(dirname(target), exist_ok=True)
                        bucket.blob(cache_path).download_to_filename(
                            target)
                        STATS["hits"] += delta
                except NotFound:
                    STATS["misses"] += delta
                    return False
            # now that we have fetched, let's cache it on disk
            aux_save(cache_key, rule, dest_root)
        return True
    else:
        if not os.path.exists(cache_location):
            STATS["misses"] += delta
            return False
        try:
            copy_helper(
                src_root=cache_location,
                src_names=cache_paths,
                dest_root=dest_root,
                dest_names=rule.outputs,
            )
            STATS["hits"] += delta
        except FileNotFoundError:
            raise BuildException(
                "Cache corrupted. This should never happen unless you modified the cache "
                "directory manually! If so, delete the cache directory and try again."
            )
        return True
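
STATS and delta are closure variables that this excerpt does not define. A reasonable reading, stated here as an assumption rather than fact, is that STATS is a plain counter mapping and delta the per-event increment; a minimal stand-in would be:

from collections import defaultdict

# Assumed stand-ins (not shown in the source): a hit/miss/insert counter and
# the amount each cache event adds to it.
STATS = defaultdict(int)
delta = 1

STATS["hits"] += delta
STATS["misses"] += delta
print(dict(STATS))  # {'hits': 1, 'misses': 1}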
Example #3
    def save(cache_key: str, rule: Rule, output_root: str):
        cache_location, cache_paths = get_cache_output_paths(
            cache_directory, rule, cache_key)

        memorize(cache_key, ".touch", "")

        if bucket:
            del cache_location  # just to be safe

            for src_name, cache_path in zip(rule.outputs, cache_paths):
                if src_name.endswith("/"):
                    dir_root = Path(output_root).joinpath(src_name)
                    for path, subdirs, files in os.walk(dir_root):
                        path = os.path.relpath(path, dir_root)
                        for name in files:
                            #               output_root -> src_name/ -> [path -> name]
                            # <cache_base> -> cache_key -> cache_path -> [path -> name]
                            target = Path(path).joinpath(name)
                            src_loc = (Path(output_root).joinpath(
                                src_name).joinpath(target))
                            aux_cache_loc = (
                                Path(AUX_CACHE).joinpath(cache_key).joinpath(
                                    cache_path).joinpath(target))
                            if not os.path.exists(aux_cache_loc) or (
                                    hash_file(src_loc) !=
                                    hash_file(aux_cache_loc)):
                                STATS["inserts"] += delta
                                bucket.blob(
                                    str(
                                        Path(cache_key).joinpath(
                                            cache_path).joinpath(
                                                target))).upload_from_filename(
                                                    str(src_loc))
                else:
                    #                 output_root -> src_name
                    # <cache_base> -> cache_key -> cache_path

                    target = Path(output_root).joinpath(src_name)
                    aux_cache_loc = (Path(AUX_CACHE).joinpath(
                        cache_key).joinpath(cache_path))
                    if not os.path.exists(aux_cache_loc) or (
                            hash_file(target) != hash_file(aux_cache_loc)):
                        STATS["inserts"] += delta
                        bucket.blob(str(Path(cache_key).joinpath(
                            cache_path)), ).upload_from_filename(
                                str(Path(output_root).joinpath(src_name)), )

            aux_save(cache_key, rule, output_root)

        else:
            STATS["inserts"] += delta
            copy_helper(
                src_root=output_root,
                src_names=rule.outputs,
                dest_root=cache_location,
                dest_names=cache_paths,
            )
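
The inline comments in save() describe a mapping from local outputs to bucket blob names: a file at <output_root>/<src_name>/<path>/<name> is uploaded as <cache_key>/<cache_path>/<path>/<name>. A small standalone sketch of that mapping, using made-up paths:

import os
from pathlib import Path

output_root = "out"
src_name = "reports/"      # directory output (note the trailing slash)
cache_key = "abc123"
cache_path = "reports"

# A file under the directory output...
local_file = Path(output_root) / src_name / "2021" / "summary.txt"
# ...keeps its path relative to the output directory...
rel = os.path.relpath(local_file, Path(output_root) / src_name)
# ...and lands under <cache_key>/<cache_path>/ in the bucket.
blob_name = str(Path(cache_key) / cache_path / rel)
print(blob_name)  # abc123/reports/2021/summary.txt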
Example #4
def load_deps(deps):
    deps = set(deps) - loaded_deps
    # check that these deps are built, since they have not been checked by the PreviewExecution.
    missing_deps = []
    for dep in deps:
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(
                build_state, dep)
            if dep_rule not in build_state.ready:
                missing_deps.append(dep)
    if missing_deps:
        raise MissingDependency(*missing_deps)
    loaded_deps.update(deps)
    if in_sandbox:
        log(f"Loading dependencies {deps} into sandbox")
        copy_helper(
            src_root=build_state.repo_root,
            dest_root=scratch_path,
            src_names=[dep for dep in deps if not dep.startswith(":")],
            symlink=not rule.do_not_symlink,
        )
Example #5
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def load_deps(deps):
        deps = set(deps) - loaded_deps
        # check that these deps are built, since they have not been checked by the PreviewExecution.
        missing_deps = []
        for dep in deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                if dep_rule not in build_state.ready:
                    missing_deps.append(dep)
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if in_sandbox:
            log(f"Loading dependencies {deps} into sandbox")
            copy_helper(
                src_root=build_state.repo_root,
                dest_root=scratch_path,
                src_names=[dep for dep in deps if not dep.startswith(":")],
                symlink=not rule.do_not_symlink,
            )

    load_deps(deps)
    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else build_state.repo_root,
        scratch_path.joinpath(rule.location)
        if in_sandbox else Path(build_state.repo_root).joinpath(rule.location),
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
        if dep not in build_state.source_files:
            ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException("".join([
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            e.stdout.decode("utf-8"),
            e.stderr.decode("utf-8"),
            traceback.format_exc(),
        ]))

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    for input_path in ctx.inputs:
        if input_path.startswith(":"):
            # don't hash rule deps
            continue
        hashstate.update(input_path.encode("utf-8"))
        hashstate.update(hash_file(input_path))

    return hashstate.state()
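
The docstring above says a build either finishes or raises a missing dependency and gets requeued. Below is a self-contained toy of that scheduling pattern; toy_build, queue, and the rule names are made-up stand-ins, not the real scheduler.

from collections import deque

class MissingDependency(Exception):
    """Raised when a rule depends on something that is not built yet."""

built = set()

def toy_build(rule, deps):
    missing = [dep for dep in deps if dep not in built]
    if missing:
        raise MissingDependency(*missing)
    built.add(rule)

queue = deque([(":app", [":lib"]), (":lib", [])])
while queue:
    rule, deps = queue.popleft()
    try:
        toy_build(rule, deps)
        print(f"built {rule}")
    except MissingDependency as exc:
        # requeue the rule; it will succeed once its deps have been built
        print(f"requeueing {rule}; missing {exc.args}")
        queue.append((rule, deps))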
Example #6
    def cache_load_files(cache_key: str, rule: Rule, dest_root: str) -> List[str]:
        cache_location, cache_paths = get_cache_output_paths(
            cache_directory, rule, cache_key
        )
        out = []
        if bucket:
            from google.cloud.exceptions import NotFound

            del cache_location
            if not aux_load_files(cache_key, rule, dest_root):
                try:
                    cache_load_string(cache_key, ".touch")
                except CacheMiss:
                    STATS["misses"] += delta
                    raise
                for src_name, cache_path in zip(rule.outputs, cache_paths):
                    cache_path = str(Path(cache_key).joinpath(cache_path))
                    os.makedirs(dest_root, exist_ok=True)
                    try:
                        if src_name.endswith("/"):
                            os.makedirs(
                                Path(dest_root).joinpath(src_name), exist_ok=True
                            )
                            blobs = list(bucket.list_blobs(prefix=cache_path))
                            for blob in blobs:
                                filename = blob.name[len(cache_path) + 1 :]
                                target = str(
                                    Path(dest_root)
                                    .joinpath(src_name)
                                    .joinpath(filename)
                                )
                                os.makedirs(dirname(target), exist_ok=True)
                                blob.download_to_filename(target)
                                STATS["hits"] += delta
                                out.append(os.path.join(src_name, filename))
                        else:
                            target = os.path.join(dest_root, src_name)
                            os.makedirs(dirname(target), exist_ok=True)
                            bucket.blob(cache_path).download_to_filename(target)
                            out.append(target)
                            STATS["hits"] += delta
                    except NotFound:
                        STATS["misses"] += delta
                        raise CacheMiss
                # now that we have fetched, let's cache it on disk
                aux_store_files(cache_key, rule, dest_root)
            return out
        else:
            if not os.path.exists(cache_location):
                STATS["misses"] += delta
                raise CacheMiss
            try:
                _, out = copy_helper(
                    src_root=cache_location,
                    src_names=cache_paths,
                    dest_root=dest_root,
                    dest_names=rule.outputs,
                )
                STATS["hits"] += delta
            except FileNotFoundError:
                raise BuildException(
                    "Cache corrupted. This should never happen unless you modified the cache "
                    "directory manually! If so, delete the cache directory and try again."
                )
            return out
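
Unlike cache_loader in Example #2, which returns False on a miss, cache_load_files signals a miss by raising CacheMiss and returns the list of restored paths on a hit. A hedged caller sketch with toy stand-ins (not the real loader):

class CacheMiss(Exception):
    pass

def toy_cache_load_files(cache_key):
    # Stand-in for the real loader: pretend nothing is cached.
    raise CacheMiss

try:
    restored = toy_cache_load_files("abc123")
except CacheMiss:
    restored = None  # fall back to building the rule and repopulating the cache
print(restored)  # None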
Example #7
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def dep_fetcher(dep, *, initial_load=False):
        if dep not in loaded_deps and in_sandbox:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            if not dep.startswith(":"):
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # check that these deps are built, since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return dep_rule

    if precomputed_deps:
        assert in_sandbox
        for dep in precomputed_deps:
            dep_fetcher(dep, initial_load=True)

    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else os.curdir,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException(
            "".join(
                [
                    str(e) + "\n",
                    Style.RESET_ALL,
                    f"Location: {scratch_path}\n",
                    f"Working Directory: {scratch_path}/{ctx.cwd}\n",
                    e.stdout.decode("utf-8"),
                    e.stderr.decode("utf-8"),
                ]
            )
        )

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    if not skip_cache_key:
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                # don't hash rule deps
                continue
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))
    hashstate.record("done")

    return provided_value, hashstate.state() if not skip_cache_key else None
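
One subtlety in the final return statement: the conditional expression binds only to the second element of the tuple, so callers always receive a pair, with the cache key simply None when skip_cache_key is set. A tiny standalone check of that precedence:

def toy_return(provided_value, key, skip_cache_key):
    # Same shape as the return above: a 2-tuple whose second element is conditional.
    return provided_value, key if not skip_cache_key else None

print(toy_return("value", "cachekey", skip_cache_key=False))  # ('value', 'cachekey')
print(toy_return("value", "cachekey", skip_cache_key=True))   # ('value', None)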