Exemple #1
0
    def save(cache_key: str, rule: Rule, output_root: str):
        """
        Store the outputs of ``rule`` (found under ``output_root``) in the
        cache entry named ``cache_key``.

        With a ``bucket`` configured, every output file is uploaded to it
        unless a file with the same hash already sits at the matching
        location under ``AUX_CACHE``; ``aux_save`` is then invoked. Without a
        bucket, the outputs are copied into the local cache location via
        ``copy_helper``.
        """
        cache_location, cache_paths = get_cache_output_paths(
            cache_directory, rule, cache_key)

        memorize(cache_key, ".touch", "")

        if not bucket:
            STATS["inserts"] += delta
            copy_helper(
                src_root=output_root,
                src_names=rule.outputs,
                dest_root=cache_location,
                dest_names=cache_paths,
            )
            return

        del cache_location  # just to be safe

        def upload_if_changed(local_file, aux_file, blob_name):
            # Nothing to do when the auxiliary cache already holds a file
            # with the same hash.
            if os.path.exists(aux_file) and (
                    hash_file(local_file) == hash_file(aux_file)):
                return
            STATS["inserts"] += delta
            bucket.blob(blob_name).upload_from_filename(str(local_file))

        for src_name, cache_path in zip(rule.outputs, cache_paths):
            #                 output_root -> src_name
            # <cache_base> -> cache_key -> cache_path
            local_base = Path(output_root).joinpath(src_name)
            aux_base = Path(AUX_CACHE).joinpath(cache_key).joinpath(cache_path)
            blob_base = Path(cache_key).joinpath(cache_path)

            if not src_name.endswith("/"):
                # Single-file output.
                upload_if_changed(local_base, aux_base, str(blob_base))
                continue

            # Directory output: mirror every file below it, keeping the
            # path relative to the directory root.
            #               output_root -> src_name/ -> [rel_dir -> filename]
            # <cache_base> -> cache_key -> cache_path -> [rel_dir -> filename]
            for walk_dir, _subdirs, filenames in os.walk(local_base):
                rel_dir = os.path.relpath(walk_dir, local_base)
                for filename in filenames:
                    rel_file = Path(rel_dir).joinpath(filename)
                    upload_if_changed(
                        local_base.joinpath(rel_file),
                        aux_base.joinpath(rel_file),
                        str(blob_base.joinpath(rel_file)),
                    )

        aux_save(cache_key, rule, output_root)
 def _hash(self) -> str:
     """
     Return the state of a HashState fed with every child of this depset.

     A nested DepSet contributes the result of its own ``_hash()``; any
     other child must be a string not ending in "/" or ":" and contributes
     its name followed by ``hash_file`` of that path. A warning is printed
     on every call.
     """
     print(
         "[WARNING] DepSet hashing is experimental and may corrupt caches")
     digest = HashState()
     for entry in self.children:
         if isinstance(entry, DepSet):
             digest.record(entry._hash())
             continue
         is_plain_file = (isinstance(entry, str)
                          and not entry.endswith(("/", ":")))
         assert is_plain_file, "Depsets only hold files or other depsets"
         digest.record(entry)
         digest.update(hash_file(entry))
     return digest.state()
    def dep_fetcher(input_path, *, get_hash=False) -> Union[str, bytes]:
        """
        Return the text of ``input_path``, or its ``hash_file`` result when
        ``get_hash`` is true.

        Raises MissingDependency when the path is not a known source file and
        the rule looked up for it is not in ``build_state.ready``, or when a
        FileNotFoundError is raised while fetching it.
        """
        try:
            is_source = input_path in build_state.source_files
            if not is_source:
                producer = build_state.target_rule_lookup.lookup(
                    build_state, input_path)
                # A generated input may be stale / unbuilt; never read it in
                # that state, signal the missing dependency instead.
                if producer not in build_state.ready:
                    raise MissingDependency(input_path)

            if not get_hash:
                with open(input_path) as handle:
                    return handle.read()
            return hash_file(input_path)
        except FileNotFoundError:
            raise MissingDependency(input_path)
Exemple #4
0
    def dep_fetcher(input_path, type: str = "rule"):
        """
        Resolve ``input_path`` and return either the rule found for it
        (``type="rule"``; None for source files) or its ``hash_file`` result
        (``type="hash"``).

        Raises MissingDependency when a non-source path has no rule, its rule
        is not in ``build_state.ready``, or a FileNotFoundError occurs.
        Raises Exception for any other ``type``.
        """
        try:
            rule = None
            if input_path not in build_state.source_files:
                rule = build_state.target_rule_lookup.try_lookup(input_path)
                # The input may be stale / unbuilt / gone; in that case do
                # not touch it and report the missing dependency instead.
                usable = rule is not None and rule in build_state.ready
                if not usable:
                    raise MissingDependency(input_path)

            if type == "rule":
                return rule
            if type == "hash":
                return hash_file(input_path)
            raise Exception(f"Unknown dep type {type}")
        except FileNotFoundError:
            raise MissingDependency(input_path)
Exemple #5
0
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue

    Args:
        build_state: shared build state (source files, rule lookup, ready set,
            repo root, cache directory).
        rule: the rule whose ``impl`` is executed.
        deps: dependencies already known for the rule; they are loaded
            before the rule runs.
        scratch_path: sandbox directory. When None the rule runs directly
            against ``build_state.repo_root`` and nothing is copied.

    Returns:
        The state of the HashState after folding in the rule's deps, its
        outputs and every non-rule input recorded on the execution context.

    Raises:
        MissingDependency: a non-source dependency's rule is not in
            ``build_state.ready``.
        BuildException: a shell command failed (CalledProcessError), or an
            expected output could not be copied out of the sandbox.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def decode_output(stream) -> str:
        # CalledProcessError.stdout / .stderr are None when the stream was
        # not captured, and process output is not guaranteed to be valid
        # UTF-8. Formatting the error must never mask the real failure.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def load_deps(deps):
        deps = set(deps) - loaded_deps
        # check that these deps are built! Since they have not been checked by the PreviewExecution.
        missing_deps = []
        for dep in deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                if dep_rule not in build_state.ready:
                    missing_deps.append(dep)
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if in_sandbox:
            log(f"Loading dependencies {deps} into sandbox")
            copy_helper(
                src_root=build_state.repo_root,
                dest_root=scratch_path,
                # rule deps (":name") are not files, so there is nothing to copy
                src_names=[dep for dep in deps if not dep.startswith(":")],
                symlink=not rule.do_not_symlink,
            )

    load_deps(deps)
    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else build_state.repo_root,
        scratch_path.joinpath(rule.location)
        if in_sandbox else Path(build_state.repo_root).joinpath(rule.location),
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
        if dep not in build_state.source_files:
            ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException("".join([
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            decode_output(e.stdout),
            decode_output(e.stderr),
            traceback.format_exc(),
        ])) from e

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            ) from e

    for input_path in ctx.inputs:
        if input_path.startswith(":"):
            # don't hash rule deps
            continue
        hashstate.update(input_path.encode("utf-8"))
        hashstate.update(hash_file(input_path))

    return hashstate.state()
Exemple #6
0
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_target: str,
    state_directory: str,
    quiet: bool,
):
    """
    Run the setup rule ``setup_target`` and, transitively, every setup rule
    it depends on, skipping rules whose cached state is still valid.

    Rules are processed from a work queue. A rule whose rule-dependencies are
    not all ready enqueues them and waits; it is re-queued once its last
    pending dependency becomes ready. A ready rule's shell queue is executed
    only if its recorded hash state differs from the cached one (or is
    missing), if one of its rule-dependencies was rebuilt during this call,
    or if one of its outputs does not exist.

    Args:
        setup_rule_lookup: lookup whose ``direct_lookup`` maps target names
            to setup rules.
        setup_target: name of the setup rule to start from.
        state_directory: directory handed to the cache fetch/memorize
            factories.
        quiet: forwarded to ``create_status_monitor``.

    Raises:
        BuildException: unknown setup target or setup rule, missing source
            file, or a setup rule without a name.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup

    if setup_target not in direct_lookup:
        raise BuildException(
            f"Unknown or unspecified setup target {setup_target}")

    # both sets hold Rule objects (they are filled with `todo` below)
    rebuilt: Set[Rule] = set()
    ready: Set[Rule] = set()
    work_queue = [direct_lookup[setup_target]]

    cache_fetcher, _ = make_cache_fetcher(state_directory)
    cache_memorize, _ = make_cache_memorize(state_directory)

    status_monitor = create_status_monitor(1, quiet)
    status_monitor.move(total=1)

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        hashstate = HashState()
        ctx = WorkspaceExecutionContext(hashstate)
        unchecked_rules = []
        for dep in todo.deps:
            hashstate.record(dep)
            if dep.startswith(":"):
                if dep not in direct_lookup:
                    raise BuildException(f"Unable to find setup rule {dep}")
                dep_rule = direct_lookup[dep]
                if dep_rule not in ready:
                    unchecked_rules.append(dep_rule)
                    continue
                ctx.deps[dep] = dep_rule.provided_value
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
            else:
                try:
                    hashstate.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")

        if unchecked_rules:
            for dep in unchecked_rules:
                if dep not in work_queue:
                    log(f"Setup rule {todo} is enqueuing {dep}")
                    status_monitor.move(total=1)
                    work_queue.append(dep)
                else:
                    log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                        )
                dep.runtime_dependents.add(todo)
                todo.pending_rule_dependencies.add(dep)
        else:
            # our dependent rules are ready, now we need to see if we need to rerun
            todo.provided_value = todo.impl(ctx)

            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            try:
                ok = cache_fetcher("workspace", todo.name) == hashstate.state()
                if not ok:
                    log(f"State mismatch for rule {todo}, need to rerun")
            except CacheMiss:
                log(f"State not found for rule {todo}, need to run for first time"
                    )
                ok = False

            for dep in todo.deps:
                if dep.startswith(":"):
                    if direct_lookup[dep] in rebuilt:
                        log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                            )
                        # A rebuilt dependency invalidates this rule. This
                        # must clear `ok`; setting it to True would skip the
                        # rerun and even mask a cache mismatch found above.
                        ok = False

            for out in todo.outputs:
                if not os.path.exists(out):
                    log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                        )
                    ok = False
                    break

            if not ok:
                # we need to fully run
                ctx.run_shell_queue()
                rebuilt.add(todo)
                cache_memorize("workspace", todo.name, hashstate.state())

            # either way, now we can trigger our dependents
            ready.add(todo)
            for dep in todo.runtime_dependents:
                dep.pending_rule_dependencies.remove(todo)
                if not dep.pending_rule_dependencies:
                    work_queue.append(dep)
                    status_monitor.move(total=1)

        status_monitor.move(curr=1)
Exemple #7
0
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Run every setup rule named in ``setup_targets`` together with the setup
    rules they turn out to depend on.

    Each rule's ``impl`` is evaluated against a WorkspaceExecutionContext.
    If it reports rule dependencies that are not ready yet, those are queued
    and the rule waits for them. Otherwise its shell queue is executed only
    when the cached hash state is missing or different, a rule input was
    rebuilt during this call, or an output does not exist.

    Any exception raised while handling a rule stops the status monitor and
    is re-raised as a BuildException naming that rule.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup

    work_queue = []
    for setup_target in setup_targets:
        if setup_target not in direct_lookup:
            raise BuildException(
                f"Unknown or unspecified setup target {setup_target}")
        work_queue.append(direct_lookup[setup_target])

    rebuilt = set()
    ready = set()

    cache_load_string, _ = make_cache_load(state_directory)
    cache_store_string, _ = make_cache_store(state_directory)

    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    def dep_fetcher(dep):
        # Only rule deps (":name") resolve to something; anything else
        # yields None.
        if not dep.startswith(":"):
            return None
        if dep not in direct_lookup:
            raise BuildException(f"Unable to find setup rule {dep}")
        dep_rule = direct_lookup[dep]
        log(f"Looking up setup rule {dep}")
        if dep_rule not in ready:
            raise MissingDependency(dep)
        return dep_rule

    def wait_on(todo, blockers):
        # Queue every rule `todo` is blocked on and register the dependency
        # in both directions.
        for dep in blockers:
            if dep in work_queue:
                log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                    )
            else:
                log(f"Setup rule {todo} is enqueuing {dep}")
                status_monitor.move(total=1)
                work_queue.append(dep)
            dep.runtime_dependents.add(todo)
            todo.pending_rule_dependencies.add(dep)

    def is_current(todo, ctx, hashstate):
        # Fold the file inputs into the hash state, then decide whether the
        # previously stored state still applies to `todo`.
        all_inputs = ctx.inputs + ctx.deferred_inputs
        for dep in all_inputs:
            if dep.startswith(":"):
                continue
            try:
                hashstate.record(dep)
                hashstate.update(hash_file(dep))
            except FileNotFoundError:
                raise BuildException(f"Source file {dep} not found.")

        try:
            ok = cache_load_string("workspace",
                                   todo.name) == hashstate.state()
        except CacheMiss:
            log(f"State not found for rule {todo}, need to run for first time"
                )
            ok = False
        else:
            if not ok:
                log(f"State mismatch for rule {todo}, need to rerun")

        for dep in all_inputs:
            if dep.startswith(":") and direct_lookup[dep] in rebuilt:
                log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                    )
                ok = False

        for out in todo.outputs:
            if os.path.exists(out):
                continue
            log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                )
            ok = False
            break

        return ok

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        try:
            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            hashstate = HashState()
            ctx = WorkspaceExecutionContext(hashstate, dep_fetcher)
            unchecked_rules = []

            try:
                provided = todo.impl(ctx)
                todo.set_provided_value(
                    provided,
                    None,
                    ctx.inputs,
                    ctx.deferred_inputs,
                    [],  # todo: implement output providers for setup rules
                )
                if ctx.out_of_date_deps:
                    raise MissingDependency(*ctx.out_of_date_deps)
            except MissingDependency as missing:
                unchecked_rules = [direct_lookup[x] for x in missing.paths]

            if unchecked_rules:
                wait_on(todo, unchecked_rules)
            else:
                log(f"Setup rule {todo} ran with inputs {ctx.inputs + ctx.deferred_inputs}"
                    )
                if not is_current(todo, ctx, hashstate):
                    # we need to fully run
                    log(f"Fully running setup rule {todo}")
                    ctx.run_shell_queue()
                    rebuilt.add(todo)
                    cache_store_string("workspace", todo.name,
                                       hashstate.state())

                # either way, now we can trigger our dependents
                ready.add(todo)
                for dep in todo.runtime_dependents:
                    dep.pending_rule_dependencies.remove(todo)
                    if dep.pending_rule_dependencies:
                        continue
                    work_queue.append(dep)
                    status_monitor.move(total=1)

            status_monitor.move(curr=1)
        except Exception as e:
            # BuildExceptions already carry a readable message; anything
            # else gets the traceback appended.
            suffix = ("" if isinstance(e, BuildException) else
                      f"\n{Style.RESET_ALL}" + traceback.format_exc())
            status_monitor.stop()
            raise BuildException(f"Error while executing rule {todo}: " +
                                 str(e) + suffix)
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue

    Args:
        build_state: shared build state (source files, rule lookup, ready set,
            macros, cache directory).
        rule: the rule whose ``impl`` is executed.
        precomputed_deps: dependencies to load into the sandbox before the
            rule runs; only allowed together with ``scratch_path``.
        scratch_path: sandbox directory. When None the rule runs against the
            current directory and nothing is copied.
        skip_cache_key: when true, outputs and inputs are not folded into the
            hash state and None is returned in place of the cache key.

    Returns:
        A tuple ``(provided_value, cache_key)`` where ``provided_value`` is
        what ``rule.impl`` returned and ``cache_key`` is the final hash state,
        or None if ``skip_cache_key`` is set.

    Raises:
        MissingDependency: a non-source dependency's rule is not in
            ``build_state.ready``, or the context reports out-of-date deps.
        BuildException: a shell command failed (CalledProcessError), a new
            dependency appeared in the sandbox after the initial load, or an
            expected output could not be copied out of the sandbox.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def decode_output(stream) -> str:
        # CalledProcessError.stdout / .stderr are None when the stream was
        # not captured, and process output is not guaranteed to be valid
        # UTF-8. Formatting the error must never mask the real failure.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def dep_fetcher(dep, *, initial_load=False):
        if dep not in loaded_deps and in_sandbox:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            # rule deps (":name") are not files, so there is nothing to copy
            if not dep.startswith(":"):
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # check that these deps are built! Since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return dep_rule

    if precomputed_deps:
        assert in_sandbox
        for dep in precomputed_deps:
            dep_fetcher(dep, initial_load=True)

    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else os.curdir,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException(
            "".join(
                [
                    str(e) + "\n",
                    Style.RESET_ALL,
                    f"Location: {scratch_path}\n",
                    f"Working Directory: {scratch_path}/{ctx.cwd}\n",
                    decode_output(e.stdout),
                    decode_output(e.stderr),
                ]
            )
        ) from e

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            ) from e

    if not skip_cache_key:
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                # don't hash rule deps
                continue
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))
    hashstate.record("done")

    return provided_value, hashstate.state() if not skip_cache_key else None