def save(cache_key: str, rule: Rule, output_root: str):
    """
    Store the outputs of `rule`, read from `output_root`, under `cache_key`.

    A ".touch" entry is always memorized for the key first. When a bucket is
    configured, every output file whose counterpart under AUX_CACHE is missing
    or hashes differently is uploaded as a blob, and `aux_save` is then called
    with the same arguments. Without a bucket, the outputs are copied into the
    local cache location with `copy_helper`.
    """
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key
    )

    memorize(cache_key, ".touch", "")

    if not bucket:
        # purely local cache: copy the outputs into place and stop
        STATS["inserts"] += delta
        copy_helper(
            src_root=output_root,
            src_names=rule.outputs,
            dest_root=cache_location,
            dest_names=cache_paths,
        )
        return

    del cache_location  # just to be safe

    def is_stale(local_file, aux_file):
        # an upload is needed when the aux copy is absent or its hash differs
        if not os.path.exists(aux_file):
            return True
        return hash_file(local_file) != hash_file(aux_file)

    def upload(blob_name, local_file):
        STATS["inserts"] += delta
        bucket.blob(str(blob_name)).upload_from_filename(str(local_file))

    source_root = Path(output_root)
    aux_root = Path(AUX_CACHE).joinpath(cache_key)
    blob_root = Path(cache_key)

    for src_name, cache_path in zip(rule.outputs, cache_paths):
        if not src_name.endswith("/"):
            # single file output:
            # output_root -> src_name
            # <cache_base> -> cache_key -> cache_path
            src_loc = source_root / src_name
            if is_stale(src_loc, aux_root / cache_path):
                upload(blob_root / cache_path, src_loc)
            continue

        # directory output: visit every file below it
        dir_root = source_root / src_name
        for walk_dir, _subdirs, filenames in os.walk(dir_root):
            rel_dir = os.path.relpath(walk_dir, dir_root)
            for name in filenames:
                # output_root -> src_name/ -> [path -> name]
                # <cache_base> -> cache_key -> cache_path -> [path -> name]
                target = Path(rel_dir).joinpath(name)
                src_loc = dir_root / target
                if is_stale(src_loc, aux_root / cache_path / target):
                    upload(blob_root / cache_path / target, src_loc)

    aux_save(cache_key, rule, output_root)
def _hash(self) -> str:
    """
    Compute a hash over every child of this depset.

    Nested depsets contribute their own `_hash()`; every other child must be
    a string that ends in neither "/" nor ":", and contributes both its name
    and the hash of the file it names. A warning is printed on every call.
    """
    print("[WARNING] DepSet hashing is experimental and may corrupt caches")

    state = HashState()
    for member in self.children:
        if isinstance(member, DepSet):
            state.record(member._hash())
            continue

        assert isinstance(member, str) and not member.endswith(
            ("/", ":")
        ), "Depsets only hold files or other depsets"
        state.record(member)
        state.update(hash_file(member))

    return state.state()
def dep_fetcher(input_path, *, get_hash=False) -> Union[str, bytes]:
    """
    Return the contents of `input_path`, or its hash when `get_hash` is set.

    Raises MissingDependency when the path is not a source file and the rule
    found for it is not in `build_state.ready`, or when a FileNotFoundError
    occurs while looking up or reading the path.
    """
    try:
        is_source = input_path in build_state.source_files
        if not is_source:
            producer = build_state.target_rule_lookup.lookup(
                build_state, input_path
            )
            # this input may be stale / unbuilt
            # if so, do not read it, but instead throw MissingDependency
            if producer not in build_state.ready:
                raise MissingDependency(input_path)

        # so it's already ready for use!
        if get_hash:
            return hash_file(input_path)

        with open(input_path) as handle:
            contents = handle.read()
        return contents
    except FileNotFoundError:
        raise MissingDependency(input_path)
def dep_fetcher(input_path, type: str = "rule"):
    """
    Resolve `input_path` to either the rule found for it or its file hash.

    With type "rule" the looked-up rule is returned (None for source files);
    with type "hash" the hash of the file is returned. Any other type raises
    a plain Exception.

    Raises MissingDependency when the path is not a source file and its rule
    is unknown or not in `build_state.ready`, or when a FileNotFoundError
    occurs along the way.
    """
    try:
        producer = None
        if input_path not in build_state.source_files:
            producer = build_state.target_rule_lookup.try_lookup(input_path)
            # this input may be stale / unbuilt / no longer exists
            # if so, do not read it, but instead throw MissingDependency
            unavailable = producer is None or producer not in build_state.ready
            if unavailable:
                raise MissingDependency(input_path)

        # so it's already ready for use!
        if type == "hash":
            return hash_file(input_path)
        if type == "rule":
            return producer
        raise Exception(f"Unknown dep type {type}")
    except FileNotFoundError:
        raise MissingDependency(input_path)
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been
    obtained, so now the rule is actually run.

    Either everything finishes and the final hash state is returned, or a
    MissingDependency is raised and the rule has to be requeued. A failing
    shell command, or an output that cannot be copied out of the sandbox, is
    reported as a BuildException.

    When `scratch_path` is None the rule runs directly in the repo root;
    otherwise it runs inside that sandbox directory.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)
    in_sandbox = scratch_path is not None
    loaded_deps = set()

    def load_deps(deps):
        deps = set(deps) - loaded_deps

        # check that these deps are built! Since they have not been checked by the PreviewExecution.
        missing_deps = [
            dep
            for dep in deps
            if dep not in build_state.source_files
            and build_state.target_rule_lookup.lookup(build_state, dep)
            not in build_state.ready
        ]
        if missing_deps:
            raise MissingDependency(*missing_deps)

        loaded_deps.update(deps)
        if not in_sandbox:
            return

        log(f"Loading dependencies {deps} into sandbox")
        copy_helper(
            src_root=build_state.repo_root,
            dest_root=scratch_path,
            src_names=[dep for dep in deps if not dep.startswith(":")],
            symlink=not rule.do_not_symlink,
        )

    load_deps(deps)
    hashstate = HashState()

    # the execution root and working directory depend on whether we sandbox
    if in_sandbox:
        exec_root = scratch_path
        exec_cwd = scratch_path.joinpath(rule.location)
    else:
        exec_root = build_state.repo_root
        exec_cwd = Path(build_state.repo_root).joinpath(rule.location)

    ctx = ExecutionContext(
        exec_root,
        exec_cwd,
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            # rule deps are exposed as attributes, without the leading colon
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
        if dep not in build_state.source_files:
            ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        report = [
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            e.stdout.decode("utf-8"),
            e.stderr.decode("utf-8"),
            traceback.format_exc(),
        ]
        raise BuildException("".join(report))

    if in_sandbox:
        # bring the generated outputs back out of the sandbox
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    for input_path in ctx.inputs:
        # don't hash rule deps
        if not input_path.startswith(":"):
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))

    return hashstate.state()
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_target: str,
    state_directory: str,
    quiet: bool,
):
    """
    Run the setup rule `setup_target` and, before it, every setup rule it
    depends on.

    Rules are processed from a work queue. A rule whose rule dependencies are
    not all ready enqueues them and waits; it is re-enqueued once its last
    pending dependency finishes. A ready rule has its shell queue executed
    only when at least one of the following holds:

    * the state stored for it under "workspace" is missing or differs from
      the hash of its current dependencies,
    * one of its rule dependencies was rebuilt during this call,
    * one of its declared outputs does not exist on disk.

    Raises BuildException for an unknown target, an unknown rule dependency,
    a missing source file, or a setup rule without a name.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup

    if setup_target not in direct_lookup:
        raise BuildException(
            f"Unknown or unspecified setup target {setup_target}"
        )

    # both sets hold Rule objects: rules fully rerun / rules that are finished
    rebuilt: Set[Rule] = set()
    ready: Set[Rule] = set()
    work_queue = [direct_lookup[setup_target]]

    cache_fetcher, _ = make_cache_fetcher(state_directory)
    cache_memorize, _ = make_cache_memorize(state_directory)

    status_monitor = create_status_monitor(1, quiet)
    status_monitor.move(total=1)

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        hashstate = HashState()
        ctx = WorkspaceExecutionContext(hashstate)
        unchecked_rules = []

        for dep in todo.deps:
            hashstate.record(dep)
            if dep.startswith(":"):
                if dep not in direct_lookup:
                    raise BuildException(f"Unable to find setup rule {dep}")
                dep_rule = direct_lookup[dep]
                if dep_rule not in ready:
                    unchecked_rules.append(dep_rule)
                    continue
                # expose the provided value both by key and as an attribute
                ctx.deps[dep] = dep_rule.provided_value
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
            else:
                try:
                    hashstate.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")

        if unchecked_rules:
            for dep in unchecked_rules:
                if dep not in work_queue:
                    log(f"Setup rule {todo} is enqueuing {dep}")
                    status_monitor.move(total=1)
                    work_queue.append(dep)
                else:
                    log(
                        f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                    )
                dep.runtime_dependents.add(todo)
                todo.pending_rule_dependencies.add(dep)
        else:
            # our dependent rules are ready, now we need to see if we need to rerun
            todo.provided_value = todo.impl(ctx)

            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not."
                )

            try:
                ok = cache_fetcher("workspace", todo.name) == hashstate.state()
                if not ok:
                    log(f"State mismatch for rule {todo}, need to rerun")
            except CacheMiss:
                log(
                    f"State not found for rule {todo}, need to run for first time"
                )
                ok = False

            for dep in todo.deps:
                if dep.startswith(":"):
                    if direct_lookup[dep] in rebuilt:
                        log(
                            f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                        )
                        # a rebuilt dependency invalidates this rule; setting
                        # ok to True here would skip the rerun and could even
                        # hide a state mismatch detected just above
                        ok = False

            for out in todo.outputs:
                if not os.path.exists(out):
                    log(
                        f"Output {out} is missing for setup rule {todo}, forcing rerun"
                    )
                    ok = False
                    break

            if not ok:
                # we need to fully run
                ctx.run_shell_queue()
                rebuilt.add(todo)
                cache_memorize("workspace", todo.name, hashstate.state())

            # either way, now we can trigger our dependents
            ready.add(todo)
            for dep in todo.runtime_dependents:
                dep.pending_rule_dependencies.remove(todo)
                if not dep.pending_rule_dependencies:
                    work_queue.append(dep)
                    status_monitor.move(total=1)

        # one queue entry has been handled, whether it ran or is now waiting
        status_monitor.move(curr=1)
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Run every setup rule named in `setup_targets`, together with the setup
    rules they turn out to depend on.

    Each rule's implementation is evaluated with a dep fetcher that raises
    MissingDependency for rule deps that are not ready yet; such deps are
    enqueued and the rule waits until they have all finished. Once a rule
    evaluates cleanly, its shell queue is executed only if its stored
    "workspace" state is missing or stale, one of its rule inputs was rebuilt
    during this call, or one of its outputs is missing on disk.

    Any error raised while handling a rule stops the status monitor and is
    re-raised as a BuildException naming that rule.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup

    work_queue = []
    for setup_target in setup_targets:
        if setup_target in direct_lookup:
            work_queue.append(direct_lookup[setup_target])
        else:
            raise BuildException(
                f"Unknown or unspecified setup target {setup_target}"
            )

    rebuilt: Set[str] = set()
    ready: Set[str] = set()

    cache_load_string, _ = make_cache_load(state_directory)
    cache_store_string, _ = make_cache_store(state_directory)

    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    def dep_fetcher(dep):
        # only rule deps (":name") are resolved; anything else yields None
        if not dep.startswith(":"):
            return None
        if dep not in direct_lookup:
            raise BuildException(f"Unable to find setup rule {dep}")
        dep_rule = direct_lookup[dep]
        log(f"Looking up setup rule {dep}")
        if dep_rule not in ready:
            raise MissingDependency(dep)
        return dep_rule

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        try:
            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not."
                )

            hashstate = HashState()
            ctx = WorkspaceExecutionContext(hashstate, dep_fetcher)

            unchecked_rules = []
            try:
                todo.set_provided_value(
                    todo.impl(ctx),
                    None,
                    ctx.inputs,
                    ctx.deferred_inputs,
                    [],  # todo: implement output providers for setup rules
                )
                if ctx.out_of_date_deps:
                    raise MissingDependency(*ctx.out_of_date_deps)
            except MissingDependency as missing:
                unchecked_rules = [direct_lookup[x] for x in missing.paths]

            if unchecked_rules:
                # some rule deps are not ready: make sure they are queued and
                # register ourselves so we are requeued when they finish
                for dep in unchecked_rules:
                    if dep in work_queue:
                        log(
                            f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                        )
                    else:
                        log(f"Setup rule {todo} is enqueuing {dep}")
                        status_monitor.move(total=1)
                        work_queue.append(dep)
                    dep.runtime_dependents.add(todo)
                    todo.pending_rule_dependencies.add(dep)
                status_monitor.move(curr=1)
                continue

            log(
                f"Setup rule {todo} ran with inputs {ctx.inputs + ctx.deferred_inputs}"
            )
            all_inputs = ctx.inputs + ctx.deferred_inputs

            # fold every file input into the state hash
            for dep in all_inputs:
                if dep.startswith(":"):
                    continue
                try:
                    hashstate.record(dep)
                    hashstate.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")

            try:
                ok = cache_load_string("workspace", todo.name) == hashstate.state()
                if not ok:
                    log(f"State mismatch for rule {todo}, need to rerun")
            except CacheMiss:
                log(
                    f"State not found for rule {todo}, need to run for first time"
                )
                ok = False

            for dep in all_inputs:
                if dep.startswith(":") and direct_lookup[dep] in rebuilt:
                    log(
                        f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                    )
                    ok = False

            for out in todo.outputs:
                if not os.path.exists(out):
                    log(
                        f"Output {out} is missing for setup rule {todo}, forcing rerun"
                    )
                    ok = False
                    break

            if not ok:
                # we need to fully run
                log(f"Fully running setup rule {todo}")
                ctx.run_shell_queue()
                rebuilt.add(todo)
                cache_store_string("workspace", todo.name, hashstate.state())

            # either way, now we can trigger our dependents
            ready.add(todo)
            for dep in todo.runtime_dependents:
                dep.pending_rule_dependencies.remove(todo)
                if not dep.pending_rule_dependencies:
                    work_queue.append(dep)
                    status_monitor.move(total=1)

            status_monitor.move(curr=1)
        except Exception as e:
            # only unexpected errors get a traceback appended
            if isinstance(e, BuildException):
                suffix = ""
            else:
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            status_monitor.stop()
            raise BuildException(
                f"Error while executing rule {todo}: " + str(e) + suffix
            )
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    All the dependencies that can be determined from caches have been
    obtained, so now the rule is actually run.

    Either everything finishes, or a MissingDependency is raised and the rule
    has to be requeued. A failing shell command, an output that cannot be
    copied out of the sandbox, or (in a sandbox) a dependency that was not
    part of the initial load is reported as a BuildException.

    Returns a pair of the value provided by the rule implementation and the
    final hash state; the second element is None when `skip_cache_key` is set.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)
    in_sandbox = scratch_path is not None
    loaded_deps = set()

    def dep_fetcher(dep, *, initial_load=False):
        if in_sandbox and dep not in loaded_deps:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            if not dep.startswith(":"):
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # check that these deps are built! Since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)
        return dep_rule

    if precomputed_deps:
        assert in_sandbox
        for dep in precomputed_deps:
            dep_fetcher(dep, initial_load=True)

    hashstate = HashState()
    exec_root = scratch_path if in_sandbox else os.curdir
    ctx = ExecutionContext(
        exec_root,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        report = [
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {scratch_path}/{ctx.cwd}\n",
            e.stdout.decode("utf-8"),
            e.stderr.decode("utf-8"),
        ]
        raise BuildException("".join(report))

    if in_sandbox:
        # bring the generated outputs back out of the sandbox
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    final_state = None
    if not skip_cache_key:
        for input_path in ctx.inputs:
            # don't hash rule deps
            if not input_path.startswith(":"):
                hashstate.update(input_path.encode("utf-8"))
                hashstate.update(hash_file(input_path))
        hashstate.record("done")
        final_state = hashstate.state()

    return provided_value, final_state