def _hash(self) -> str:
    """
    Fold every child of this DepSet into a fresh HashState and return its state.

    A nested DepSet contributes the result of its own ``_hash()``. Any other
    child must be a string that does not end in "/" or ":"; it contributes
    its own name followed by ``hash_file(child)``.
    """
    print(
        "[WARNING] DepSet hashing is experimental and may corrupt caches")
    digest = HashState()
    for entry in self.children:
        if isinstance(entry, DepSet):
            # nested sets are summarised by their own hash
            digest.record(entry._hash())
            continue
        is_plain_file = isinstance(entry, str) and not entry.endswith(
            ("/", ":"))
        assert is_plain_file, "Depsets only hold files or other depsets"
        digest.record(entry)
        digest.update(hash_file(entry))
    return digest.state()
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue

    Args:
        build_state: supplies the cache directory, repo root, source file set,
            target lookup and the set of ready rules.
        rule: the rule to run; its ``provided_value`` is set from ``rule.impl``.
        deps: dependencies to verify (and, in a sandbox, copy in) before the
            impl is run.
        scratch_path: sandbox directory, or None to run against the repo root.
            Only in sandbox mode is the context's shell queue run and are the
            rule's outputs copied back to the repo root.

    Returns:
        The state of the HashState accumulated over the static deps, the
        rule's outputs and the non-rule inputs recorded by the context.

    Raises:
        MissingDependency: a non-source dependency's rule is not yet ready.
        BuildException: a subprocess failed, or an expected output file was
            not produced in the sandbox.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def decode_stream(stream) -> str:
        # CalledProcessError.stdout / .stderr are None when the stream was not
        # captured (and str when the process ran in text mode), so calling
        # .decode() on them unconditionally would raise and hide the real
        # build failure. Undecodable bytes are replaced for the same reason.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def load_deps(deps):
        deps = set(deps) - loaded_deps
        # check that these deps are built! Since they have not been checked by the PreviewExecution.
        missing_deps = []
        for dep in deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                if dep_rule not in build_state.ready:
                    missing_deps.append(dep)
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if in_sandbox:
            log(f"Loading dependencies {deps} into sandbox")
            copy_helper(
                src_root=build_state.repo_root,
                dest_root=scratch_path,
                # rule deps (":name") are not files, so there is nothing to copy
                src_names=[dep for dep in deps if not dep.startswith(":")],
                symlink=not rule.do_not_symlink,
            )

    load_deps(deps)
    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else build_state.repo_root,
        scratch_path.joinpath(rule.location)
        if in_sandbox
        else Path(build_state.repo_root).joinpath(rule.location),
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            # rule deps are exposed by name and are not hashed
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
        if dep not in build_state.source_files:
            ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException("".join([
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            decode_stream(e.stdout),
            decode_stream(e.stderr),
            traceback.format_exc(),
        ])) from e

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            ) from e

    for input_path in ctx.inputs:
        if input_path.startswith(":"):
            # don't hash rule deps
            continue
        hashstate.update(input_path.encode("utf-8"))
        hashstate.update(hash_file(input_path))

    return hashstate.state()
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Run the requested setup rules, plus any setup rules their impls ask for.

    Rules are popped off a work queue and their impl is run against a fresh
    WorkspaceExecutionContext. If the impl raises MissingDependency (or the
    context reports out-of-date deps), the missing rules are enqueued and the
    current rule is registered as their runtime dependent; it is re-enqueued
    once all of its pending rule dependencies are ready.

    Otherwise the rule's non-rule inputs are hashed and compared with the
    state stored under ("workspace", rule name). The shell queue is run only
    when that state is missing or different, when a rule input was rebuilt
    during this call, or when one of the rule's outputs does not exist.

    Args:
        setup_rule_lookup: only its ``direct_lookup`` mapping is used.
        setup_targets: names of the setup rules to start from.
        state_directory: passed to the cache load/store factories.
        quiet: forwarded to ``create_status_monitor``.

    Raises:
        BuildException: for an unknown setup target (raised directly), and
            for any exception raised while processing a rule, which is
            re-wrapped with the rule in the message.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup
    work_queue = []
    for setup_target in setup_targets:
        if setup_target not in direct_lookup:
            raise BuildException(
                f"Unknown or unspecified setup target {setup_target}")
        work_queue.append(direct_lookup[setup_target])

    # NOTE(review): both sets are annotated Set[str], but the values added
    # below are the rule objects taken from direct_lookup / the work queue.
    rebuilt: Set[str] = set()
    ready: Set[str] = set()

    cache_load_string, _ = make_cache_load(state_directory)
    cache_store_string, _ = make_cache_store(state_directory)

    # status_monitor is only bound when there is work; the loop below (the
    # only other user) does not execute for an empty queue.
    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    def dep_fetcher(dep):
        # Only rule deps (":name") are resolved here; for any other dep this
        # falls through and returns None.
        if dep.startswith(":"):
            if dep not in direct_lookup:
                raise BuildException(f"Unable to find setup rule {dep}")
            dep_rule = direct_lookup[dep]
            log(f"Looking up setup rule {dep}")
            if dep_rule not in ready:
                raise MissingDependency(dep)
            return dep_rule

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        try:
            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            hashstate = HashState()
            ctx = WorkspaceExecutionContext(hashstate, dep_fetcher)
            unchecked_rules = []

            try:
                todo.set_provided_value(
                    todo.impl(ctx),
                    None,
                    ctx.inputs,
                    ctx.deferred_inputs,
                    [],  # todo: implement output providers for setup rules
                )
                if ctx.out_of_date_deps:
                    raise MissingDependency(*ctx.out_of_date_deps)
            except MissingDependency as e:
                # the impl needs rules that are not ready yet; run them first
                unchecked_rules = [direct_lookup[x] for x in e.paths]

            if unchecked_rules:
                for dep in unchecked_rules:
                    if dep not in work_queue:
                        log(f"Setup rule {todo} is enqueuing {dep}")
                        status_monitor.move(total=1)
                        work_queue.append(dep)
                    else:
                        log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                            )
                    # link both directions so `todo` is requeued when `dep` is ready
                    dep.runtime_dependents.add(todo)
                    todo.pending_rule_dependencies.add(dep)
            else:
                log(f"Setup rule {todo} ran with inputs {ctx.inputs + ctx.deferred_inputs}"
                    )
                for dep in ctx.inputs + ctx.deferred_inputs:
                    if dep.startswith(":"):
                        # rule deps are not hashed; they are handled via `rebuilt` below
                        continue
                    try:
                        hashstate.record(dep)
                        hashstate.update(hash_file(dep))
                    except FileNotFoundError:
                        raise BuildException(f"Source file {dep} not found.")

                try:
                    ok = cache_load_string("workspace",
                                           todo.name) == hashstate.state()
                    if not ok:
                        log(f"State mismatch for rule {todo}, need to rerun")
                except CacheMiss:
                    log(f"State not found for rule {todo}, need to run for first time"
                        )
                    ok = False

                for dep in ctx.inputs + ctx.deferred_inputs:
                    if dep.startswith(":"):
                        if direct_lookup[dep] in rebuilt:
                            log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                                )
                            ok = False

                for out in todo.outputs:
                    if not os.path.exists(out):
                        log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                            )
                        ok = False
                        break

                if not ok:
                    # we need to fully run
                    log(f"Fully running setup rule {todo}")
                    ctx.run_shell_queue()
                    rebuilt.add(todo)
                    cache_store_string("workspace", todo.name,
                                       hashstate.state())

                # either way, now we can trigger our dependents
                ready.add(todo)
                for dep in todo.runtime_dependents:
                    dep.pending_rule_dependencies.remove(todo)
                    if not dep.pending_rule_dependencies:
                        work_queue.append(dep)
                        status_monitor.move(total=1)

            # one queue entry has been processed, whether it ran or was deferred
            status_monitor.move(curr=1)
        except Exception as e:
            # BuildExceptions already carry a readable message; anything else
            # gets the traceback appended so internal errors stay debuggable.
            if not isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            else:
                suffix = ""
            status_monitor.stop()
            raise BuildException(f"Error while executing rule {todo}: " +
                                 str(e) + suffix)
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Run the requested setup rules and the setup rules they statically depend on.

    Rules are popped off a work queue. For each rule, every entry of
    ``todo.deps`` is recorded in a fresh HashState: rule deps (":name") must
    already be ready, otherwise they are enqueued and the current rule is
    registered as their runtime dependent; file deps additionally contribute
    ``hash_file(dep)``.

    Once all rule deps are ready the impl is run, and the shell queue is
    executed only when the state stored under ("workspace", rule name) is
    missing or different, when a rule dep was rebuilt during this call, or
    when one of the rule's outputs does not exist.

    Args:
        setup_rule_lookup: only its ``direct_lookup`` mapping is used.
        setup_targets: names of the setup rules to start from.
        state_directory: passed to the cache fetch/memorize factories.
        quiet: forwarded to ``create_status_monitor``.

    Raises:
        BuildException: unknown setup target or setup rule, missing source
            file, or a setup rule without a name.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup
    work_queue = []
    for setup_target in setup_targets:
        if setup_target not in direct_lookup:
            raise BuildException(
                f"Unknown or unspecified setup target {setup_target}")
        work_queue.append(direct_lookup[setup_target])

    # NOTE(review): both sets are annotated Set[str], but the values added
    # below are the rule objects taken from direct_lookup / the work queue.
    rebuilt: Set[str] = set()
    ready: Set[str] = set()

    cache_fetcher, _ = make_cache_fetcher(state_directory)
    cache_memorize, _ = make_cache_memorize(state_directory)

    # status_monitor is only bound when there is work; the loop below (the
    # only other user) does not execute for an empty queue.
    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        hashstate = HashState()
        ctx = WorkspaceExecutionContext(hashstate)
        unchecked_rules = []
        for dep in todo.deps:
            hashstate.record(dep)
            if dep.startswith(":"):
                if dep not in direct_lookup:
                    raise BuildException(f"Unable to find setup rule {dep}")
                dep_rule = direct_lookup[dep]
                if dep_rule not in ready:
                    # remember it and keep scanning so all missing rules are found
                    unchecked_rules.append(dep_rule)
                    continue
                # expose the value both by key and as an attribute named after the rule
                ctx.deps[dep] = dep_rule.provided_value
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
            else:
                try:
                    hashstate.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")

        if unchecked_rules:
            for dep in unchecked_rules:
                if dep not in work_queue:
                    log(f"Setup rule {todo} is enqueuing {dep}")
                    status_monitor.move(total=1)
                    work_queue.append(dep)
                else:
                    log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                        )
                # link both directions so `todo` is requeued when `dep` is ready
                dep.runtime_dependents.add(todo)
                todo.pending_rule_dependencies.add(dep)
        else:
            # our dependent rules are ready, now we need to see if we need to rerun
            todo.provided_value = todo.impl(ctx)

            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            try:
                ok = cache_fetcher("workspace", todo.name) == hashstate.state()
                if not ok:
                    log(f"State mismatch for rule {todo}, need to rerun")
            except CacheMiss:
                log(f"State not found for rule {todo}, need to run for first time"
                    )
                ok = False

            for dep in todo.deps:
                if dep.startswith(":"):
                    if direct_lookup[dep] in rebuilt:
                        log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                            )
                        ok = False

            for out in todo.outputs:
                if not os.path.exists(out):
                    log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                        )
                    ok = False
                    break

            if not ok:
                # we need to fully run
                ctx.run_shell_queue()
                rebuilt.add(todo)
                cache_memorize("workspace", todo.name, hashstate.state())

            # either way, now we can trigger our dependents
            ready.add(todo)
            for dep in todo.runtime_dependents:
                dep.pending_rule_dependencies.remove(todo)
                if not dep.pending_rule_dependencies:
                    work_queue.append(dep)
                    status_monitor.move(total=1)

        # one queue entry has been processed, whether it ran or was deferred
        status_monitor.move(curr=1)
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue

    Args:
        build_state: supplies the cache directory, macros, source file set,
            target lookup and the set of ready rules.
        rule: the rule whose impl is run.
        precomputed_deps: dependencies to load into the sandbox before the
            impl runs; only valid together with a ``scratch_path``.
        scratch_path: sandbox directory, or None to run in the current
            directory. Only in sandbox mode is the context's shell queue run
            and are the rule's outputs copied back.
        skip_cache_key: when true, no outputs or inputs are hashed and the
            returned cache key is None.

    Returns:
        ``(provided_value, cache_key)`` where ``provided_value`` is what the
        impl returned and ``cache_key`` is the HashState's state, or None when
        ``skip_cache_key`` is set.

    Raises:
        MissingDependency: a non-source dependency's rule is not ready, or
            the context reported out-of-date deps after the impl ran.
        BuildException: a sandboxed impl requested a dep that was not
            preloaded, a subprocess failed, or an expected output file was
            not produced in the sandbox.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def decode_stream(stream) -> str:
        # CalledProcessError.stdout / .stderr are None when the stream was not
        # captured (and str when the process ran in text mode), so calling
        # .decode() on them unconditionally would raise and hide the real
        # build failure. Undecodable bytes are replaced for the same reason.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8", errors="replace")
        return str(stream)

    def dep_fetcher(dep, *, initial_load=False):
        if dep not in loaded_deps and in_sandbox:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            if not dep.startswith(":"):
                # rule deps (":name") are not files, so only file deps are copied
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # check that these deps are built! Since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return dep_rule

    if precomputed_deps:
        assert in_sandbox
        for dep in precomputed_deps:
            dep_fetcher(dep, initial_load=True)

    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else os.curdir,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException(
            "".join(
                [
                    str(e) + "\n",
                    Style.RESET_ALL,
                    f"Location: {scratch_path}\n",
                    f"Working Directory: {scratch_path}/{ctx.cwd}\n",
                    decode_stream(e.stdout),
                    decode_stream(e.stderr),
                ]
            )
        ) from e

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            ) from e

    if not skip_cache_key:
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                # don't hash rule deps
                continue
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))
        hashstate.record("done")

    return provided_value, hashstate.state() if not skip_cache_key else None
def get_deps(build_state: BuildState, rule: Rule):
    """
    Use static dependencies and caches to try and identify as *many* needed dependencies
    as possible, without *any* spurious dependencies.

    Returns a 3-tuple. When a static dependency is not ready (or its hash
    cannot be fetched) the impl is skipped and the result is
    ``(None, rule.deps, None)``. Otherwise it is
    ``(cache_key_or_None, ctx.inputs + rule.deps, ctx.uses_dynamic_inputs)``,
    where the cache key is only present if the impl ran to completion and
    every discovered input could be verified.
    """
    key_state = HashState()
    cache_fetcher, _ = make_cache_fetcher(build_state.cache_directory)
    dep_fetcher = make_dep_fetcher(build_state)

    ctx = PreviewContext(
        build_state.repo_root,
        rule.location,
        key_state,
        dep_fetcher,
        cache_fetcher,
    )

    log(f"Looking for static dependencies of {rule}")
    for dep in rule.deps:
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                log(f"Static dependency {dep} of {dep_rule} is not ready, skipping impl"
                    )
                # a static dep still has to be built, so the impl cannot run yet
                return None, rule.deps, None
            ctx.deps[dep] = dep_rule.provided_value
            if dep.startswith(":"):
                # rule deps are exposed by name and do not take part in the hash
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
                continue
        key_state.update(dep.encode("utf-8"))
        try:
            key_state.update(dep_fetcher(dep, get_hash=True))
        except MissingDependency:
            # static deps must be available before the impl runs; a missing
            # source file is reported later, in enqueue_deps
            return None, rule.deps, None

    # every static dependency is in place: run the impl to find dynamic ones
    try:
        log(f"Running impl of {rule} to discover dynamic dependencies")
        rule.provided_value = rule.impl(ctx)
        log(f"Impl of {rule} completed with discovered deps: {ctx.inputs}")
        for out in rule.outputs:
            # record outputs so asking for a different output of the same rule
            # does not fail when it is absent from the cache
            key_state.record(out)
    except CacheMiss:
        # the context stopped executing at the first uncached result
        resolved = False
        log(f"Cache miss while running impl of {rule}")
    except MissingDependency as e:
        # the missing dep has already been added to ctx.inputs
        resolved = False
        log(f"Dependencies {e.paths} were unavailable while running impl of {rule}"
            )
    except Exception as e:
        resolved = False
        print(
            "Error occurred during PreviewExecution. This may be normal, if a cached file that has not "
            "yet been reported / processed has been changed. However, it may also be an internal error, so "
            "it is being logged here. If it is an internal error, please contact the maintainer."
        )
        print(repr(e))
    else:
        resolved = True

    # only a fully completed impl run gets its dynamic inputs hashed
    if resolved:
        log(f"Runtime dependencies resolved for {rule}, now checking dynamic dependencies"
            )
        for input_path in ctx.inputs:
            if not input_path.startswith(":"):
                key_state.update(input_path.encode("utf-8"))
                try:
                    digest = dep_fetcher(input_path, get_hash=True)
                except MissingDependency as e:
                    # the impl did not need this dependency's contents, but it
                    # has not been verified to be up to date either
                    resolved = False
                    log(f"Dynamic dependencies {e.paths} were not needed for the impl, but are not up to date"
                        )
                    break
                key_state.update(digest)
                continue

            input_rule = build_state.target_rule_lookup.try_lookup(input_path)
            if input_rule is None or input_rule not in build_state.ready:
                resolved = False
                log(f"Dynamic rule dependency {input_path} is not yet ready")
                break

    cache_key = key_state.state() if resolved else None
    return (
        cache_key,
        ctx.inputs + rule.deps,
        ctx.uses_dynamic_inputs,
    )
def get_deps(build_state: BuildState, rule: Rule, *, skip_cache_key: bool):
    """
    Use static dependencies and caches to try and identify as *many* needed dependencies
    as possible, without *any* spurious dependencies.

    The rule's impl is run against a PreviewContext. If it completes, every
    input it recorded is checked: rule inputs (":name") must resolve to a
    ready rule, and all other inputs are fetched through the dep fetcher
    (and folded into the hash unless ``skip_cache_key`` is set).

    Returns:
        A 5-tuple ``(cache_key, provided_value, ctx.inputs,
        ctx.deferred_inputs, ctx.uses_dynamic_inputs)``. ``cache_key`` is the
        HashState's state when the impl completed and every input was
        verified, otherwise None. ``provided_value`` is None unless the impl
        returned.

    Raises:
        Exception: any unexpected exception from the impl is re-raised after
            being printed, unless the context reports dynamic inputs.
    """
    hashstate = HashState()
    cache_load_string, _ = make_cache_load(build_state.cache_directory)
    dep_fetcher = make_dep_fetcher(build_state)

    ctx = PreviewContext(
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_load_string,
    )

    ok = False
    provided_value = None
    try:
        log(f"Running impl of {rule} to discover dependencies")
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        log(f"Impl of {rule} completed with deps: {ctx.inputs}")
        ok = True
    except CacheMiss:
        log(f"Cache miss while running impl of {rule}")
        pass  # stops context execution
    except MissingDependency as e:
        log(f"Dependencies {e.paths} were unavailable while running impl of {rule}"
            )
        pass  # dep already added to ctx.inputs
    except Exception as e:
        print(
            "Error occurred during PreviewExecution. This may be normal, if a cached file that has not "
            "yet been reported / processed has been changed. However, it may also be an internal error, so "
            "it is being logged here. If it is an internal error, please contact the maintainer."
        )
        print(repr(e))
        # without dynamic inputs the failure is not tolerated and is propagated
        if not ctx.uses_dynamic_inputs:
            raise

    # if `ok`, hash loaded dynamic dependencies
    if ok:
        log(f"Inputs and dependencies resolved for {rule}")
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                # rule deps are never hashed; they only need to be ready
                input_dep = build_state.target_rule_lookup.try_lookup(
                    input_path)
                if input_dep is None or input_dep not in build_state.ready:
                    ok = False
                    log(f"Rule dependency {input_path} is not yet ready (or does not exist)"
                        )
                    break
            else:
                if not skip_cache_key:
                    hashstate.update(input_path.encode("utf-8"))
                try:
                    # NOTE(review): the second argument presumably selects what
                    # the fetcher returns ("rule" vs "hash") - confirm against
                    # make_dep_fetcher
                    data = dep_fetcher(input_path,
                                       "rule" if skip_cache_key else "hash")
                except MissingDependency as e:
                    # this dependency was not needed for deps calculation
                    # but is not verified to be up-to-date
                    ok = False
                    log(f"Dependencies {e.paths} were not needed for the impl, but are not up to date"
                        )
                    break
                else:
                    if not skip_cache_key:
                        hashstate.update(data)
        hashstate.record("done")

    return (
        hashstate.state() if ok else None,
        provided_value,
        ctx.inputs,
        ctx.deferred_inputs,
        ctx.uses_dynamic_inputs,
    )