def find(path, *, unsafe_ignore_extension=False):
    """Return the sorted ``//``-prefixed source files matching the glob ``path``.

    ``path`` is resolved against the current build root (or the repo root when
    it starts with ``//``). Results are memoised in ``make_callback.find_cache``
    keyed by the normalised target.

    Raises:
        BuildException: when called with no active build root, or when the
            pattern has no extension and ``unsafe_ignore_extension`` is false.
    """
    build_root = make_callback.build_root
    if build_root is None and not path.startswith("//"):
        # A relative pattern cannot be normalised without a build root; fail
        # with the intended error instead of a TypeError from path joining.
        raise BuildException(
            "Rules files can only define functions, not invoke find()")

    target = normalize_path(repo_root, build_root, path)
    cache = make_callback.find_cache
    if target in cache:
        return cache[target]

    if build_root is None:
        raise BuildException(
            "Rules files can only define functions, not invoke find()")

    if not unsafe_ignore_extension:
        ext = path.split(".")[-1]
        if "/" in ext:
            raise BuildException(
                "Cannot find() files without specifying an extension")

    matches = (
        os.path.relpath(match, repo_root)
        for match in glob(target, recursive=True)
    )
    # Cache the sorted list so cache hits return the same ordering as the
    # first call.
    out = cache[target] = sorted(
        "//" + match for match in matches if match in src_files
    )
    return out
def load_rules(path):
    """Execute the rules file at ``path`` and return its namespace as a Struct.

    The path is resolved against the rules root of the file doing the loading.
    Results are cached in ``LOAD_FRAME_CACHE``, and the load time is recorded
    in ``TIMINGS``.

    Raises:
        BuildException: if ``path`` is not a ``.py`` file or executing it fails.
    """
    path = normalize_path(repo_root, make_load_rules.rules_root, path)
    if not path.endswith(".py"):
        raise BuildException(f"Cannot import from a non .py file: {path}")

    if path in LOAD_FRAME_CACHE:
        return LOAD_FRAME_CACHE[path]

    start_time_stack.append(time.time())

    old_rules_root = make_load_rules.rules_root
    __builtins__["load"] = make_load_rules(repo_root, path)
    # We hide the callback here, since you should not be running the
    # callback (or anything else!) in an import, but just providing defs
    frame = {"__builtins__": __builtins__}
    reset_mock_imports(frame, ["load"])

    cached_root = make_callback.build_root
    make_callback.build_root = None
    try:
        with open(path) as f:
            source = f.read()
        try:
            exec(source, frame)
        except Exception:
            raise BuildException(
                f"Error while processing rules file {path}:\n" +
                f"\n{Style.RESET_ALL}" + traceback.format_exc())
    finally:
        # Restore loader state even when the rules file fails, so a failed
        # load does not leave the globals or the timing stack inconsistent.
        make_callback.build_root = cached_root
        make_load_rules.rules_root = old_rules_root
        started = start_time_stack.pop()

    TIMINGS[path] = load_time = time.time() - started
    # Shift the outermost start time so this load is not also billed to it.
    start_time_stack[0] += load_time

    out = LOAD_FRAME_CACHE[path] = Struct(frame)
    return out
def find(path, *, unsafe_ignore_extension=False):
    """Return a lazily evaluated dep set of source files matching ``path``.

    The glob is only run when the returned ``GlobDepSet`` invokes its
    generator. Dep sets are memoised per normalised target in
    ``make_callback.find_cache``.

    Raises:
        BuildException: if the pattern lacks an extension (unless
            ``unsafe_ignore_extension``), or if a relative pattern is used
            while no build root is active.
    """
    build_root = make_callback.build_root

    if not unsafe_ignore_extension and "/" in path.split(".")[-1]:
        raise BuildException(
            "Cannot find() files without specifying an extension"
        )

    if build_root is None and not path.startswith("//"):
        raise BuildException(
            "find() cannot be invoked outside a BUILD file except using a repo-relative path."
        )

    target = normalize_path(build_root, path)
    cache = make_callback.find_cache
    if target in cache:
        return cache[target]

    def gen():
        # Keep only matches whose resolved location is a tracked source file.
        found = [
            os.path.relpath(match, os.curdir)
            for match in glob(target, recursive=True)
            if os.path.relpath(os.path.realpath(match), os.curdir) in src_files
        ]
        found.sort()
        return found

    dep_set = GlobDepSet(gen)
    cache[target] = dep_set
    return dep_set
def add_dep(self, dep: str, *, load_provided=False, defer=False):
    """Register ``dep`` as an input of the current rule.

    Rule dependencies (``:name``) are deferred automatically unless their
    provided value is requested or this context never defers.

    Returns:
        True if the dependency was recorded as deferred, otherwise False.

    Raises:
        BuildException: on an invalid combination of flags, or when asking
            for the provided value of a source file.
    """
    if load_provided and defer:
        raise BuildException(
            "Cannot load provided value of a deferred dependency")

    dep = str(dep)

    implicit_defer = dep.startswith(":") and not (
        defer or load_provided or self.never_defer)
    if implicit_defer:
        defer = True

    if defer and self.never_defer:
        raise BuildException("Setup rules cannot defer dependencies")

    if defer:
        self.deferred_inputs.append(dep)
        return defer

    self.hashstate.record("add_dep", dep)
    self.inputs.append(self._resolve(dep))

    if load_provided:
        rule = self.dep_fetcher(self._resolve(dep))
        if rule is None:
            raise BuildException(
                "Cannot load provided value of a source file")
        provided = rule.provided_value
        self.deps[dep] = provided
        if rule.name:
            self.deps[rule.name] = provided

    return defer
def lookup(self, build_state: BuildState, dep: str) -> Rule:
    """Return the rule that builds ``dep``.

    Raises:
        BuildException: if ``dep`` is a source file, or if no rule is found
            by ``try_lookup``.
    """
    if dep in build_state.source_files:
        raise BuildException(
            f"Dependency {dep} is a source file, not a buildable dependency. "
            f"This is likely an internal error.")

    found = self.try_lookup(dep)
    if found is not None:
        return found
    raise BuildException(f"Unable to resolve dependency {dep}")
def set_provided_value(
    self,
    value: Optional[List[AbstractProvider]],
    build_state: Optional[BuildState],
    deps: List[str],
    deferred_deps: List[str],
    outputs: List[str],
):
    """Assemble and store the provider mapping exposed by this rule.

    The mapping always contains ``DepsProvider`` and ``OutputProvider``. When
    ``build_state`` is given it also contains the transitive variants, computed
    from the direct and deferred dependencies. Providers returned by the rule
    (``value``) are applied last and override the built-in entries.

    Raises:
        BuildException: if ``value`` is neither None nor a list of providers.
    """
    is_valid = value is None or (
        isinstance(value, list)
        and all(isinstance(x, AbstractProvider) for x in value)
    )
    if not is_valid:
        raise BuildException(
            f"Build rules can only return a list of Providers (or None), received {value}"
        )

    provided = {
        DepsProvider: IterableDepSet(*deps),
        OutputProvider: IterableDepSet(*outputs),
    }

    if build_state is not None:
        # Source files contribute themselves; rules contribute their own
        # transitive dependency set.
        transitive_deps = []
        for dep in deps + deferred_deps:
            if dep in build_state.source_files:
                transitive_deps.append(dep)
            else:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                transitive_deps.append(
                    dep_rule.provided_value[TransitiveDepsProvider])
        provided[TransitiveDepsProvider] = IterableDepSet(
            *deps, *transitive_deps)

        transitive_outputs = []
        for dep in deps + deferred_deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                transitive_outputs.append(
                    dep_rule.provided_value[TransitiveOutputProvider])
        provided[TransitiveOutputProvider] = IterableDepSet(
            *outputs, *transitive_outputs)

    for item in value or ():
        provided[type(item)] = item.value

    self._provided_value = provided
def dep_fetcher(dep, *, initial_load=False):
    """Make ``dep`` available to the running rule and return its rule.

    In the sandbox, a dependency seen for the first time is only accepted
    during the initial load; file dependencies are then copied into the
    scratch directory.

    Returns:
        The rule that builds ``dep``, or None when ``dep`` is a source file.

    Raises:
        BuildException: if a new dependency appears while rerunning the rule.
        MissingDependency: if the rule building ``dep`` is not ready yet.
    """
    is_new_in_sandbox = dep not in loaded_deps and in_sandbox
    if is_new_in_sandbox:
        if not initial_load:
            raise BuildException(
                f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
            )
        if not dep.startswith(":"):
            log(f"Loading dependency {dep} into sandbox")
            copy_helper(
                src_root=os.curdir,
                dest_root=scratch_path,
                src_names=[dep],
                symlink=not rule.do_not_symlink,
            )

    # check that these deps are built! Since they may not have been checked by the PreviewExecution.
    if dep in build_state.source_files:
        dep_rule = None
    else:
        dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
        if dep_rule not in build_state.ready:
            raise MissingDependency(dep)

    loaded_deps.add(dep)
    return dep_rule
def run_build(build_state: BuildState, targets: List[str], num_threads: int,
              quiet: bool):
    """Build ``targets`` with ``num_threads`` worker threads.

    Each target is looked up as given and, failing that, as the rule name
    ``":" + target``. The call blocks until the work queue drains.

    Raises:
        BuildException: the failure recorded by a worker, or a circular
            dependency report if rules remain scheduled but never became ready.
    """
    build_state.status_monitor = create_status_monitor(num_threads, quiet)

    # Keep the resolved Rule objects: `targets` holds names, while the
    # scheduling sets hold rules.
    root_rules = []
    for target in targets:
        root_rule = build_state.target_rule_lookup.try_lookup(
            target) or build_state.target_rule_lookup.lookup(
                build_state, ":" + target)
        root_rules.append(root_rule)
        build_state.scheduled_but_not_ready.add(root_rule)
        build_state.work_queue.put(root_rule)
        build_state.status_monitor.move(total=1)

    thread_instances = []
    for i in range(num_threads):
        thread = Thread(target=worker, args=(build_state, i), daemon=True)
        thread_instances.append(thread)
        thread.start()

    build_state.work_queue.join()

    if build_state.failure is not None:
        raise build_state.failure

    # A None sentinel per worker tells each thread to exit.
    for _ in range(num_threads):
        build_state.work_queue.put(None)
    for thread in thread_instances:
        thread.join()

    build_state.status_monitor.stop()

    if build_state.scheduled_but_not_ready:
        # there is a dependency cycle somewhere!
        stuck_root = next(
            (candidate for candidate in root_rules
             if candidate in build_state.scheduled_but_not_ready),
            None,
        )
        if stuck_root is None:
            raise BuildException("An internal error occurred.")

        chain = []
        pos = stuck_root
        while True:
            if pos in chain:
                chain.append(pos)
                raise BuildException(
                    f"Circular dependency detected: Rule {pos} depends on itself "
                    f"through the path: {' -> '.join(map(str, chain))}")
            chain.append(pos)
            pending = pos.pending_rule_dependencies
            if not pending:
                # Stuck without a pending dependency: not a cycle we can report.
                raise BuildException("An internal error occurred.")
            pos = next(iter(pending))
def register_default_build_rule(self, rule: str):
    """Record ``rule`` as the default build rule.

    Raises:
        BuildException: if a default build rule has already been registered
            (or from the ``_check_active`` / ``_check_rule`` validations).
    """
    self._check_active()
    self._check_rule(rule)

    existing = self.default_build_rule
    if existing is None:
        self.default_build_rule = rule
        return

    raise BuildException(
        f"Default build rule is already set to {self.default_build_rule}"
    )
def cache_loader(cache_key: str, rule: Rule, dest_root: str) -> bool:
    """Restore the outputs of ``rule`` for ``cache_key`` into ``dest_root``.

    With a remote ``bucket`` configured, the auxiliary loader is tried first,
    then the bucket; successful bucket downloads are saved back through
    ``aux_save``. Without a bucket, outputs are copied from the cache
    directory.

    Returns:
        True if the outputs were restored, False on a cache miss.

    Raises:
        BuildException: if the on-disk cache entry exists but files are missing.
    """
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key)
    if bucket:
        # the on-disk location is not used in the remote branch
        del cache_location
        if not aux_loader(cache_key, rule, dest_root):
            # probe the ".touch" entry first; a miss here means nothing is cached
            try:
                cache_fetcher(cache_key, ".touch")
            except CacheMiss:
                STATS["misses"] += delta
                return False
            for src_name, cache_path in zip(rule.outputs, cache_paths):
                cache_path = str(Path(cache_key).joinpath(cache_path))
                os.makedirs(dest_root, exist_ok=True)
                try:
                    if src_name.endswith("/"):
                        # directory output: download every blob under the prefix
                        os.makedirs(Path(dest_root).joinpath(src_name),
                                    exist_ok=True)
                        blobs: Iterator[Blob] = list(
                            bucket.list_blobs(prefix=cache_path))
                        for blob in blobs:
                            # strip "<cache_path>/" to get the path inside the directory
                            target = str(
                                Path(dest_root).joinpath(
                                    src_name).joinpath(
                                        blob.name[len(cache_path) + 1:]))
                            os.makedirs(dirname(target), exist_ok=True)
                            blob.download_to_filename(target)
                            # NOTE(review): nesting was lost in the flattened source;
                            # hits are counted per blob here - confirm.
                            STATS["hits"] += delta
                    else:
                        target = str(Path(dest_root).joinpath(src_name))
                        os.makedirs(dirname(target), exist_ok=True)
                        bucket.blob(cache_path).download_to_filename(
                            target)
                        STATS["hits"] += delta
                except NotFound:
                    STATS["misses"] += delta
                    return False
            # now that we have fetched, let's cache it on disk
            aux_save(cache_key, rule, dest_root)
        return True
    else:
        if not os.path.exists(cache_location):
            STATS["misses"] += delta
            return False
        try:
            copy_helper(
                src_root=cache_location,
                src_names=cache_paths,
                dest_root=dest_root,
                dest_names=rule.outputs,
            )
            STATS["hits"] += delta
        except FileNotFoundError:
            raise BuildException(
                "Cache corrupted. This should never happen unless you modified the cache "
                "directory manually! If so, delete the cache directory and try again."
            )
        return True
def dep_fetcher(dep):
    """Return the ready setup rule named by ``dep``.

    Dependencies that are not rule references (no ``:`` prefix) yield None.

    Raises:
        BuildException: if no setup rule is registered under ``dep``.
        MissingDependency: if the setup rule is not ready yet.
    """
    if not dep.startswith(":"):
        return None

    if dep not in direct_lookup:
        raise BuildException(f"Unable to find setup rule {dep}")

    setup_rule = direct_lookup[dep]
    log(f"Looking up setup rule {dep}")
    if setup_rule in ready:
        return setup_rule
    raise MissingDependency(dep)
def require_buildtool_version(self, min_version: str):
    """Fail unless the installed ``buildtool`` is at least ``min_version``.

    Does nothing when ``self.skip_version_check`` is set.

    Raises:
        BuildException: if the installed version is older than ``min_version``.
    """
    if self.skip_version_check:
        return

    installed = version("buildtool")
    curr_version = installed.replace("-", "9999")

    too_old = parse(curr_version) < parse(min_version)
    if too_old:
        raise BuildException(
            f"Current buildtool version {curr_version} < {min_version}, the minimum required "
            "for this project. Please upgrade, or pass in --skip-version-check to skip this check."
        )
def resolve(path):
    """Return ``path`` as a ``//``-prefixed repo-relative path.

    Raises:
        BuildException: if no build root is active.
    """
    current_root = make_callback.build_root
    if current_root is not None:
        return "//" + normalize_path(repo_root, current_root, path)

    raise BuildException(
        "Rules files can only define functions, not invoke resolve(). "
        "If you are in an impl() function, use ctx.resolve() instead.")
def load_rules(
    flags: Dict[str, object],
    *,
    skip_version_check: bool,
    workspace: bool = False,
):
    """Execute the WORKSPACE file or every BUILD file and collect their rules.

    Each file runs with ``callback``, ``find``, ``resolve``, ``load`` and
    ``flags`` installed as builtins (plus ``config`` for the workspace file).
    Per-file execution time is stored in ``TIMINGS``.

    Returns:
        The ``TargetLookup`` populated while the files ran.

    Raises:
        BuildException: if executing a build file raises.
    """
    flags = Struct(flags, default=True)
    repo_root = find_root()
    src_files = get_repo_files()

    if workspace:
        build_files = ["WORKSPACE"]
    else:
        build_files = [
            file for file in src_files if file.split("/")[-1] == "BUILD"
        ]

    target_rule_lookup = TargetLookup()
    sys.path.insert(0, repo_root)
    callback, find, resolve = make_callback(repo_root, None, set(src_files),
                                            target_rule_lookup)
    config.skip_version_check = skip_version_check

    for build_file in build_files:
        make_callback.build_root = os.path.dirname(build_file)

        with open(build_file) as f:
            load = make_load_rules(repo_root, build_file)

            # Expose the build API to the file about to be executed.
            injected = {
                "callback": callback,
                "find": find,
                "resolve": resolve,
                "load": load,
                "flags": flags,
            }
            for builtin_name, builtin_value in injected.items():
                __builtins__[builtin_name] = builtin_value

            frame = {"__builtins__": __builtins__}
            reset_mock_imports(
                frame, ["callback", "find", "load", "flags", "resolve"])

            if workspace:
                __builtins__["config"] = config
                config.active = True
                reset_mock_imports(frame, ["config"])

            start_time_stack.append(time.time())
            try:
                exec(f.read(), frame)
                config.active = False
            except Exception:
                raise BuildException(
                    f"Error while processing BUILD file {build_file}:\n" +
                    f"\n{Style.RESET_ALL}" + traceback.format_exc())

            elapsed = time.time() - start_time_stack.pop()
            TIMINGS[build_file] = elapsed

    make_callback.build_root = None
    return target_rule_lookup
def verify(self):
    """Check that no registered output lies inside a registered directory output.

    Raises:
        BuildException: if a parent directory of any registered path is itself
            a key of ``location_lookup``.
    """
    # check for overlaps involving location_lookups
    registered = [*self.direct_lookup, *self.location_lookup]
    for path in registered:
        # every ancestor directory, with a trailing slash, is a candidate prefix
        ancestor_keys = [str(parent) + "/" for parent in Path(path).parents]
        for key in ancestor_keys:
            if key not in self.location_lookup:
                continue
            raise BuildException(
                f"Outputs {key} and {path} overlap - all outputs must be disjoint"
            )
def find_root():
    """Return the nearest ancestor of the current directory containing WORKSPACE.

    The search starts at the current working directory and walks up through
    every parent, including the filesystem root.

    Raises:
        BuildException: if no directory on the way up contains ``WORKSPACE``.
    """
    repo_root = os.path.abspath(os.path.curdir)
    while True:
        if "WORKSPACE" in os.listdir(repo_root):
            return repo_root
        parent = os.path.dirname(repo_root)
        if parent == repo_root:
            # The filesystem root has now been inspected too; stop climbing.
            break
        repo_root = parent
    raise BuildException(
        "Unable to find WORKSPACE file - are you in the project directory?")
def normalize_path(build_root, path):
    """Return ``path`` as a normalised repo-relative path.

    Paths starting with ``//`` are taken relative to the repo root; all others
    are joined onto ``build_root``. A trailing ``/`` is preserved.

    Raises:
        BuildException: if the normalised path is absolute or climbs out of
            the repo via a leading ``..`` component.
    """
    path = str(path)
    suffix = "/" if path.endswith("/") else ""
    if path.startswith("//"):
        path = os.path.normpath(path[2:])
    else:
        path = os.path.normpath(os.path.join(build_root, path))
    # After normpath, ".." can only survive as a leading component. Compare
    # whole components so names that merely contain ".." stay legal.
    escapes_repo = path.split(os.sep)[0] == ".."
    if escapes_repo or path.startswith("/"):
        raise BuildException(
            f"Target `{path}` is not in the root directory of the repo.")
    return path + suffix
def normalize_path(repo_root, build_root, path):
    """Return ``path`` relative to ``repo_root``, preserving a trailing ``/``.

    Paths starting with ``//`` are taken relative to the repo root; all others
    are resolved against ``build_root`` inside the repo.

    Raises:
        BuildException: if the resolved path lies outside ``repo_root``.
    """
    trailing = "/" if path.endswith("/") else ""

    root = Path(repo_root)
    if path.startswith("//"):
        joined = root.joinpath(path[2:])
    else:
        joined = root.joinpath(build_root, path)

    # Compare lexically-absolute forms of both paths.
    absolute_path = Path(os.path.abspath(joined))
    absolute_root = Path(os.path.abspath(repo_root))

    inside_repo = (absolute_root == absolute_path
                   or absolute_root in absolute_path.parents)
    if not inside_repo:
        raise BuildException(
            f"Target `{absolute_path}` is not in the root directory of the repo.")

    return str(absolute_path.relative_to(absolute_root)) + trailing
def __init__(self, cwd: str, macros: Dict[str, Callable]):
    """Create a context rooted at ``cwd`` and attach each macro as a method.

    Every macro is exposed under its name and is called with this context as
    its first argument.

    Raises:
        BuildException: if a macro name collides with an existing attribute.
    """
    self.cwd = cwd
    self.deps = DotDict()

    for name in macros:
        macro = macros[name]
        if hasattr(self, name):
            raise BuildException(
                f"Macro {name} shadows existing Context attribute")

        # Bind the macro as a default argument so each wrapper keeps its own
        # macro rather than the loop's final value.
        def bound_macro(*args, __macro=macro, **kwargs):
            return __macro(self, *args, **kwargs)

        setattr(self, name, bound_macro)
def input(
    self,
    *,
    file: Optional[str] = None,
    sh: Optional[str] = None,
    env: Optional[Env] = None,
):
    """Delegate to the parent ``input`` for shell inputs; reject file inputs.

    Raises:
        BuildException: if ``file`` is given.
    """
    if file is None:
        return super().input(file=file, sh=sh, env=env)

    raise BuildException(
        f"Cannot add dependencies dynamically in a setup rule. Add {file} as a static dependency "
        f'then use input(sh="cat {file}") instead.')
def provider(*args):
    """Create a new ``AbstractProvider`` subclass.

    With one argument, instances take no constructor arguments and pass that
    argument to the base initializer. With no arguments, the subclass adds
    nothing to ``AbstractProvider``.

    Raises:
        BuildException: if more than one argument is given.
    """
    if len(args) > 1:
        raise BuildException("provider() takes at most one argument")

    if not args:

        class Provider(AbstractProvider):
            pass

        return Provider

    (default_value,) = args

    class Provider(AbstractProvider):
        def __init__(self):
            super().__init__(default_value)

    return Provider
def callback(
    *,
    name: Optional[str] = None,
    deps: Sequence[str] = (),
    impl: Callable = lambda _: None,
    out: Union[str, Sequence[str]] = (),
    do_not_symlink: bool = False,
    do_not_cache: bool = False,
):
    """Declare a build rule in the BUILD file currently being processed.

    ``deps`` are added to the context just before ``impl`` runs. The rule is
    registered under each of its outputs and, when named, under ``":" + name``.

    Returns:
        The string ``f":{name}"``.

    Raises:
        BuildException: if no build root is active.
    """
    location = make_callback.build_root
    if location is None:
        raise BuildException(
            "Rules files can only define functions, not invoke callback()"
        )

    output_names = [out] if isinstance(out, str) else out

    def wrapped_impl(ctx):
        ctx.add_deps(deps)
        return impl(ctx)

    normalized_outputs = []
    for output in output_names:
        normalized_outputs.append(normalize_path(location, output))

    rule = Rule(
        name=name,
        location=location,
        impl=wrapped_impl,
        outputs=normalized_outputs,
        do_not_symlink=do_not_symlink,
        do_not_cache=do_not_cache,
    )

    for registered_output in rule.outputs:
        add_target_rule(registered_output, rule)

    if name is not None:
        add_target_rule(":" + name, rule)

    return f":{name}"
def callback(
    *,
    name: Optional[str] = None,
    deps: Sequence[str] = (),
    impl: Callable,
    out: Union[str, Sequence[str]] = (),
    do_not_symlink: bool = False,
):
    """Declare a build rule with static dependencies in the current BUILD file.

    Rule references (``:name``) in ``deps`` are kept as written; every other
    dependency and every output is normalised against the build root.

    Returns:
        The string ``f":{name}"``.

    Raises:
        BuildException: if no build root is active.
    """
    location = make_callback.build_root
    if location is None:
        raise BuildException(
            "Rules files can only define functions, not invoke callback()")

    output_names = [out] if isinstance(out, str) else out

    resolved_deps = []
    for dep in deps:
        if dep.startswith(":"):
            resolved_deps.append(dep)
        else:
            resolved_deps.append(normalize_path(repo_root, location, dep))

    resolved_outputs = []
    for output in output_names:
        resolved_outputs.append(normalize_path(repo_root, location, output))

    rule = Rule(
        name=name,
        location=location,
        deps=resolved_deps,
        impl=impl,
        outputs=resolved_outputs,
        do_not_symlink=do_not_symlink,
    )

    for registered_output in rule.outputs:
        add_target_rule(registered_output, rule)

    if name is not None:
        add_target_rule(":" + name, rule)

    return f":{name}"
def fail(target):
    """Raise the error for a target that is produced by more than one rule.

    Raises:
        BuildException: always.
    """
    message = (
        f"The target `{target}` is built by multiple rules. Targets can only be produced by a single rule."
    )
    raise BuildException(message)
def _check_rule(rule: str):
    """Ensure ``rule`` is a rule reference, i.e. starts with ``:``.

    Raises:
        BuildException: if ``rule`` does not start with ``:``.
    """
    is_rule_reference = rule.startswith(":")
    if is_rule_reference:
        return
    raise BuildException(f"Can only register a rule, not {rule}")
def _check_active(self):
    """Ensure the config object is currently marked active.

    Raises:
        BuildException: if ``self.active`` is falsy.
    """
    if self.active:
        return
    raise BuildException("Cannot use config in this context.")
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_target: str,
    state_directory: str,
    quiet: bool,
):
    """Run the setup rule ``setup_target`` and the setup rules it depends on.

    A rule's shell queue is rerun when its recorded hash state differs from
    the stored one, when one of its rule dependencies was rebuilt in this
    invocation, or when one of its outputs is missing. The new hash state is
    then stored under the rule's name.

    Raises:
        BuildException: for an unknown setup target or dependency, a missing
            source file, or an unnamed setup rule.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup

    if setup_target not in direct_lookup:
        raise BuildException(
            f"Unknown or unspecified setup target {setup_target}")

    rebuilt: Set[str] = set()
    ready: Set[str] = set()
    work_queue = [direct_lookup[setup_target]]

    cache_fetcher, _ = make_cache_fetcher(state_directory)
    cache_memorize, _ = make_cache_memorize(state_directory)

    status_monitor = create_status_monitor(1, quiet)
    status_monitor.move(total=1)

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        hashstate = HashState()
        ctx = WorkspaceExecutionContext(hashstate)
        unchecked_rules = []

        for dep in todo.deps:
            hashstate.record(dep)
            if dep.startswith(":"):
                if dep not in direct_lookup:
                    raise BuildException(f"Unable to find setup rule {dep}")
                dep_rule = direct_lookup[dep]
                if dep_rule not in ready:
                    unchecked_rules.append(dep_rule)
                    continue
                ctx.deps[dep] = dep_rule.provided_value
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
            else:
                try:
                    hashstate.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")

        if unchecked_rules:
            # Wait for the unfinished dependencies; the last one to finish
            # puts this rule back on the queue.
            for dep in unchecked_rules:
                if dep not in work_queue:
                    log(f"Setup rule {todo} is enqueuing {dep}")
                    status_monitor.move(total=1)
                    work_queue.append(dep)
                else:
                    log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                        )
                dep.runtime_dependents.add(todo)
                todo.pending_rule_dependencies.add(dep)
        else:
            # our dependent rules are ready, now we need to see if we need to rerun
            todo.provided_value = todo.impl(ctx)

            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            try:
                ok = cache_fetcher("workspace", todo.name) == hashstate.state()
                if not ok:
                    log(f"State mismatch for rule {todo}, need to rerun")
            except CacheMiss:
                log(f"State not found for rule {todo}, need to run for first time"
                    )
                ok = False

            for dep in todo.deps:
                if dep.startswith(":"):
                    if direct_lookup[dep] in rebuilt:
                        log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                            )
                        # A rebuilt dependency invalidates this rule, so
                        # force the rerun.
                        ok = False

            for out in todo.outputs:
                if not os.path.exists(out):
                    log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                        )
                    ok = False
                    break

            if not ok:
                # we need to fully run
                ctx.run_shell_queue()
                rebuilt.add(todo)
                cache_memorize("workspace", todo.name, hashstate.state())

            # either way, now we can trigger our dependents
            ready.add(todo)
            for dep in todo.runtime_dependents:
                dep.pending_rule_dependencies.remove(todo)
                if not dep.pending_rule_dependencies:
                    work_queue.append(dep)
                    status_monitor.move(total=1)

        status_monitor.move(curr=1)
def worker(build_state: BuildState, index: int):
    """Worker thread body: take rules off the work queue and build them.

    Loops until it reads a ``None`` sentinel from the queue or sees that
    another worker recorded a failure. For each rule it computes the cache
    key and dependencies, enqueues anything still missing, loads the outputs
    from the cache or runs the build, and finally marks the rule ready and
    enqueues dependents with no pending dependencies left.

    Any exception is stored on ``build_state.failure`` as a BuildException;
    nothing is raised from this function.

    NOTE(review): block nesting was reconstructed from a flattened source -
    confirm against the original file.
    """
    # per-worker scratch directory; remove leftovers from earlier runs
    scratch_path = Path(f".scratch_{index}")
    if scratch_path.exists():
        rmtree(scratch_path, ignore_errors=True)

    _, cache_store_files = make_cache_store(build_state.cache_directory)
    _, cache_load_files = make_cache_load(build_state.cache_directory)

    while True:
        if build_state.failure is not None:
            # every thread needs to clear the queue since otherwise some other thread might still be filling it up
            clear_queue(build_state.work_queue)
            return  # some thread has failed, emergency stop
        todo = build_state.work_queue.get()
        if todo is None:
            # sentinel: no more work for this thread
            return

        start_time = time.time()

        try:
            build_state.status_monitor.update(index, "Parsing: " + str(todo))

            log(f"Target {todo} popped from queue by worker {index}")

            # only from caches, will never run a subprocess
            (
                cache_key,
                provided_value,
                deps,
                deferred_deps,
                uses_dynamic_deps,
            ) = get_deps(build_state, todo, skip_cache_key=todo.do_not_cache)

            if uses_dynamic_deps:
                log("Target", todo, "Uses dynamic deps")

            if cache_key is None:
                # unable to compute cache_key, potentially because not all deps are ready
                log(
                    f"Target {todo} either has unbuilt dependencies, "
                    f"or does not have a cached dynamic dependency resolved"
                )
                deps_ready = not enqueue_deps(
                    build_state,
                    todo,
                    deps,
                    catch_failure=uses_dynamic_deps,
                )
                if deps_ready:
                    log(f"Apparently {todo} is missing an input cache in the impl")
                else:
                    log(f"Apparently {todo} is waiting on unbuilt dependencies")
            else:
                log(f"All the dependencies of target {todo} are ready: {deps}")
                # if the cache_key is ready, *all* the deps must be ready, not just the discoverable deps!
                # unless the cache_key is not actually cached, in which case our inputs() could be wrong, so
                # we have to run in the working directory to verify
                deps_ready = True

            log(f"Enqueuing deferred dependencies for {todo}: {deferred_deps}")
            enqueue_deps(
                build_state, None, deferred_deps, catch_failure=uses_dynamic_deps
            )

            if deps_ready:
                done = False
                # check if we're already cached!
                if cache_key and not todo.do_not_cache:
                    try:
                        outputs = cache_load_files(cache_key, todo, os.curdir)
                        log(f"Target {todo} was loaded from the cache")
                        done = True
                    except CacheMiss:
                        pass
                if not done:
                    # time to execute! but *not* inside the lock
                    # when we release the lock, stuff may change outside, but
                    # we don't care since *our* dependencies (so far) are all available
                    log(f"Target {todo} is not in the cache, rerunning...")
                    build_state.status_monitor.update(index, "Building: " + str(todo))
                    # if cache_key is None or we use dynamic dependencies, we haven't finished evaluating
                    # the impl, so we don't know all the dependencies it could need. Therefore, we must
                    # run it in the working directory, so the impl can find the dependencies it needs
                    # Then, we run it *again*, to verify that the dependencies are accurate.
                    # The cache_key could be None due to race conditions, even if we don't use dynamic deps.
                    # Imagine:
                    # - Rule A depends on a provider of rule B, which is not yet built
                    #   (and on C, because of B's provider)
                    # - We determine that rule B is not built, so A depends on B but can't complete impl,
                    #   so A.cache_key = None
                    # - B is built
                    # - We try to enqueue the deps of A. Enqueue_deps responds that B is already built,
                    #   so A.deps_ready=True
                    # - So at this stage, A.cache_key = None, A.uses_dynamic_deps=False, but A.deps is missing C!
                    if uses_dynamic_deps or cache_key is None:
                        log(
                            f"We don't know the dependencies of {todo}, "
                            f"so we are running the impl in the root directory to find out!"
                        )
                        try:
                            _, cache_key = build(
                                build_state,
                                todo,
                                scratch_path=None,
                                skip_cache_key=todo.do_not_cache,
                            )
                        except MissingDependency as d:
                            log(
                                f"Target {todo} failed to fully build because of the missing dynamic "
                                f"dependencies: {d.paths}, requeuing"
                            )
                            scheduled = enqueue_deps(build_state, todo, d.paths)
                            if not scheduled:
                                # due to race conditions, our dependencies are actually all ready now!
                                # no one else will enqueue us, so it is safe to enqueue ourselves
                                build_state.work_queue.put(todo)
                                build_state.status_monitor.move(total=1)
                        else:
                            # now, if no exception has thrown, all the deps are available to the deps finder
                            (
                                alt_cache_key,
                                provided_value,
                                deps,
                                deferred_deps,
                                uses_dynamic_deps,
                            ) = get_deps(
                                build_state, todo, skip_cache_key=todo.do_not_cache
                            )
                            # second run, in the scratch directory with only the discovered deps
                            try:
                                provided_value, alt_cache_key_2 = build(
                                    build_state,
                                    todo,
                                    scratch_path=scratch_path,
                                    precomputed_deps=deps,
                                    skip_cache_key=todo.do_not_cache,
                                )
                            except CalledProcessError as e:
                                build_state.status_monitor.stop()
                                print(e.cmd)
                                print(e)
                                raise BuildException(
                                    f"The dependencies for target {todo} are not fully specified, "
                                    f"as it failed to build when provided only with them."
                                )
                            if not todo.do_not_cache:
                                assert (
                                    cache_key == alt_cache_key == alt_cache_key_2
                                ), "An internal error has occurred"
                            done = True
                    else:
                        log(
                            f"We know all the dependencies of {todo}, so we can run it in a sandbox"
                        )
                        provided_value, alt_cache_key = build(
                            build_state,
                            todo,
                            scratch_path=scratch_path,
                            precomputed_deps=deps,
                            skip_cache_key=todo.do_not_cache,
                        )
                        if not todo.do_not_cache:
                            assert (
                                cache_key == alt_cache_key
                            ), "An internal error has occurred"
                        done = True

                    if done:
                        log(f"Target {todo} has been built fully!")
                        if not todo.do_not_cache:
                            outputs = cache_store_files(cache_key, todo, os.curdir)
                        else:
                            # uncached rule: list output files directly, expanding directory outputs
                            outputs = []
                            for out in todo.outputs:
                                if out.endswith("/"):
                                    outputs.extend(
                                        os.path.join(path, filename)
                                        for path, subdirs, files in os.walk(out)
                                        for filename in files
                                    )
                                else:
                                    outputs.append(out)

                    if scratch_path.exists():
                        rmtree(scratch_path, ignore_errors=True)

                if done:
                    waiting_for_deferred = enqueue_deps(
                        build_state, todo, deferred_deps
                    )
                    # We have to wait for deferred dependencies to build because the TransitiveOutputProvider
                    # needs them to complete. However, we can run the build itself before the deferred deps finish.
                    if waiting_for_deferred:
                        log(
                            f"Target {todo} has been built, but is waiting for deferred dependencies"
                        )
                    else:
                        todo.set_provided_value(
                            provided_value, build_state, deps, deferred_deps, outputs
                        )
                        with build_state.scheduling_lock:
                            build_state.ready.add(todo)
                            # no one will ever add us back, since we are in `ready`
                            build_state.scheduled_but_not_ready.remove(todo)
                            # now it's time to set up our dependents
                            # we need to be inside the lock even if we have no dependents, in case
                            # we *gain* dependents from another thread which could have held the lock!
                            for dependent in todo.runtime_dependents:
                                dependent.pending_rule_dependencies.remove(todo)
                                if not dependent.pending_rule_dependencies:
                                    # this guy is ready to go
                                    build_state.work_queue.put(dependent)
                                    build_state.status_monitor.move(total=1)

            # either way, we're done with this task for now
            build_state.status_monitor.move(curr=1)
            build_state.work_queue.task_done()

            # record timing data
            run_time = time.time() - start_time
            TIMINGS[str(todo)] += run_time
        except Exception as e:
            # BuildExceptions carry their own message; anything else gets a traceback
            if isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}"
            else:
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            build_state.status_monitor.stop()
            build_state.failure = BuildException(
                f"Error while executing rule {todo}: " + str(e) + suffix
            )
            build_state.work_queue.task_done()
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been
    obtained. Now we need to run. Either we will successfully finish everything,
    or we will get a missing dependency and have to requeue

    When ``scratch_path`` is given the rule runs in that sandbox: dependencies
    are copied in, the shell queue is executed, and the outputs are copied
    back to the repo root. Otherwise the impl is only evaluated against the
    repo root.

    Returns:
        The hash state after recording static deps, outputs and all non-rule
        inputs.

    Raises:
        MissingDependency: if a dependency is built by a rule that is not ready.
        BuildException: if a shell command fails or an output is not produced.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def _as_text(stream):
        # CalledProcessError.stdout/stderr are None when not captured.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            return stream.decode("utf-8")
        return str(stream)

    def load_deps(deps):
        deps = set(deps) - loaded_deps
        # check that these deps are built! Since they have not been checked by the PreviewExecution.
        missing_deps = []
        for dep in deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(
                    build_state, dep)
                if dep_rule not in build_state.ready:
                    missing_deps.append(dep)
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if in_sandbox:
            log(f"Loading dependencies {deps} into sandbox")
            copy_helper(
                src_root=build_state.repo_root,
                dest_root=scratch_path,
                src_names=[dep for dep in deps if not dep.startswith(":")],
                symlink=not rule.do_not_symlink,
            )

    load_deps(deps)
    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else build_state.repo_root,
        scratch_path.joinpath(rule.location)
        if in_sandbox else Path(build_state.repo_root).joinpath(rule.location),
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
        if dep not in build_state.source_files:
            ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException("".join([
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            _as_text(e.stdout),
            _as_text(e.stderr),
            traceback.format_exc(),
        ])) from e

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            ) from e

    for input_path in ctx.inputs:
        if input_path.startswith(":"):
            # don't hash rule deps
            continue
        hashstate.update(input_path.encode("utf-8"))
        hashstate.update(hash_file(input_path))

    return hashstate.state()
def worker(build_state: BuildState, index: int):
    """Worker thread body: take rules off the work queue and build them.

    Loops until it reads a ``None`` sentinel from the queue or sees that
    another worker recorded a failure. For each rule it computes the cache
    key, enqueues missing dependencies, loads outputs from the cache or runs
    the build, then marks the rule ready and enqueues dependents with no
    pending dependencies left.

    Any exception is stored on ``build_state.failure`` as a BuildException;
    nothing is raised from this function.

    NOTE(review): block nesting was reconstructed from a flattened source -
    confirm against the original file.
    """
    # per-worker scratch directory under the repo root; remove leftovers
    scratch_path = Path(build_state.repo_root).joinpath(
        Path(f".scratch_{index}"))
    if scratch_path.exists():
        rmtree(scratch_path, ignore_errors=True)

    _, cache_save = make_cache_memorize(build_state.cache_directory)
    _, cache_loader = make_cache_fetcher(build_state.cache_directory)

    while True:
        if build_state.failure is not None:
            # every thread needs to clear the queue since otherwise some other thread might still be filling it up
            clear_queue(build_state.work_queue)
            return  # some thread has failed, emergency stop
        todo = build_state.work_queue.get()
        if todo is None:
            # sentinel: no more work for this thread
            return

        start_time = time.time()

        try:
            build_state.status_monitor.update(index, "Parsing: " + str(todo))

            log(f"Target {todo} popped from queue by worker {index}")

            # only from caches, will never run a subprocess
            cache_key, deps = get_deps(build_state, todo)

            if cache_key is None:
                # unable to compute cache_key, potentially because not all deps are ready
                log(f"Target {todo} either has unbuilt dependencies, "
                    f"or does not have a cached dynamic dependency resolved")
                deps_ready = not enqueue_deps(build_state, todo, deps)
                if deps_ready:
                    log("Apparently it is missing an input cache in the impl")
                else:
                    log("Apparently it is waiting on unbuilt dependencies")
            else:
                log(f"All the dependencies of target {todo} are ready: {deps}")
                # if the cache_key is ready, *all* the deps must be ready, not just the discoverable deps!
                deps_ready = True

            if deps_ready:
                done = False
                # check if we're already cached!
                if cache_key:
                    if cache_loader(cache_key, todo, build_state.repo_root):
                        log(f"Target {todo} was loaded from the cache")
                        done = True

                if not done:
                    # time to execute! but *not* inside the lock
                    # when we release the lock, stuff may change outside, but
                    # we don't care since *our* dependencies (so far) are all available
                    log(f"Target {todo} is not in the cache, rerunning...")
                    build_state.status_monitor.update(index,
                                                      "Building: " + str(todo))
                    try:
                        # if cache_key is None, we haven't finished evaluating the impl, so we
                        # don't know all the dependencies it could need. Therefore, we must
                        # run it in the working directory, so the impl can find the dependencies it needs
                        # Then, we run it *again*, to verify that the dependencies are accurate
                        in_sandbox = cache_key is not None

                        if not in_sandbox:
                            log(f"We don't know the dependencies of {todo}, "
                                f"so we are running the impl in the root directory to find out!"
                                )
                            cache_key = build(build_state,
                                              todo,
                                              deps,
                                              scratch_path=None)
                            # now, if no exception has thrown, all the deps are available to the deps finder
                            alt_cache_key, deps = get_deps(build_state, todo)
                            # the alt_cache_key *MAY HAVE CHANGED*, because dynamic dependencies
                            # are not used for the input() cache_key [since they are only known afterwards]
                            # however, the alt_cache_key should match the cache_key of the subsequent run
                            try:
                                alt_cache_key = build(
                                    build_state,
                                    todo,
                                    deps,
                                    scratch_path=scratch_path,
                                )
                            except MissingDependency:
                                raise BuildException(
                                    "An internal error has occurred.")
                            except CalledProcessError as e:
                                build_state.status_monitor.stop()
                                print(e.cmd)
                                print(e)
                                raise BuildException(
                                    f"The dependencies for target {todo} are not fully specified, "
                                    f"as it failed to build when provided only with them."
                                )
                            assert (cache_key == alt_cache_key
                                    ), "An internal error has occurred"
                        else:
                            log(f"We know all the dependencies of {todo}, so we can run it in a sandbox"
                                )
                            build(build_state,
                                  todo,
                                  deps,
                                  scratch_path=scratch_path)
                        log(f"Target {todo} has been built fully!")
                        cache_save(cache_key, todo, scratch_path)
                        done = True
                    except MissingDependency as d:
                        log(f"Target {todo} failed to fully build because of the missing dynamic "
                            f"dependencies: {d.paths}, requeuing")
                        scheduled = enqueue_deps(build_state, todo, d.paths)
                        if not scheduled:
                            # due to race conditions, our dependencies are actually all ready now!
                            # no one else will enqueue us, so it is safe to enqueue ourselves
                            build_state.work_queue.put(todo)
                            build_state.status_monitor.move(total=1)

                    if scratch_path.exists():
                        rmtree(scratch_path, ignore_errors=True)

                if done:
                    with build_state.scheduling_lock:
                        build_state.ready.add(todo)
                        # no one will ever add us back, since we are in `ready`
                        build_state.scheduled_but_not_ready.remove(todo)
                        # now it's time to set up our dependents
                        # we need to be inside the lock even if we have no dependents, in case
                        # we *gain* dependents from another thread which could have held the lock!
                        for dependent in todo.runtime_dependents:
                            dependent.pending_rule_dependencies.remove(todo)
                            if not dependent.pending_rule_dependencies:
                                # this guy is ready to go
                                build_state.work_queue.put(dependent)
                                build_state.status_monitor.move(total=1)

            # either way, we're done with this task for now
            build_state.status_monitor.move(curr=1)
            build_state.work_queue.task_done()

            # record timing data
            run_time = time.time() - start_time
            TIMINGS[str(todo)] += run_time
        except Exception as e:
            # BuildExceptions carry their own message; anything else gets a traceback
            if not isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            else:
                suffix = ""
            build_state.status_monitor.stop()
            build_state.failure = BuildException(
                f"Error while executing rule {todo}: " + str(e) + suffix)
            build_state.work_queue.task_done()