# Standard-library and third-party imports used by the functions below.
# Project-internal helpers (BuildState, BuildException, Rule lookups, log,
# get_deps, enqueue_deps, build, clear_queue, CacheMiss, the cache and
# status-monitor factories, config, STATS, and the *TIMINGS tables) are
# assumed to be imported from the surrounding package.
import os
import time
import traceback
from json import loads
from pathlib import Path
from queue import Empty
from shutil import copy2, rmtree
from subprocess import CalledProcessError
from threading import Thread
from typing import List, Tuple

from colorama import Style


def run_build(build_state: BuildState, targets: List[str], num_threads: int, quiet: bool):
    build_state.status_monitor = create_status_monitor(num_threads, quiet)

    for target in targets:
        root_rule = build_state.target_rule_lookup.try_lookup(
            target
        ) or build_state.target_rule_lookup.lookup(build_state, ":" + target)
        build_state.scheduled_but_not_ready.add(root_rule)
        build_state.work_queue.put(root_rule)
        build_state.status_monitor.move(total=1)

    thread_instances = []
    for i in range(num_threads):
        thread = Thread(target=worker, args=(build_state, i), daemon=True)
        thread_instances.append(thread)
        thread.start()
    build_state.work_queue.join()

    if build_state.failure is not None:
        raise build_state.failure

    for _ in range(num_threads):
        build_state.work_queue.put(None)
    for thread in thread_instances:
        thread.join()

    build_state.status_monitor.stop()

    if build_state.scheduled_but_not_ready:
        # there is a dependency cycle somewhere! targets are strings, so map
        # each one back to its root rule before checking whether it is stuck
        for target in targets:
            root_rule = build_state.target_rule_lookup.try_lookup(
                target
            ) or build_state.target_rule_lookup.lookup(build_state, ":" + target)
            if root_rule in build_state.scheduled_but_not_ready:
                break
        else:
            raise BuildException("An internal error occurred.")

        chain = []
        pos = root_rule
        while True:
            if pos in chain:
                chain.append(pos)
                raise BuildException(
                    f"Circular dependency detected: Rule {pos} depends on itself "
                    f"through the path: {' -> '.join(map(str, chain))}"
                )
            else:
                chain.append(pos)
                pos = next(iter(pos.pending_rule_dependencies))
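
# The cycle report above works because every rule left in
# `scheduled_but_not_ready` still has at least one pending dependency, so
# following any pending edge from a stuck root must eventually revisit a rule.
# A minimal, self-contained sketch of that walk, with plain dicts standing in
# for Rule objects (names here are illustrative, not part of the build system):
def find_cycle(pending_deps, root):
    """Follow one pending dependency at a time until a rule repeats."""
    chain = []
    pos = root
    while pos not in chain:
        chain.append(pos)
        pos = next(iter(pending_deps[pos]))  # any pending edge continues the walk
    return chain + [pos]

# Example: find_cycle({"a": {"b"}, "b": {"c"}, "c": {"b"}}, "a")
# returns ["a", "b", "c", "b"], i.e. the cycle b -> c -> b reached from a.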
def worker(build_state: BuildState, index: int):
    scratch_path = Path(build_state.repo_root).joinpath(Path(f".scratch_{index}"))
    if scratch_path.exists():
        rmtree(scratch_path, ignore_errors=True)

    _, cache_save = make_cache_memorize(build_state.cache_directory)
    _, cache_loader = make_cache_fetcher(build_state.cache_directory)

    while True:
        if build_state.failure is not None:
            # every thread needs to clear the queue, since otherwise some
            # other thread might still be filling it up
            clear_queue(build_state.work_queue)
            return  # some thread has failed, emergency stop
        todo = build_state.work_queue.get()
        if todo is None:
            return

        start_time = time.time()

        try:
            build_state.status_monitor.update(index, "Parsing: " + str(todo))
            log(f"Target {todo} popped from queue by worker {index}")

            # only from caches, will never run a subprocess
            cache_key, deps = get_deps(build_state, todo)
            if cache_key is None:
                # unable to compute cache_key, potentially because not all deps are ready
                log(
                    f"Target {todo} either has unbuilt dependencies, "
                    f"or does not have a cached dynamic dependency resolved"
                )
                deps_ready = not enqueue_deps(build_state, todo, deps)
                if deps_ready:
                    log("Apparently it is missing an input cache in the impl")
                else:
                    log("Apparently it is waiting on unbuilt dependencies")
            else:
                log(f"All the dependencies of target {todo} are ready: {deps}")
                # if the cache_key is ready, *all* the deps must be ready,
                # not just the discoverable deps!
                deps_ready = True

            if deps_ready:
                done = False
                # check if we're already cached!
                if cache_key:
                    if cache_loader(cache_key, todo, build_state.repo_root):
                        log(f"Target {todo} was loaded from the cache")
                        done = True

                if not done:
                    # time to execute! but *not* inside the lock
                    # when we release the lock, stuff may change outside, but
                    # we don't care since *our* dependencies (so far) are all available
                    log(f"Target {todo} is not in the cache, rerunning...")
                    build_state.status_monitor.update(index, "Building: " + str(todo))
                    try:
                        # if cache_key is None, we haven't finished evaluating the impl,
                        # so we don't know all the dependencies it could need. Therefore,
                        # we must run it in the working directory, so the impl can find
                        # the dependencies it needs. Then, we run it *again*, to verify
                        # that the dependencies are accurate
                        in_sandbox = cache_key is not None
                        if not in_sandbox:
                            log(
                                f"We don't know the dependencies of {todo}, "
                                f"so we are running the impl in the root directory to find out!"
                            )
                            cache_key = build(build_state, todo, deps, scratch_path=None)
                            # now that no exception has been thrown, all the deps are
                            # available to the deps finder
                            alt_cache_key, deps = get_deps(build_state, todo)
                            # the alt_cache_key *MAY HAVE CHANGED*, because dynamic
                            # dependencies are not used for the input() cache_key
                            # [since they are only known afterwards]; however, the
                            # alt_cache_key should match the cache_key of the subsequent run
                            try:
                                alt_cache_key = build(
                                    build_state,
                                    todo,
                                    deps,
                                    scratch_path=scratch_path,
                                )
                            except MissingDependency:
                                raise BuildException("An internal error has occurred.")
                            except CalledProcessError as e:
                                build_state.status_monitor.stop()
                                print(e.cmd)
                                print(e)
                                raise BuildException(
                                    f"The dependencies for target {todo} are not fully specified, "
                                    f"as it failed to build when provided only with them."
                                )
                            assert (
                                cache_key == alt_cache_key
                            ), "An internal error has occurred"
                        else:
                            log(
                                f"We know all the dependencies of {todo}, "
                                f"so we can run it in a sandbox"
                            )
                            build(build_state, todo, deps, scratch_path=scratch_path)
                        log(f"Target {todo} has been built fully!")
                        cache_save(cache_key, todo, scratch_path)
                        done = True
                    except MissingDependency as d:
                        log(
                            f"Target {todo} failed to fully build because of the missing "
                            f"dynamic dependencies: {d.paths}, requeuing"
                        )
                        scheduled = enqueue_deps(build_state, todo, d.paths)
                        if not scheduled:
                            # due to race conditions, our dependencies are actually all
                            # ready now! no one else will enqueue us, so it is safe to
                            # enqueue ourselves
                            build_state.work_queue.put(todo)
                            build_state.status_monitor.move(total=1)

                    if scratch_path.exists():
                        rmtree(scratch_path, ignore_errors=True)

                if done:
                    with build_state.scheduling_lock:
                        build_state.ready.add(todo)
                        # no one will ever add us back, since we are in `ready`
                        build_state.scheduled_but_not_ready.remove(todo)
                        # now it's time to set up our dependents
                        # we need to be inside the lock even if we have no dependents,
                        # in case we *gain* dependents from another thread which could
                        # have held the lock!
                        for dependent in todo.runtime_dependents:
                            dependent.pending_rule_dependencies.remove(todo)
                            if not dependent.pending_rule_dependencies:
                                # this guy is ready to go
                                build_state.work_queue.put(dependent)
                                build_state.status_monitor.move(total=1)

            # either way, we're done with this task for now
            build_state.status_monitor.move(curr=1)
            build_state.work_queue.task_done()

            # record timing data
            run_time = time.time() - start_time
            TIMINGS[str(todo)] += run_time
        except Exception as e:
            if not isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            else:
                suffix = ""
            build_state.status_monitor.stop()
            build_state.failure = BuildException(
                f"Error while executing rule {todo}: " + str(e) + suffix
            )
            build_state.work_queue.task_done()
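
# `clear_queue` is not shown in this excerpt. For the emergency stop above to
# work, it must drain the work queue while balancing every `get` with a
# `task_done`, so that `work_queue.join()` in run_build can still return and
# surface `build_state.failure`. A minimal sketch of that contract under a
# hypothetical name (illustrative, not the project's actual helper):
def drain_queue(q):
    """Remove and acknowledge everything currently in `q`."""
    while True:
        try:
            q.get_nowait()
        except Empty:  # queue.Empty, imported above
            return
        q.task_done()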
def worker(build_state: BuildState, index: int):
    scratch_path = Path(f".scratch_{index}")
    if scratch_path.exists():
        rmtree(scratch_path, ignore_errors=True)

    _, cache_store_files = make_cache_store(build_state.cache_directory)
    _, cache_load_files = make_cache_load(build_state.cache_directory)

    while True:
        if build_state.failure is not None:
            # every thread needs to clear the queue, since otherwise some
            # other thread might still be filling it up
            clear_queue(build_state.work_queue)
            return  # some thread has failed, emergency stop
        todo = build_state.work_queue.get()
        if todo is None:
            return

        start_time = time.time()

        try:
            build_state.status_monitor.update(index, "Parsing: " + str(todo))
            log(f"Target {todo} popped from queue by worker {index}")

            # only from caches, will never run a subprocess
            (
                cache_key,
                provided_value,
                deps,
                deferred_deps,
                uses_dynamic_deps,
            ) = get_deps(build_state, todo, skip_cache_key=todo.do_not_cache)

            if uses_dynamic_deps:
                log(f"Target {todo} uses dynamic deps")

            if cache_key is None:
                # unable to compute cache_key, potentially because not all deps are ready
                log(
                    f"Target {todo} either has unbuilt dependencies, "
                    f"or does not have a cached dynamic dependency resolved"
                )
                deps_ready = not enqueue_deps(
                    build_state,
                    todo,
                    deps,
                    catch_failure=uses_dynamic_deps,
                )
                if deps_ready:
                    log(f"Apparently {todo} is missing an input cache in the impl")
                else:
                    log(f"Apparently {todo} is waiting on unbuilt dependencies")
            else:
                log(f"All the dependencies of target {todo} are ready: {deps}")
                # if the cache_key is ready, *all* the deps must be ready, not just
                # the discoverable deps! unless the cache_key is not actually cached,
                # in which case our inputs() could be wrong, so we have to run in the
                # working directory to verify
                deps_ready = True

            log(f"Enqueuing deferred dependencies for {todo}: {deferred_deps}")
            enqueue_deps(
                build_state, None, deferred_deps, catch_failure=uses_dynamic_deps
            )

            if deps_ready:
                done = False
                # check if we're already cached!
                if cache_key and not todo.do_not_cache:
                    try:
                        outputs = cache_load_files(cache_key, todo, os.curdir)
                        log(f"Target {todo} was loaded from the cache")
                        done = True
                    except CacheMiss:
                        pass

                if not done:
                    # time to execute! but *not* inside the lock
                    # when we release the lock, stuff may change outside, but
                    # we don't care since *our* dependencies (so far) are all available
                    log(f"Target {todo} is not in the cache, rerunning...")
                    build_state.status_monitor.update(index, "Building: " + str(todo))
                    # if cache_key is None or we use dynamic dependencies, we haven't
                    # finished evaluating the impl, so we don't know all the dependencies
                    # it could need. Therefore, we must run it in the working directory,
                    # so the impl can find the dependencies it needs. Then, we run it
                    # *again*, to verify that the dependencies are accurate.
                    # The cache_key could be None due to race conditions, even if we
                    # don't use dynamic deps. Imagine:
                    # - Rule A depends on a provider of rule B, which is not yet built
                    #   (and on C, because of B's provider)
                    # - We determine that rule B is not built, so A depends on B but
                    #   can't complete impl, so A.cache_key = None
                    # - B is built
                    # - We try to enqueue the deps of A. Enqueue_deps responds that B
                    #   is already built, so A.deps_ready = True
                    # - So at this stage, A.cache_key = None,
                    #   A.uses_dynamic_deps = False, but A.deps is missing C!
                    if uses_dynamic_deps or cache_key is None:
                        log(
                            f"We don't know the dependencies of {todo}, "
                            f"so we are running the impl in the root directory to find out!"
                        )
                        try:
                            _, cache_key = build(
                                build_state,
                                todo,
                                scratch_path=None,
                                skip_cache_key=todo.do_not_cache,
                            )
                        except MissingDependency as d:
                            log(
                                f"Target {todo} failed to fully build because of the missing "
                                f"dynamic dependencies: {d.paths}, requeuing"
                            )
                            scheduled = enqueue_deps(build_state, todo, d.paths)
                            if not scheduled:
                                # due to race conditions, our dependencies are actually
                                # all ready now! no one else will enqueue us, so it is
                                # safe to enqueue ourselves
                                build_state.work_queue.put(todo)
                                build_state.status_monitor.move(total=1)
                        else:
                            # now that no exception has been thrown, all the deps are
                            # available to the deps finder
                            (
                                alt_cache_key,
                                provided_value,
                                deps,
                                deferred_deps,
                                uses_dynamic_deps,
                            ) = get_deps(
                                build_state, todo, skip_cache_key=todo.do_not_cache
                            )
                            try:
                                provided_value, alt_cache_key_2 = build(
                                    build_state,
                                    todo,
                                    scratch_path=scratch_path,
                                    precomputed_deps=deps,
                                    skip_cache_key=todo.do_not_cache,
                                )
                            except CalledProcessError as e:
                                build_state.status_monitor.stop()
                                print(e.cmd)
                                print(e)
                                raise BuildException(
                                    f"The dependencies for target {todo} are not fully specified, "
                                    f"as it failed to build when provided only with them."
                                )
                            if not todo.do_not_cache:
                                assert (
                                    cache_key == alt_cache_key == alt_cache_key_2
                                ), "An internal error has occurred"
                            done = True
                    else:
                        log(
                            f"We know all the dependencies of {todo}, "
                            f"so we can run it in a sandbox"
                        )
                        provided_value, alt_cache_key = build(
                            build_state,
                            todo,
                            scratch_path=scratch_path,
                            precomputed_deps=deps,
                            skip_cache_key=todo.do_not_cache,
                        )
                        if not todo.do_not_cache:
                            assert (
                                cache_key == alt_cache_key
                            ), "An internal error has occurred"
                        done = True

                    if done:
                        log(f"Target {todo} has been built fully!")
                        if not todo.do_not_cache:
                            outputs = cache_store_files(cache_key, todo, os.curdir)
                        else:
                            outputs = []
                            for out in todo.outputs:
                                if out.endswith("/"):
                                    outputs.extend(
                                        os.path.join(path, filename)
                                        for path, subdirs, files in os.walk(out)
                                        for filename in files
                                    )
                                else:
                                    outputs.append(out)

                    if scratch_path.exists():
                        rmtree(scratch_path, ignore_errors=True)

                if done:
                    waiting_for_deferred = enqueue_deps(
                        build_state, todo, deferred_deps
                    )
                    # We have to wait for deferred dependencies to build because the
                    # TransitiveOutputProvider needs them to complete. However, we can
                    # run the build itself before the deferred deps finish.
                    if waiting_for_deferred:
                        log(
                            f"Target {todo} has been built, but is waiting for "
                            f"deferred dependencies"
                        )
                    else:
                        todo.set_provided_value(
                            provided_value, build_state, deps, deferred_deps, outputs
                        )
                        with build_state.scheduling_lock:
                            build_state.ready.add(todo)
                            # no one will ever add us back, since we are in `ready`
                            build_state.scheduled_but_not_ready.remove(todo)
                            # now it's time to set up our dependents
                            # we need to be inside the lock even if we have no
                            # dependents, in case we *gain* dependents from another
                            # thread which could have held the lock!
                            for dependent in todo.runtime_dependents:
                                dependent.pending_rule_dependencies.remove(todo)
                                if not dependent.pending_rule_dependencies:
                                    # this guy is ready to go
                                    build_state.work_queue.put(dependent)
                                    build_state.status_monitor.move(total=1)

            # either way, we're done with this task for now
            build_state.status_monitor.move(curr=1)
            build_state.work_queue.task_done()

            # record timing data
            run_time = time.time() - start_time
            TIMINGS[str(todo)] += run_time
        except Exception as e:
            if isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}"
            else:
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            build_state.status_monitor.stop()
            build_state.failure = BuildException(
                f"Error while executing rule {todo}: " + str(e) + suffix
            )
            build_state.work_queue.task_done()
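
# The cache helpers above are used through a narrow contract: loading either
# materializes the rule's outputs into the destination and returns their paths,
# or raises CacheMiss; storing copies the outputs into a directory keyed by
# cache_key. A minimal sketch of that contract, assuming a flat file layout
# (illustrative only; the real helpers come from make_cache_store /
# make_cache_load and their layout may differ):
def load_files_sketch(cache_dir, cache_key, dest_root):
    src = os.path.join(cache_dir, cache_key)
    if not os.path.isdir(src):
        raise CacheMiss(cache_key)  # CacheMiss as used by the worker above
    outputs = []
    for name in os.listdir(src):
        copy2(os.path.join(src, name), os.path.join(dest_root, name))
        outputs.append(name)
    return outputs

def store_files_sketch(cache_dir, cache_key, outputs):
    dest = os.path.join(cache_dir, cache_key)
    os.makedirs(dest, exist_ok=True)
    for path in outputs:
        copy2(path, os.path.join(dest, os.path.basename(path)))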
def cli(
    targets: Tuple[str],
    profile: bool,
    shell_log: bool,
    locate: bool,
    verbose: bool,
    quiet: bool,
    skip_version_check: bool,
    skip_setup: bool,
    skip_build: bool,
    clean: bool,
    num_threads: int,
    state_directory: str,
    cache_directory: str,
    flags: List[str],
):
    """
    This is a `make` alternative with a simpler syntax and some useful features.
    """
    try:
        repo_root = find_root()
        os.chdir(repo_root)

        if verbose:
            enable_logging()
        if profile or shell_log:
            enable_profiling()

        flags = [flag.split("=", 1) + ["true"] for flag in flags]
        flags = {flag[0].lower(): loads(flag[1]) for flag in flags}

        if not skip_setup:
            setup_rule_lookup, macros = load_rules(
                flags, workspace=True, skip_version_check=skip_version_check
            )
            if macros:
                raise BuildException("Macros are not supported in setup rules.")
            setup_targets = [
                target[5:] for target in targets if target.startswith("setup:")
            ]
            if locate and setup_targets:
                raise BuildException(
                    "--locate cannot be used with setup rules - they are declared in WORKSPACE"
                )
            setup_targets = setup_targets or (
                [config.default_setup_rule] if config.default_setup_rule else []
            )
            initialize_workspace(
                setup_rule_lookup,
                setup_targets,
                state_directory,
                quiet,
            )

        target_rule_lookup, macros = load_rules(
            flags, skip_version_check=skip_version_check
        )
        target_rule_lookup.verify()
        all_files = get_repo_files()
        source_files = target_rule_lookup.find_source_files(all_files)

        if profile:
            print("Slow Build / Rules Files (Loading Phase):")
            slowest = sorted(
                LOAD_TIMINGS, key=lambda x: LOAD_TIMINGS[x], reverse=True
            )[:20]
            for key in slowest:
                print(key, LOAD_TIMINGS[key])
            print()

        need_target = locate or not skip_build
        if not targets and need_target:
            if config.default_build_rule is None:
                raise BuildException("No target provided, and no default target set.")
            targets = [config.default_build_rule]

        if locate:
            for target in targets:
                if target in source_files:
                    raise BuildException(
                        f"Target {target} is a source file, not a build target."
                    )
                rule = target_rule_lookup.try_lookup(target)
                if rule is None and not target.startswith(":"):
                    rule = target_rule_lookup.try_lookup(f":{target}")
                if rule is None:
                    raise BuildException(f"Target {target} was not found.")
                print(
                    f"Target {target} is declared by {rule} in {rule.location}/BUILD."
                )
            exit(0)

        if not skip_build:
            for _ in range(2 if clean else 1):
                if clean:
                    for out_dir in config.output_directories:
                        rmtree(out_dir, ignore_errors=True)
                    # reset scheduling state on every rule before rebuilding
                    for rule in set(target_rule_lookup.direct_lookup.values()):
                        rule.pending_rule_dependencies = set()
                        rule.runtime_dependents = set()
                run_build(
                    BuildState(
                        target_rule_lookup=target_rule_lookup,
                        source_files=source_files,
                        cache_directory=cache_directory,
                        macros=macros,
                    ),
                    [target for target in targets if not target.startswith("setup:")],
                    num_threads,
                    quiet,
                )

            if profile:
                print("Slow Rules (Execution Phase):")
                slowest = sorted(
                    BUILD_TIMINGS, key=lambda x: BUILD_TIMINGS[x], reverse=True
                )[:20]
                for key in slowest:
                    print(key, BUILD_TIMINGS[key])
                print("Cache Statistics")
                print(
                    f"{STATS['hits']} cache hits, {STATS['misses']} cache misses, "
                    f"{STATS['inserts']} cache inserts (approx)"
                )
    except BuildException as e:
        display_error(e)
        exit(1)
    except Exception as e:
        display_error(BuildException("Internal error: " + repr(e)))
        print(f"\n{Style.RESET_ALL}" + traceback.format_exc())
        exit(1)
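
# `find_root` is assumed to locate the repository root by walking upward from
# the current directory until it finds the WORKSPACE file mentioned above. A
# sketch of that behavior under a hypothetical name (not the project's actual
# implementation):
def find_repo_root(marker="WORKSPACE"):
    path = os.path.abspath(os.curdir)
    while True:
        if os.path.exists(os.path.join(path, marker)):
            return path
        parent = os.path.dirname(path)
        if parent == path:  # hit the filesystem root without finding a marker
            raise BuildException(f"Unable to find {marker} in any parent directory.")
        path = parent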