def cache_loader(cache_key: str, rule: Rule, dest_root: str) -> bool:
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key
    )
    if bucket:
        del cache_location
        if not aux_loader(cache_key, rule, dest_root):
            try:
                cache_fetcher(cache_key, ".touch")
            except CacheMiss:
                STATS["misses"] += delta
                return False
            for src_name, cache_path in zip(rule.outputs, cache_paths):
                cache_path = str(Path(cache_key).joinpath(cache_path))
                os.makedirs(dest_root, exist_ok=True)
                try:
                    if src_name.endswith("/"):
                        os.makedirs(Path(dest_root).joinpath(src_name), exist_ok=True)
                        blobs: Iterator[Blob] = list(
                            bucket.list_blobs(prefix=cache_path)
                        )
                        for blob in blobs:
                            target = str(
                                Path(dest_root)
                                .joinpath(src_name)
                                .joinpath(blob.name[len(cache_path) + 1 :])
                            )
                            os.makedirs(dirname(target), exist_ok=True)
                            blob.download_to_filename(target)
                            STATS["hits"] += delta
                    else:
                        target = str(Path(dest_root).joinpath(src_name))
                        os.makedirs(dirname(target), exist_ok=True)
                        bucket.blob(cache_path).download_to_filename(target)
                        STATS["hits"] += delta
                except NotFound:
                    STATS["misses"] += delta
                    return False
            # now that we have fetched, let's cache it on disk
            aux_save(cache_key, rule, dest_root)
        return True
    else:
        if not os.path.exists(cache_location):
            STATS["misses"] += delta
            return False
        try:
            copy_helper(
                src_root=cache_location,
                src_names=cache_paths,
                dest_root=dest_root,
                dest_names=rule.outputs,
            )
            STATS["hits"] += delta
        except FileNotFoundError:
            raise BuildException(
                "Cache corrupted. This should never happen unless you modified the cache "
                "directory manually! If so, delete the cache directory and try again."
            )
        return True
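# The cache loaders above close over names such as `bucket`, `STATS`, `delta`, and the
# aux_* helpers, which are presumably bound by an enclosing factory. The control flow is a
# two-tier lookup: try the local aux cache, fall back to the remote store, and record a hit
# or miss either way. A minimal, self-contained sketch of that pattern follows; every name
# in it is hypothetical and simplified (dicts stand in for the disk and GCS tiers).
from typing import Callable, Dict, Optional


def make_two_tier_loader(
    local: Dict[str, bytes],
    remote: Dict[str, bytes],
    stats: Dict[str, int],
) -> Callable[[str], Optional[bytes]]:
    def load(key: str) -> Optional[bytes]:
        # First tier: the local (aux) cache.
        if key in local:
            stats["hits"] += 1
            return local[key]
        # Second tier: the remote store; backfill the local cache on success.
        if key in remote:
            local[key] = remote[key]
            stats["hits"] += 1
            return local[key]
        stats["misses"] += 1
        return None

    return load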
def save(cache_key: str, rule: Rule, output_root: str):
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key
    )
    memorize(cache_key, ".touch", "")
    if bucket:
        del cache_location  # just to be safe
        for src_name, cache_path in zip(rule.outputs, cache_paths):
            if src_name.endswith("/"):
                dir_root = Path(output_root).joinpath(src_name)
                for path, subdirs, files in os.walk(dir_root):
                    path = os.path.relpath(path, dir_root)
                    for name in files:
                        # output_root -> src_name/ -> [path -> name]
                        # <cache_base> -> cache_key -> cache_path -> [path -> name]
                        target = Path(path).joinpath(name)
                        src_loc = Path(output_root).joinpath(src_name).joinpath(target)
                        aux_cache_loc = (
                            Path(AUX_CACHE)
                            .joinpath(cache_key)
                            .joinpath(cache_path)
                            .joinpath(target)
                        )
                        if not os.path.exists(aux_cache_loc) or (
                            hash_file(src_loc) != hash_file(aux_cache_loc)
                        ):
                            STATS["inserts"] += delta
                            bucket.blob(
                                str(
                                    Path(cache_key)
                                    .joinpath(cache_path)
                                    .joinpath(target)
                                )
                            ).upload_from_filename(str(src_loc))
            else:
                # output_root -> src_name
                # <cache_base> -> cache_key -> cache_path
                target = Path(output_root).joinpath(src_name)
                aux_cache_loc = Path(AUX_CACHE).joinpath(cache_key).joinpath(cache_path)
                if not os.path.exists(aux_cache_loc) or (
                    hash_file(target) != hash_file(aux_cache_loc)
                ):
                    STATS["inserts"] += delta
                    bucket.blob(
                        str(Path(cache_key).joinpath(cache_path))
                    ).upload_from_filename(str(Path(output_root).joinpath(src_name)))
        aux_save(cache_key, rule, output_root)
    else:
        STATS["inserts"] += delta
        copy_helper(
            src_root=output_root,
            src_names=rule.outputs,
            dest_root=cache_location,
            dest_names=cache_paths,
        )
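# `save` only uploads an output when the copy recorded under AUX_CACHE is absent or has
# different contents, as decided by `hash_file`. That helper is not shown here; a plausible
# sketch of the check, assuming `hash_file` is a plain content hash (the names
# `hash_file_sketch` and `needs_upload` are hypothetical):
import hashlib
import os


def hash_file_sketch(path: str) -> str:
    # Stream the file through SHA-256 so large outputs need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 16), b""):
            digest.update(chunk)
    return digest.hexdigest()


def needs_upload(src_loc: str, aux_cache_loc: str) -> bool:
    # Upload only if there is no aux copy yet, or its contents differ from the new output.
    return not os.path.exists(aux_cache_loc) or (
        hash_file_sketch(src_loc) != hash_file_sketch(aux_cache_loc)
    )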
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    All the dependencies that can be determined from caches have been obtained.
    Now we need to run. Either we will successfully finish everything, or we
    will get a missing dependency and have to requeue.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def load_deps(deps):
        deps = set(deps) - loaded_deps
        # Check that these deps are built, since they have not been checked by the PreviewExecution.
        missing_deps = []
        for dep in deps:
            if dep not in build_state.source_files:
                dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
                if dep_rule not in build_state.ready:
                    missing_deps.append(dep)
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if in_sandbox:
            log(f"Loading dependencies {deps} into sandbox")
            copy_helper(
                src_root=build_state.repo_root,
                dest_root=scratch_path,
                src_names=[dep for dep in deps if not dep.startswith(":")],
                symlink=not rule.do_not_symlink,
            )

    load_deps(deps)

    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else build_state.repo_root,
        scratch_path.joinpath(rule.location)
        if in_sandbox
        else Path(build_state.repo_root).joinpath(rule.location),
        hashstate,
        load_deps,
        cache_memorize,
    )

    for dep in rule.deps:
        dep_rule = build_state.target_rule_lookup.try_lookup(dep)
        if dep.startswith(":"):
            setattr(ctx.deps, dep[1:], dep_rule.provided_value)
        else:
            hashstate.update(dep.encode("utf-8"))
            hashstate.update(hash_file(dep))
            if dep not in build_state.source_files:
                ctx.deps[dep] = dep_rule.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException(
            "".join(
                [
                    str(e) + "\n",
                    Style.RESET_ALL,
                    f"Location: {scratch_path}\n",
                    f"Working Directory: {ctx.cwd}\n",
                    e.stdout.decode("utf-8"),
                    e.stderr.decode("utf-8"),
                    traceback.format_exc(),
                ]
            )
        )

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    for input_path in ctx.inputs:
        if input_path.startswith(":"):
            # don't hash rule deps
            continue
        hashstate.update(input_path.encode("utf-8"))
        hashstate.update(hash_file(input_path))

    return hashstate.state()
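# `build` derives a cache key by feeding dependency names and file hashes into a HashState,
# whose implementation is not shown here. A minimal stand-in consistent with the calls made
# above (update(bytes), record(str), state()) might look like the class below; whether the
# real class also mixes recorded outputs into the digest is an open assumption.
import hashlib
from typing import List


class HashStateSketch:
    def __init__(self):
        self._hash = hashlib.sha256()
        self._recorded: List[str] = []

    def update(self, data: bytes) -> None:
        # Mix raw bytes (dep names, file hashes) into the running digest.
        self._hash.update(data)

    def record(self, name: str) -> None:
        # Remember declared outputs for later bookkeeping; this sketch leaves the digest untouched.
        self._recorded.append(name)

    def state(self) -> str:
        # The hex digest serves as the rule's cache key.
        return self._hash.hexdigest()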
def cache_load_files(cache_key: str, rule: Rule, dest_root: str) -> List[str]:
    cache_location, cache_paths = get_cache_output_paths(
        cache_directory, rule, cache_key
    )
    out = []
    if bucket:
        from google.cloud.exceptions import NotFound

        del cache_location
        if not aux_load_files(cache_key, rule, dest_root):
            try:
                cache_load_string(cache_key, ".touch")
            except CacheMiss:
                STATS["misses"] += delta
                raise
            for src_name, cache_path in zip(rule.outputs, cache_paths):
                cache_path = str(Path(cache_key).joinpath(cache_path))
                os.makedirs(dest_root, exist_ok=True)
                try:
                    if src_name.endswith("/"):
                        os.makedirs(Path(dest_root).joinpath(src_name), exist_ok=True)
                        blobs = list(bucket.list_blobs(prefix=cache_path))
                        for blob in blobs:
                            filename = blob.name[len(cache_path) + 1 :]
                            target = str(
                                Path(dest_root).joinpath(src_name).joinpath(filename)
                            )
                            os.makedirs(dirname(target), exist_ok=True)
                            blob.download_to_filename(target)
                            STATS["hits"] += delta
                            out.append(os.path.join(src_name, filename))
                    else:
                        target = os.path.join(dest_root, src_name)
                        os.makedirs(dirname(target), exist_ok=True)
                        bucket.blob(cache_path).download_to_filename(target)
                        out.append(target)
                        STATS["hits"] += delta
                except NotFound:
                    STATS["misses"] += delta
                    raise CacheMiss
            # now that we have fetched, let's cache it on disk
            aux_store_files(cache_key, rule, dest_root)
        return out
    else:
        if not os.path.exists(cache_location):
            STATS["misses"] += delta
            raise CacheMiss
        try:
            _, out = copy_helper(
                src_root=cache_location,
                src_names=cache_paths,
                dest_root=dest_root,
                dest_names=rule.outputs,
            )
            STATS["hits"] += delta
        except FileNotFoundError:
            raise BuildException(
                "Cache corrupted. This should never happen unless you modified the cache "
                "directory manually! If so, delete the cache directory and try again."
            )
        return out
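# The directory branch above mirrors a cached output tree out of a GCS bucket by listing
# blobs under a prefix and recreating their relative paths locally. A standalone sketch of
# that pattern using the same google-cloud-storage calls (`list_blobs`, `download_to_filename`);
# the bucket name, prefix, and function name here are placeholders, not part of the codebase.
import os
from typing import List

from google.cloud import storage


def download_prefix(bucket_name: str, prefix: str, dest_root: str) -> List[str]:
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = prefix.rstrip("/")  # the separator is re-added when slicing blob names below
    downloaded = []
    for blob in bucket.list_blobs(prefix=prefix):
        relative = blob.name[len(prefix) + 1 :]
        if not relative:
            continue  # a blob named exactly like the prefix has no relative path
        target = os.path.join(dest_root, relative)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        blob.download_to_filename(target)
        downloaded.append(relative)
    return downloaded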
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    All the dependencies that can be determined from caches have been obtained.
    Now we need to run. Either we will successfully finish everything, or we
    will get a missing dependency and have to requeue.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def dep_fetcher(dep, *, initial_load=False):
        if dep not in loaded_deps and in_sandbox:
            if not initial_load:
                raise BuildException(
                    f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
                )
            if not dep.startswith(":"):
                log(f"Loading dependency {dep} into sandbox")
                copy_helper(
                    src_root=os.curdir,
                    dest_root=scratch_path,
                    src_names=[dep],
                    symlink=not rule.do_not_symlink,
                )

        # Check that these deps are built, since they may not have been checked by the PreviewExecution.
        dep_rule = None
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return dep_rule

    if precomputed_deps:
        assert in_sandbox
        for dep in precomputed_deps:
            dep_fetcher(dep, initial_load=True)

    hashstate = HashState()

    ctx = ExecutionContext(
        scratch_path if in_sandbox else os.curdir,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        raise BuildException(
            "".join(
                [
                    str(e) + "\n",
                    Style.RESET_ALL,
                    f"Location: {scratch_path}\n",
                    f"Working Directory: {scratch_path}/{ctx.cwd}\n",
                    e.stdout.decode("utf-8"),
                    e.stderr.decode("utf-8"),
                ]
            )
        )

    if in_sandbox:
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    if not skip_cache_key:
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                # don't hash rule deps
                continue
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))
        hashstate.record("done")

    return provided_value, hashstate.state() if not skip_cache_key else None
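# The docstring's contract ("we will get a missing dependency and have to requeue") implies
# a caller that retries a target once its deps become ready. The real executor is not shown
# here; the toy worklist below only illustrates the requeue idea, with made-up names and no
# cycle or starvation detection.
from collections import deque


class MissingDependencySketch(Exception):
    """Raised by a build step when a required target has not been built yet."""


def run_all(targets, build_one):
    # `build_one(target, ready)` either succeeds or raises MissingDependencySketch,
    # in which case the target goes back on the queue to be retried later.
    ready = set()
    queue = deque(targets)
    while queue:
        target = queue.popleft()
        try:
            build_one(target, ready)
        except MissingDependencySketch:
            queue.append(target)
        else:
            ready.add(target)
    return ready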