Esempio n. 1
0
 def _hash(self) -> str:
     """
     Fold every child of this depset into a fresh ``HashState`` and return
     that state.

     A child that is itself a ``DepSet`` contributes the result of its own
     ``_hash()``. Any other child is recorded and then mixed in through
     ``hash_file``. A warning is printed on every call, including the
     recursive calls made for nested depsets.
     """
     print(
         "[WARNING] DepSet hashing is experimental and may corrupt caches")
     digest = HashState()
     for member in self.children:
         if isinstance(member, DepSet):
             digest.record(member._hash())
             continue
         # everything that is not a depset must be a string that ends in
         # neither "/" nor ":"
         assert isinstance(member, str) and not member.endswith(
             ("/", ":")), "Depsets only hold files or other depsets"
         digest.record(member)
         digest.update(hash_file(member))
     return digest.state()
Esempio n. 2
0
def gen_service_account(app: App):
    """
    Make sure a managed service account exists for the app's permission set
    and return its name.

    The account name is ``managed-`` followed by the hash state of the sorted
    ``app.config["permissions"]`` list, truncated to 30 characters. If an
    account with the matching email is already listed by gcloud, nothing is
    created or modified. Otherwise the account is created and one IAM role
    binding is added for every permission other than ``"rpc"``.

    Returns:
        The service account name (not the email address).

    Raises:
        KeyError: a new account is needed and a permission other than
            ``"rpc"`` has no entry in the role table. This is raised before
            anything is created, so no half-configured account is left behind.
        CalledProcessError: adding a role binding failed. The freshly created
            account is deleted before the error is re-raised.
    """
    # set up and create service account
    hashstate = HashState()
    permissions = sorted(app.config["permissions"])
    hashstate.record(permissions)
    # max len of account ID is 30 chars
    service_account_name = f"managed-{hashstate.state()}"[:30]
    service_account_email = (
        f"{service_account_name}@{PROJECT_ID}.iam.gserviceaccount.com")
    existing_accounts = json.loads(
        sh(
            "gcloud",
            "iam",
            "service-accounts",
            "list",
            "--format",
            "json",
            capture_output=True,
        ))
    if any(account["email"] == service_account_email
           for account in existing_accounts):
        return service_account_name

    # need to create service account
    role_lookup = dict(
        # permissions that most apps might need
        storage="roles/storage.admin",
        database="roles/cloudsql.client",
        logging="roles/logging.admin",
        # only buildserver needs these
        iam_admin="roles/resourcemanager.projectIamAdmin",
        cloud_run_admin="roles/run.admin",
        cloud_functions_admin="roles/cloudfunctions.admin",
    )
    # Resolve every role *before* touching gcloud: an unknown permission must
    # fail here, not after the account already exists without its roles (a
    # later run would find that account by email and never repair it).
    roles = [
        role_lookup[permission] for permission in permissions
        if permission != "rpc"  # handled later
    ]

    sh(
        "gcloud",
        "iam",
        "service-accounts",
        "create",
        service_account_name,
        "--description",
        f'Managed service account with permissions: {" ".join(permissions)}',
        "--display-name",
        "Managed service account - DO NOT EDIT MANUALLY",
    )
    sleep(60)  # it takes a while to create service accounts
    for role in roles:
        try:
            sh(
                "gcloud",
                "projects",
                "add-iam-policy-binding",
                PROJECT_ID,
                "--member",
                f"serviceAccount:{service_account_email}",
                "--role",
                role,
            )
        except CalledProcessError:
            # abort: remove the partially configured account, then propagate
            sh(
                "gcloud",
                "iam",
                "service-accounts",
                "delete",
                service_account_email,
            )
            raise
    return service_account_name
Esempio n. 3
0
def build(
    build_state: BuildState,
    rule: Rule,
    deps: Collection[str],
    *,
    scratch_path: Optional[Path],
):
    """
    Execute ``rule`` for real and return the final hash state.

    Called once every dependency that the caches could reveal has been
    gathered. The run either completes, or a dependency turns out not to be
    built yet and ``MissingDependency`` is raised so the caller can requeue.

    When ``scratch_path`` is given the rule runs in that sandbox: its
    dependencies are copied in first, the queued shell commands are run, and
    the rule's outputs are copied back to ``build_state.repo_root``. Without
    a ``scratch_path`` the shell queue is not run and nothing is copied.

    Raises:
        MissingDependency: a non-source dependency's rule is not in
            ``build_state.ready``.
        BuildException: a ``CalledProcessError`` escaped the rule, or an
            output file was not found when copying out of the sandbox.
    """
    cache_memorize, _ = make_cache_memorize(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def load_deps(deps):
        """Verify that not-yet-loaded deps are built, then stage them."""
        deps = set(deps) - loaded_deps
        # the PreviewExecution has not vetted these, so make sure every
        # non-source dep comes from a rule that is already ready
        missing_deps = [
            dep for dep in deps
            if dep not in build_state.source_files
            and build_state.target_rule_lookup.lookup(build_state, dep)
            not in build_state.ready
        ]
        if missing_deps:
            raise MissingDependency(*missing_deps)
        loaded_deps.update(deps)
        if not in_sandbox:
            return
        log(f"Loading dependencies {deps} into sandbox")
        file_deps = [dep for dep in deps if not dep.startswith(":")]
        copy_helper(
            src_root=build_state.repo_root,
            dest_root=scratch_path,
            src_names=file_deps,
            symlink=not rule.do_not_symlink,
        )

    load_deps(deps)
    hashstate = HashState()

    # the context is rooted in the sandbox when there is one, otherwise in
    # the repository itself
    if in_sandbox:
        exec_root = scratch_path
        exec_cwd = scratch_path.joinpath(rule.location)
    else:
        exec_root = build_state.repo_root
        exec_cwd = Path(build_state.repo_root).joinpath(rule.location)

    ctx = ExecutionContext(
        exec_root,
        exec_cwd,
        hashstate,
        load_deps,
        cache_memorize,
    )

    for static_dep in rule.deps:
        provider = build_state.target_rule_lookup.try_lookup(static_dep)
        if static_dep.startswith(":"):
            # rule deps are exposed as attributes, without the leading ":"
            setattr(ctx.deps, static_dep[1:], provider.provided_value)
        else:
            hashstate.update(static_dep.encode("utf-8"))
            hashstate.update(hash_file(static_dep))
        if static_dep not in build_state.source_files:
            ctx.deps[static_dep] = provider.provided_value

    try:
        rule.provided_value = rule.impl(ctx)
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        report = [
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {ctx.cwd}\n",
            e.stdout.decode("utf-8"),
            e.stderr.decode("utf-8"),
            traceback.format_exc(),
        ]
        raise BuildException("".join(report))

    if in_sandbox:
        # publish the outputs produced inside the sandbox
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=build_state.repo_root,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    for input_path in ctx.inputs:
        # don't hash rule deps
        if not input_path.startswith(":"):
            hashstate.update(input_path.encode("utf-8"))
            hashstate.update(hash_file(input_path))

    return hashstate.state()
Esempio n. 4
0
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Process the requested setup rules, plus any setup rules they turn out to
    depend on, using a work queue.

    Each rule's impl is invoked against a ``WorkspaceExecutionContext``. If
    that reports rule dependencies that are not ready, those rules are
    enqueued and the current rule is re-enqueued once all of them finish.
    Otherwise the rule's inputs are hashed and compared with the state stored
    under ("workspace", rule name); the queued shell commands are only run
    when the state differs or is missing, a rule dependency was rebuilt in
    this invocation, or an output path does not exist.

    Raises:
        BuildException: a requested target is unknown, or anything goes wrong
            while processing a rule (other exception types are wrapped, with
            a traceback appended to the message).
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup
    work_queue = []
    for setup_target in setup_targets:
        if setup_target not in direct_lookup:
            raise BuildException(
                f"Unknown or unspecified setup target {setup_target}")
        work_queue.append(direct_lookup[setup_target])

    # NOTE(review): both sets are filled with rule objects (see the
    # `rebuilt.add(todo)` / `ready.add(todo)` calls below), not strings,
    # despite the Set[str] annotations.
    rebuilt: Set[str] = set()
    ready: Set[str] = set()

    cache_load_string, _ = make_cache_load(state_directory)
    cache_store_string, _ = make_cache_store(state_directory)

    # status_monitor only exists when there is work; every later use is
    # inside the `while work_queue` loop, which is skipped for an empty queue
    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    def dep_fetcher(dep):
        # Resolves ":"-prefixed rule deps to their rule; raises
        # MissingDependency when that rule has not finished yet.
        # Any other dep falls through and implicitly returns None.
        if dep.startswith(":"):
            if dep not in direct_lookup:
                raise BuildException(f"Unable to find setup rule {dep}")
            dep_rule = direct_lookup[dep]
            log(f"Looking up setup rule {dep}")
            if dep_rule not in ready:
                raise MissingDependency(dep)
            return dep_rule

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        try:
            # the rule name is the cache key below, so it is mandatory
            if todo.name is None:
                raise BuildException(
                    f"All setup rules must have names, but {todo} does not.")

            hashstate = HashState()
            ctx = WorkspaceExecutionContext(hashstate, dep_fetcher)
            unchecked_rules = []

            # MissingDependency is used as control flow here: it signals
            # which rules must run before this one can be evaluated
            try:
                todo.set_provided_value(
                    todo.impl(ctx),
                    None,
                    ctx.inputs,
                    ctx.deferred_inputs,
                    [],  # todo: implement output providers for setup rules
                )
                if ctx.out_of_date_deps:
                    raise MissingDependency(*ctx.out_of_date_deps)
            except MissingDependency as e:
                unchecked_rules = [direct_lookup[x] for x in e.paths]

            if unchecked_rules:
                # park this rule: it is re-enqueued by the last of its
                # pending dependencies to finish (see the end of the else
                # branch below)
                for dep in unchecked_rules:
                    if dep not in work_queue:
                        log(f"Setup rule {todo} is enqueuing {dep}")
                        status_monitor.move(total=1)
                        work_queue.append(dep)
                    else:
                        log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                            )
                    dep.runtime_dependents.add(todo)
                    todo.pending_rule_dependencies.add(dep)
            else:
                log(f"Setup rule {todo} ran with inputs {ctx.inputs + ctx.deferred_inputs}"
                    )
                # fold every file input (name and content hash) into the
                # state; rule deps are skipped here and handled via `rebuilt`
                for dep in ctx.inputs + ctx.deferred_inputs:
                    if dep.startswith(":"):
                        continue
                    try:
                        hashstate.record(dep)
                        hashstate.update(hash_file(dep))
                    except FileNotFoundError:
                        raise BuildException(f"Source file {dep} not found.")

                # `ok` means "the previous run is still valid"; it starts
                # from the stored-state comparison and can only be turned
                # off by the checks that follow
                try:
                    ok = cache_load_string("workspace",
                                           todo.name) == hashstate.state()
                    if not ok:
                        log(f"State mismatch for rule {todo}, need to rerun")
                except CacheMiss:
                    log(f"State not found for rule {todo}, need to run for first time"
                        )
                    ok = False

                for dep in ctx.inputs + ctx.deferred_inputs:
                    if dep.startswith(":"):
                        if direct_lookup[dep] in rebuilt:
                            log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                                )
                            ok = False

                for out in todo.outputs:
                    if not os.path.exists(out):
                        log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                            )
                        ok = False
                        break

                if not ok:
                    # we need to fully run
                    log(f"Fully running setup rule {todo}")
                    ctx.run_shell_queue()
                    rebuilt.add(todo)
                    cache_store_string("workspace", todo.name,
                                       hashstate.state())

                # either way, now we can trigger our dependents
                ready.add(todo)
                for dep in todo.runtime_dependents:
                    dep.pending_rule_dependencies.remove(todo)
                    if not dep.pending_rule_dependencies:
                        work_queue.append(dep)
                        status_monitor.move(total=1)

            status_monitor.move(curr=1)
        except Exception as e:
            # BuildExceptions already carry a readable message; anything
            # else gets its traceback appended before being wrapped
            if not isinstance(e, BuildException):
                suffix = f"\n{Style.RESET_ALL}" + traceback.format_exc()
            else:
                suffix = ""
            status_monitor.stop()
            raise BuildException(f"Error while executing rule {todo}: " +
                                 str(e) + suffix)
Esempio n. 5
0
def initialize_workspace(
    setup_rule_lookup: TargetLookup,
    setup_targets: List[str],
    state_directory: str,
    quiet: bool,
):
    """
    Process the requested setup rules and their static setup-rule
    dependencies using a work queue.

    For each rule popped off the queue, its declared deps are hashed. If any
    rule dep is not ready yet, those deps are enqueued and the rule waits
    until they have all finished. Otherwise the rule's impl is invoked and
    its queued shell commands are run only when the stored state under
    ("workspace", rule name) differs or is missing, a rule dep was rebuilt
    during this invocation, or one of the rule's outputs does not exist.

    Raises:
        BuildException: unknown setup target or setup rule, missing source
            file, or a setup rule without a name.
    """
    # we don't need the indirect lookup as we only have rule and source deps
    direct_lookup: Dict[str, Rule] = setup_rule_lookup.direct_lookup
    work_queue = []
    for setup_target in setup_targets:
        if setup_target in direct_lookup:
            work_queue.append(direct_lookup[setup_target])
            continue
        raise BuildException(
            f"Unknown or unspecified setup target {setup_target}")

    # NOTE(review): both sets end up holding rule objects, not strings
    rebuilt: Set[str] = set()
    ready: Set[str] = set()

    cache_fetcher, _ = make_cache_fetcher(state_directory)
    cache_memorize, _ = make_cache_memorize(state_directory)

    if work_queue:
        status_monitor = create_status_monitor(1, quiet)
        status_monitor.move(total=len(work_queue))

    while work_queue:
        todo = work_queue.pop()
        log(f"Popping setup rule {todo} off work queue")
        state_hash = HashState()
        ctx = WorkspaceExecutionContext(state_hash)
        blocked_on = []
        for dep in todo.deps:
            state_hash.record(dep)
            if not dep.startswith(":"):
                # plain source file: mix its content hash into the state
                try:
                    state_hash.update(hash_file(dep))
                except FileNotFoundError:
                    raise BuildException(f"Source file {dep} not found.")
                continue
            if dep not in direct_lookup:
                raise BuildException(f"Unable to find setup rule {dep}")
            provider = direct_lookup[dep]
            if provider not in ready:
                blocked_on.append(provider)
                continue
            # expose the value both by full name and as an attribute named
            # without the leading ":"
            ctx.deps[dep] = provider.provided_value
            setattr(ctx.deps, dep[1:], provider.provided_value)

        if blocked_on:
            # park this rule; the last pending dependency to finish puts it
            # back on the queue
            for dep in blocked_on:
                if dep in work_queue:
                    log(f"Setup rule {todo} is waiting on {dep}, which is already enqueued"
                        )
                else:
                    log(f"Setup rule {todo} is enqueuing {dep}")
                    status_monitor.move(total=1)
                    work_queue.append(dep)
                dep.runtime_dependents.add(todo)
                todo.pending_rule_dependencies.add(dep)
            status_monitor.move(curr=1)
            continue

        # our dependent rules are ready, now we need to see if we need to rerun
        todo.provided_value = todo.impl(ctx)

        if todo.name is None:
            raise BuildException(
                f"All setup rules must have names, but {todo} does not.")

        try:
            stored_state = cache_fetcher("workspace", todo.name)
            up_to_date = stored_state == state_hash.state()
            if not up_to_date:
                log(f"State mismatch for rule {todo}, need to rerun")
        except CacheMiss:
            log(f"State not found for rule {todo}, need to run for first time"
                )
            up_to_date = False

        for dep in todo.deps:
            if dep.startswith(":") and direct_lookup[dep] in rebuilt:
                log(f"Dependency {dep} of setup rule {todo} was rebuilt, so we must rebuild {todo} as well"
                    )
                up_to_date = False

        for out in todo.outputs:
            if os.path.exists(out):
                continue
            log(f"Output {out} is missing for setup rule {todo}, forcing rerun"
                )
            up_to_date = False
            break

        if not up_to_date:
            # we need to fully run
            ctx.run_shell_queue()
            rebuilt.add(todo)
            cache_memorize("workspace", todo.name, state_hash.state())

        # either way, now we can trigger our dependents
        ready.add(todo)
        for waiter in todo.runtime_dependents:
            waiter.pending_rule_dependencies.remove(todo)
            if not waiter.pending_rule_dependencies:
                work_queue.append(waiter)
                status_monitor.move(total=1)

        status_monitor.move(curr=1)
Esempio n. 6
0
def build(
    build_state: BuildState,
    rule: Rule,
    *,
    precomputed_deps: Optional[List[str]] = None,
    scratch_path: Optional[Path],
    skip_cache_key: bool,
):
    """
    Execute ``rule`` for real and return ``(provided_value, cache_key)``.

    Called once every dependency that the caches could reveal has been
    gathered. The run either completes, or ``MissingDependency`` is raised so
    the caller can requeue.

    With a ``scratch_path`` the rule runs in that sandbox: precomputed deps
    are loaded into it, the queued shell commands are run, and the rule's
    outputs are copied back to the current directory. ``cache_key`` is the
    final hash state, or ``None`` when ``skip_cache_key`` is set.

    Raises:
        MissingDependency: a non-source dep's rule is not ready, or the
            context reports out-of-date deps after the impl ran.
        BuildException: a dep that was not preloaded is requested while
            sandboxed, a ``CalledProcessError`` escaped the rule, or an output
            file was not found when copying out of the sandbox.
    """
    cache_store_string, _ = make_cache_store(build_state.cache_directory)

    in_sandbox = scratch_path is not None

    loaded_deps = set()

    def dep_fetcher(dep, *, initial_load=False):
        """Stage ``dep`` if needed and return its rule (None for sources)."""
        needs_staging = dep not in loaded_deps and in_sandbox
        if needs_staging and not initial_load:
            raise BuildException(
                f"New dep {dep} found when rerunning rule, it's likely not deterministic!"
            )
        if needs_staging and not dep.startswith(":"):
            log(f"Loading dependency {dep} into sandbox")
            copy_helper(
                src_root=os.curdir,
                dest_root=scratch_path,
                src_names=[dep],
                symlink=not rule.do_not_symlink,
            )

        # check that these deps are built! Since they may not have been checked by the PreviewExecution.
        if dep in build_state.source_files:
            provider = None
        else:
            provider = build_state.target_rule_lookup.lookup(build_state, dep)
            if provider not in build_state.ready:
                raise MissingDependency(dep)

        loaded_deps.add(dep)

        return provider

    if precomputed_deps:
        assert in_sandbox
        for preloaded in precomputed_deps:
            dep_fetcher(preloaded, initial_load=True)

    hashstate = HashState()

    exec_root = scratch_path if in_sandbox else os.curdir
    ctx = ExecutionContext(
        exec_root,
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_store_string,
    )

    try:
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        if ctx.out_of_date_deps:
            raise MissingDependency(*ctx.out_of_date_deps)
        if in_sandbox:
            ctx.run_shell_queue()
    except CalledProcessError as e:
        report = [
            str(e) + "\n",
            Style.RESET_ALL,
            f"Location: {scratch_path}\n",
            f"Working Directory: {scratch_path}/{ctx.cwd}\n",
            e.stdout.decode("utf-8"),
            e.stderr.decode("utf-8"),
        ]
        raise BuildException("".join(report))

    if in_sandbox:
        # publish the outputs produced inside the sandbox
        try:
            copy_helper(
                src_root=scratch_path,
                src_names=rule.outputs,
                dest_root=os.curdir,
            )
        except FileNotFoundError as e:
            raise BuildException(
                f"Output file {e.filename} from rule {rule} was not generated."
            )

    if not skip_cache_key:
        for input_path in ctx.inputs:
            # don't hash rule deps
            if not input_path.startswith(":"):
                hashstate.update(input_path.encode("utf-8"))
                hashstate.update(hash_file(input_path))
    hashstate.record("done")

    cache_key = None if skip_cache_key else hashstate.state()
    return provided_value, cache_key
def get_deps(build_state: BuildState, rule: Rule):
    """
    Discover as many of ``rule``'s dependencies as the static dep list and
    the caches allow, without reporting any spurious ones.

    Returns a 3-tuple ``(state, deps, uses_dynamic_inputs)``:

    * If a static dependency is not ready (or fetching its hash raises
      ``MissingDependency``), the impl is not run and the result is
      ``(None, rule.deps, None)``.
    * Otherwise the impl is run against a ``PreviewContext`` and the result
      is ``(state, ctx.inputs + rule.deps, ctx.uses_dynamic_inputs)``, where
      ``state`` is the hash state when the impl completed and every
      discovered input could be verified, and ``None`` otherwise.
    """
    hashstate = HashState()
    cache_fetcher, _ = make_cache_fetcher(build_state.cache_directory)
    dep_fetcher = make_dep_fetcher(build_state)

    ctx = PreviewContext(
        build_state.repo_root,
        rule.location,
        hashstate,
        dep_fetcher,
        cache_fetcher,
    )

    log(f"Looking for static dependencies of {rule}")
    static_deps_ready = True
    for dep in rule.deps:
        if dep not in build_state.source_files:
            dep_rule = build_state.target_rule_lookup.lookup(build_state, dep)
            if dep_rule not in build_state.ready:
                # NOTE(review): the message interpolates dep_rule (the rule
                # that provides dep); possibly `rule` was intended - confirm
                log(f"Static dependency {dep} of {dep_rule} is not ready, skipping impl"
                    )
                # static deps are not yet ready
                static_deps_ready = False
                break
            ctx.deps[dep] = dep_rule.provided_value
            if dep.startswith(":"):
                # rule deps are exposed as attributes and are not hashed
                setattr(ctx.deps, dep[1:], dep_rule.provided_value)
                continue
        hashstate.update(dep.encode("utf-8"))
        try:
            hashstate.update(dep_fetcher(dep, get_hash=True))
        except MissingDependency:
            # get static deps before running the impl!
            # this means that a source file is *missing*, but the error will be thrown in enqueue_deps
            static_deps_ready = False
            break

    if not static_deps_ready:
        return None, rule.deps, None

    ok = False
    try:
        log(f"Running impl of {rule} to discover dynamic dependencies")
        rule.provided_value = rule.impl(ctx)
        log(f"Impl of {rule} completed with discovered deps: {ctx.inputs}")
        for out in rule.outputs:
            # needed so that if we ask for another output, we don't panic if it's not in the cache
            hashstate.record(out)
        ok = True
    except CacheMiss:
        # stops context execution
        log(f"Cache miss while running impl of {rule}")
    except MissingDependency as e:
        # dep already added to ctx.inputs
        log(f"Dependencies {e.paths} were unavailable while running impl of {rule}"
            )
    except Exception as e:
        # deliberately best-effort: report the failure and carry on with
        # `ok` still False
        print(
            "Error occurred during PreviewExecution. This may be normal, if a cached file that has not "
            "yet been reported / processed has been changed. However, it may also be an internal error, so "
            "it is being logged here. If it is an internal error, please contact the maintainer."
        )
        print(repr(e))

    # if `ok`, hash loaded dynamic dependencies
    if ok:
        log(f"Runtime dependencies resolved for {rule}, now checking dynamic dependencies"
            )
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                input_dep = build_state.target_rule_lookup.try_lookup(
                    input_path)
                if input_dep is None or input_dep not in build_state.ready:
                    ok = False
                    log(f"Dynamic rule dependency {input_path} is not yet ready"
                        )
                    break
                continue
            hashstate.update(input_path.encode("utf-8"))
            try:
                data = dep_fetcher(input_path, get_hash=True)
            except MissingDependency as e:
                # this dependency was not needed for deps calculation
                # but is not verified to be up-to-date
                ok = False
                log(f"Dynamic dependencies {e.paths} were not needed for the impl, but are not up to date"
                    )
                break
            hashstate.update(data)

    return (
        hashstate.state() if ok else None,
        ctx.inputs + rule.deps,
        ctx.uses_dynamic_inputs,
    )
Esempio n. 8
0
def get_deps(build_state: BuildState, rule: Rule, *, skip_cache_key: bool):
    """
    Run ``rule``'s impl against a ``PreviewContext`` to discover as many of
    its dependencies as the caches allow, without reporting spurious ones.

    Returns a 5-tuple ``(state, provided_value, inputs, deferred_inputs,
    uses_dynamic_inputs)``. ``state`` is the hash state when the impl
    completed and every discovered input could be verified, and ``None``
    otherwise. ``provided_value`` stays ``None`` if the impl did not complete.

    With ``skip_cache_key`` set, outputs and file inputs are not mixed into
    the hash, and file inputs are fetched in "rule" mode instead of "hash"
    mode.

    Raises:
        Exception: an unexpected error from the impl is re-raised (after
            being printed) unless the context reports dynamic inputs.
    """
    hashstate = HashState()
    cache_load_string, _ = make_cache_load(build_state.cache_directory)
    dep_fetcher = make_dep_fetcher(build_state)

    ctx = PreviewContext(
        rule.location,
        build_state.macros,
        hashstate,
        dep_fetcher,
        cache_load_string,
    )

    ok = False
    provided_value = None
    try:
        log(f"Running impl of {rule} to discover dependencies")
        if not skip_cache_key:
            for out in rule.outputs:
                # needed so that if we ask for another output, we don't panic if it's not in the cache
                hashstate.record(out)
        provided_value = rule.impl(ctx)
        log(f"Impl of {rule} completed with deps: {ctx.inputs}")
        ok = True
    except CacheMiss:
        # stops context execution
        log(f"Cache miss while running impl of {rule}")
    except MissingDependency as e:
        # dep already added to ctx.inputs
        log(f"Dependencies {e.paths} were unavailable while running impl of {rule}"
            )
    except Exception as e:
        print(
            "Error occurred during PreviewExecution. This may be normal, if a cached file that has not "
            "yet been reported / processed has been changed. However, it may also be an internal error, so "
            "it is being logged here. If it is an internal error, please contact the maintainer."
        )
        print(repr(e))
        # only tolerated when the context reports dynamic inputs
        if not ctx.uses_dynamic_inputs:
            raise

    # if `ok`, hash loaded dynamic dependencies
    if ok:
        log(f"Inputs and dependencies resolved for {rule}")
        fetch_mode = "rule" if skip_cache_key else "hash"
        for input_path in ctx.inputs:
            if input_path.startswith(":"):
                input_dep = build_state.target_rule_lookup.try_lookup(
                    input_path)
                if input_dep is None or input_dep not in build_state.ready:
                    ok = False
                    log(f"Rule dependency {input_path} is not yet ready (or does not exist)"
                        )
                    break
                continue
            if not skip_cache_key:
                hashstate.update(input_path.encode("utf-8"))
            try:
                data = dep_fetcher(input_path, fetch_mode)
            except MissingDependency as e:
                # this dependency was not needed for deps calculation
                # but is not verified to be up-to-date
                ok = False
                log(f"Dependencies {e.paths} were not needed for the impl, but are not up to date"
                    )
                break
            if not skip_cache_key:
                hashstate.update(data)
        hashstate.record("done")

    return (
        hashstate.state() if ok else None,
        provided_value,
        ctx.inputs,
        ctx.deferred_inputs,
        ctx.uses_dynamic_inputs,
    )