def builder(subvol: Subvol):
    """Create `subvol`, normalize its root metadata, then add the meta dir.

    After creation the subvolume root is set to mode 0755 and owner
    `root:root`, and `_ensure_meta_dir_exists` is called on it.
    """
    subvol.create()
    # Guarantee standard / permissions.  This could be a setting, but in
    # practice, probably any other choice would be wrong.
    for tool, setting in (('chmod', '0755'), ('chown', 'root:root')):
        subvol.run_as_root([tool, setting, subvol.path()])
    _ensure_meta_dir_exists(subvol)
def build(self, subvol: Subvol):
    """Run this item's yum action on `subvol` for every RPM in `self.rpms`.

    Returns immediately when `self.rpms` is empty.  Otherwise asserts that
    the phase order agrees with the action and that a `yum_from_snapshot`
    binary was configured, then runs that binary as root.
    """
    if not self.rpms:
        return
    assert RPM_ACTION_TYPE_TO_PHASE_ORDER[self.action] is self.phase_order
    assert self.yum_from_snapshot is not None, \
        f'{self} -- your `image_layer` must set `yum_from_repo_snapshot`'

    # Since `yum-from-snapshot` variants are generally Python binaries built
    # from this very repo, in @mode/dev, we would run a symlink-PAR from the
    # buck-out tree as `root`.  This would leave behind root-owned
    # `__pycache__` directories, which would break Buck's fragile cleanup,
    # and cause us to leak old build artifacts.  This eventually runs the
    # host out of disk space, and can also interfere with e.g.
    # `test-image-layer`, since that test relies on there being just one
    # `create_ops` subvolume in `buck-image-out` with the "received UUID"
    # that was committed to VCS as part of the test sendstream.
    no_bytecode_env = ['env', 'PYTHONDONTWRITEBYTECODE=1']
    yum_invocation = [
        self.yum_from_snapshot,
        '--install-root', subvol.path(),
        '--', RPM_ACTION_TYPE_TO_YUM_CMD[self.action], '--assumeyes',
        '--',
    ]
    # Sort in case `yum` behavior depends on order (for determinism).
    package_args = sorted(self.rpms)
    subvol.run_as_root(no_bytecode_env + yum_invocation + package_args)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    """Record this mount's metadata in `subvol`, then bind-mount its source.

    The metadata is serialized under `META_MOUNTS_DIR/<mountpoint>/
    MOUNT_MARKER`.  A mountpoint inode (directory or plain file, matching
    the source) is created and the source is mounted on it read-only.
    """
    marker_dir = os.path.join(
        META_MOUNTS_DIR, self.mountpoint, MOUNT_MARKER,
    )
    # NB: Not exporting self.mountpoint since it's implicit in the path.
    serialized_fields = (
        ('is_directory', self.is_directory),
        ('build_source', self.build_source._asdict()),
        ('runtime_source', json.loads(self.runtime_source)),
    )
    for field_name, field_value in serialized_fields:
        procfs_serde.serialize(
            field_value, subvol, os.path.join(marker_dir, field_name),
        )

    source_path = self.build_source.to_path(
        target_to_path=layer_opts.target_to_path,
        subvolumes_dir=layer_opts.subvolumes_dir,
    )
    # Support mounting directories and non-directories...  This check
    # follows symlinks for the mount source, which seems correct.
    source_is_dir = os.path.isdir(source_path)
    assert source_is_dir == self.is_directory, self

    if source_is_dir:
        create_mountpoint = ['mkdir', '--mode=0755']
    else:
        # Regular files, device nodes, FIFOs, you name it.  `touch` lacks a
        # `--mode` argument, but the mode of this mountpoint will be
        # shadowed anyway, so let it be whatever.
        create_mountpoint = ['touch']
    subvol.run_as_root([*create_mountpoint, subvol.path(self.mountpoint)])

    ro_rbind_mount(source_path, subvol, self.mountpoint)
def ro_rbind_mount(src: AnyStr, subvol: Subvol, dest_in_subvol: AnyStr):
    """Recursively bind-mount `src` read-only at `dest_in_subvol`.

    Two `mount` calls are made as root: the bind mount itself, followed by
    a change of the new mount's propagation to `rslave`.
    """
    # Even though `fs_image` currently does not support mount nesting, the
    # mount must be recursive so that host mounts propagate as expected (we
    # don't want to have to know if a source host directory contains
    # sub-mounts).
    bind_cmd = ['mount', '-o', 'ro,rbind', src, subvol.path(dest_in_subvol)]
    subvol.run_as_root(bind_cmd)

    # Performing mount/unmount operations inside the subvol must not be able
    # to affect the host system, so the tree must be marked at least
    # `rslave`.  It would be defensible to use `rprivate`, but IMO this is
    # more surprising than `rslave` in the case of host mounts -- normal
    # filesystem operations on the host are visible to the container, which
    # suggests that mount changes must be, also.
    #
    # IMPORTANT: Even on fairly recent versions of `util-linux`, merging
    # this into the first `mount` invocation above does NOT work.  Just
    # leave this ugly 2-call version as is.
    #
    # NB: We get slave (not private) propagation since `set_up_volume.sh`
    # sets propagation to shared on the parent mount `buck-image-out/volume`.
    propagation_cmd = ['mount', '--make-rslave', subvol.path(dest_in_subvol)]
    subvol.run_as_root(propagation_cmd)
def ensure_meta_dir_exists(subvol: Subvol, layer_opts: LayerOpts):
    """Create `META_DIR` in `subvol` and (re)write the repo-requirement marker.

    If the marker file already exists it is validated against `layer_opts`
    and removed, then the current `layer_opts.artifacts_may_require_repo`
    value is serialized in its place.
    """
    meta_dir = subvol.path(META_DIR)
    subvol.run_as_root(['mkdir', '--mode=0755', '--parents', meta_dir])

    # One might ask: why are we serializing this into the image instead of
    # just putting a condition on `built_artifacts_require_repo` into our
    # Buck macros?  Two reasons:
    #   - In the case of build appliance images, it is possible for a
    #     @mode/dev (in-place) build to use **either** a @mode/dev, or a
    #     @mode/opt (standalone) build appliance.  The only way to know if
    #     the appliance needs a repo mount is to have a marker in the image.
    #   - By marking the images, we avoid having to conditionally add
    #     `--bind-repo-ro` flags in a bunch of places in our codebase.  The
    #     in-image marker enables `nspawn_in_subvol` to decide.
    marker = META_ARTIFACTS_REQUIRE_REPO
    inherited_marker_exists = os.path.exists(subvol.path(marker))
    if inherited_marker_exists:
        _validate_artifacts_require_repo(subvol, layer_opts, 'parent layer')
        # I looked into adding an `allow_overwrite` flag to `serialize`, but
        # it was too much hassle to do it right.
        subvol.run_as_root(['rm', subvol.path(marker)])

    procfs_serde.serialize(
        layer_opts.artifacts_may_require_repo, subvol, marker,
    )
def _ensure_meta_dir_exists(subvol: Subvol):
    """Create `META_DIR` (and any missing parents) in `subvol`, mode 0755."""
    meta_dir = subvol.path(META_DIR)
    mkdir_cmd = ['mkdir', '--mode=0755', '--parents', meta_dir]
    subvol.run_as_root(mkdir_cmd)
def test_does_not_exist(self):
    """A plain temp dir is rejected as an existing subvol and cannot run."""
    with tempfile.TemporaryDirectory() as plain_dir:
        # Claiming that a non-btrfs directory already exists must fail.
        with self.assertRaisesRegex(AssertionError, 'No btrfs subvol'):
            Subvol(plain_dir, already_exists=True)

        # A subvol that was never created must refuse to run commands.
        not_created = Subvol(plain_dir)
        with self.assertRaisesRegex(AssertionError, 'exists is False'):
            not_created.run_as_root(['true'])
def build(self, subvol: Subvol):
    """Create `into_dir/path_to_make` in `subvol` and apply stat options.

    The whole chain is created with `mkdir -p`; the stat options are then
    applied starting from the first component of `path_to_make`.
    """
    # First path component of what we create, e.g. `a` for `a/b/c`.
    top_component, _, _ = self.path_to_make.partition('/')
    deepest_dir = subvol.path(os.path.join(self.into_dir, self.path_to_make))
    subvol.run_as_root(['mkdir', '-p', deepest_dir])
    top_dir = subvol.path(os.path.join(self.into_dir, top_component))
    self.build_stat_options(subvol, top_dir)
def package_full(self, subvol: Subvol, output_path: str, opts: _Opts):
    """Package the whole of `subvol` as a zstd-compressed squashfs image.

    `opts` is accepted but not used here.
    """
    # Ensure non-root ownership: the output file is created by us before
    # the root-run `mksquashfs` writes into it.
    placeholder = create_ro(output_path, 'wb')
    placeholder.close()

    mksquashfs_cmd = ['mksquashfs', subvol.path(), output_path]
    mksquashfs_cmd += ['-comp', 'zstd', '-noappend']
    subvol.run_as_root(mksquashfs_cmd)
def build_stat_options(self, subvol: Subvol, full_target_path: str):
    """Recursively apply this item's mode and ownership to a path.

    Args:
        subvol: used only to run the commands as root.
        full_target_path: passed verbatim to `test`, `chmod` and `chown`.

    The commands are run in order: a symlink check, `chmod -R` with
    `self._mode_impl()`, then `chown -R` with `self.user:self.group`.
    """
    # `chmod` lacks a --no-dereference flag to protect us from following
    # `full_target_path` if it's a symlink.  This should never occur, so
    # make the command fail (presumably raising from `run_as_root`) rather
    # than silently modifying whatever the symlink points at.
    subvol.run_as_root(['test', '!', '-L', full_target_path])
    # -R is not a problem since it cannot be the case that we are
    # creating a directory that already has something inside it.  On the
    # plus side, it helps with nested directory creation.
    subvol.run_as_root(
        ['chmod', '-R', self._mode_impl(), full_target_path])
    subvol.run_as_root([
        # Without --no-dereference, `chown` acts on the referent of any
        # symlink it is given, rather than on the link itself.
        'chown', '--no-dereference', '-R', f'{self.user}:{self.group}',
        full_target_path,
    ])
def build(self, subvol: Subvol):
    """Extract `self.tarball` into `self.into_dir` of `subvol` as root."""
    extract_cmd = ['tar', '-C', subvol.path(self.into_dir), '-x']
    # The next option is an extra safeguard that is redundant with the
    # compiler's prevention of `provides` conflicts.  It has two
    # consequences:
    #
    #  (1) If a file already exists, `tar` will fail with an error.  It is
    #      **not** an error if a directory already exists -- otherwise, one
    #      would never be able to safely untar something into e.g.
    #      `/usr/local/bin`.
    #
    #  (2) Less obviously, the option prevents `tar` from overwriting the
    #      permissions of `directory`, as it otherwise would.
    #
    # Thanks to the compiler's conflict detection, this should not come up,
    # but now you know.  Observe us clobber the permissions without it:
    #
    #   $ mkdir IN OUT
    #   $ touch IN/file
    #   $ chmod og-rwx IN
    #   $ ls -ld IN OUT
    #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
    #   drwxr-xr-x. 2 lesha users  6 Sep 11 21:50 OUT
    #   $ tar -C IN -czf file.tgz .
    #   $ tar -C OUT -xvf file.tgz
    #   ./
    #   ./file
    #   $ ls -ld IN OUT
    #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
    #   drwx------. 2 lesha users 17 Sep 11 21:50 OUT
    #
    # Adding `--keep-old-files` preserves the metadata of `OUT`:
    #
    #   $ rm -rf OUT ; mkdir OUT ; ls -ld OUT
    #   drwxr-xr-x. 2 lesha users 6 Sep 11 21:53 OUT
    #   $ tar -C OUT --keep-old-files -xvf file.tgz
    #   ./
    #   ./file
    #   $ ls -ld IN OUT
    #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
    #   drwxr-xr-x. 2 lesha users 17 Sep 11 21:54 OUT
    extract_cmd.append('--keep-old-files')
    extract_cmd += ['-f', self.tarball]
    subvol.run_as_root(extract_cmd)
def builder(subvol: Subvol):
    """Remove every path listed in the enclosing scope's `items`.

    Raises `AssertionError` when an item targets a protected path, or when
    an item with action `assert_exists` names a path that is missing.
    """
    protected_paths = _protected_path_set(subvol)
    # Reverse-lexicographic order deletes inner paths before deleting the
    # outer paths, thus minimizing conflicts between `remove_paths` items.
    # NOTE(review): `__sort_key` presumably relies on private-name mangling
    # from an enclosing class -- keep this lambda where it is.
    inner_paths_first = sorted(
        items, key=lambda i: i.__sort_key(), reverse=True,
    )
    for item in inner_paths_first:
        if _is_path_protected(item.path, protected_paths):
            # For META_DIR, this is never reached because of
            # _make_path_normal_relative's check, but for other protected
            # paths, this is required.
            raise AssertionError(
                f'Cannot remove protected {item}: {protected_paths}')

        # This ensures that there are no symlinks in item.path that might
        # take us outside of the subvolume.  Since recursive `rm` does not
        # follow symlinks, it is OK if the inode at `item.path` is a
        # symlink (or one of its sub-paths).
        path = subvol.path(item.path, no_dereference_leaf=True)

        if os.path.lexists(path):
            subvol.run_as_root([
                'rm', '-r',
                # This prevents us from making removes outside of the
                # per-repo loopback, which is an important safeguard.  It
                # does not stop us from reaching into other subvols, but
                # since those have random IDs in the path, this is nearly
                # impossible to do by accident.
                '--one-file-system',
                path,
            ])
            continue

        # The path is absent: what happens next depends on the action.
        if item.action == RemovePathAction.assert_exists:
            raise AssertionError(f'Path does not exist: {item}')
        if item.action == RemovePathAction.if_exists:
            continue
        raise AssertionError(  # pragma: no cover
            f'Unknown {item.action}')
def build_resolves_targets(
    self, *,
    subvol: Subvol,
    target_to_path: Mapping[str, str],
    subvolumes_dir: str,
):
    """Record this mount's metadata in `subvol`, then bind-mount its source.

    The build source is resolved to a host path using `target_to_path` and
    `subvolumes_dir`.  A mountpoint inode matching the source type is
    created and the source is mounted on it read-only.
    """
    marker_dir = os.path.join(
        mount_item.META_MOUNTS_DIR, self.mountpoint, mount_item.MOUNT_MARKER,
    )
    # NB: Not exporting self.mountpoint since it's implicit in the path.
    serialized_fields = (
        ('is_directory', self.is_directory),
        ('build_source', self.build_source._asdict()),
        ('runtime_source', json.loads(self.runtime_source)),
    )
    for field_name, field_value in serialized_fields:
        procfs_serde.serialize(
            field_value, subvol, os.path.join(marker_dir, field_name),
        )

    source_path = self.build_source.to_path(
        target_to_path=target_to_path, subvolumes_dir=subvolumes_dir,
    )
    # Support mounting directories and non-directories...  This check
    # follows symlinks for the mount source, which seems correct.
    source_is_dir = os.path.isdir(source_path)
    assert source_is_dir == self.is_directory, self

    if source_is_dir:
        # NB: if is_repo_root, mkdir below will create a non-portable dir
        # like /home/username/fbsource/fbcode in subvol layer, but such a
        # layer should never be published as a package.
        create_mountpoint = (
            ['mkdir', '--mode=0755', '-p'] if self.is_repo_root
            else ['mkdir', '--mode=0755']
        )
    else:
        # Regular files, device nodes, FIFOs, you name it.  `touch` lacks a
        # `--mode` argument, but the mode of this mountpoint will be
        # shadowed anyway, so let it be whatever.
        create_mountpoint = ['touch']
    subvol.run_as_root([*create_mountpoint, subvol.path(self.mountpoint)])

    mount_item.ro_rbind_mount(source_path, subvol, self.mountpoint)
def build_stat_options(self, subvol: Subvol, full_target_path: str):
    """Recursively apply this item's mode and ownership to a path.

    Runs, as root and in order: a check that `full_target_path` is not a
    symlink, `chmod -R` with `self._mode_impl()`, and `chown -R` with
    `self.user:self.group`.
    """
    # `chmod` lacks a --no-dereference flag to protect us from following
    # `full_target_path` if it's a symlink.  As far as I know, this should
    # never occur, so just let the exception fly.
    not_a_symlink_cmd = ['test', '!', '-L', full_target_path]
    subvol.run_as_root(not_a_symlink_cmd)

    # -R is not a problem since it cannot be the case that we are creating
    # a directory that already has something inside it.  On the plus side,
    # it helps with nested directory creation.
    chmod_cmd = ['chmod', '-R', self._mode_impl(), full_target_path]
    subvol.run_as_root(chmod_cmd)

    owner = f'{self.user}:{self.group}'
    chown_cmd = ['chown', '--no-dereference', '-R', owner, full_target_path]
    subvol.run_as_root(chown_cmd)
def build_stat_options(item, subvol: Subvol, full_target_path: str):
    """Recursively apply `item.mode` and `item.user_group` to a path.

    `item.mode` may be an int, which is rendered as 4-digit octal, or a
    symbolic mode string, which is applied after clearing all mode bits.
    """
    # `chmod` lacks a --no-dereference flag to protect us from following
    # `full_target_path` if it's a symlink.  As far as I know, this should
    # never occur, so just let the exception fly.
    subvol.run_as_root(['test', '!', '-L', full_target_path])

    if isinstance(item.mode, int):
        mode_arg = f'{item.mode:04o}'
    else:
        # The symbolic mode must be applied after 0ing all bits.
        mode_arg = f'a-rwxXst,{item.mode}'
    # -R is not a problem since it cannot be the case that we are creating a
    # directory that already has something inside it.  On the plus side, it
    # helps with nested directory creation.
    subvol.run_as_root(['chmod', '-R', mode_arg, full_target_path])

    chown_cmd = ['chown', '--no-dereference', '-R']
    chown_cmd += [item.user_group, full_target_path]
    subvol.run_as_root(chown_cmd)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    """Copy `self.source` to `self.dest` in `subvol`, then set stat options.

    `layer_opts` is accepted but not used here.
    """
    copied_to = subvol.path(self.dest)
    copy_cmd = ['cp', self.source, copied_to]
    subvol.run_as_root(copy_cmd)
    # Mode and ownership come from this item's own settings.
    build_stat_options(self, subvol, copied_to)
def test_run_as_root_no_cwd(self):
    """`run_as_root` must reject an explicit `cwd=` argument."""
    nonexistent = Subvol('/dev/null/no-such-dir')
    # Without `cwd=`, the command runs even though the subvol is absent.
    nonexistent.run_as_root(['true'], _subvol_exists=False)
    # With `cwd=`, the same call must trip an assertion.
    with self.assertRaisesRegex(AssertionError, 'cwd= is not permitte'):
        nonexistent.run_as_root(['true'], _subvol_exists=False, cwd='.')
def build(self, subvol: Subvol):
    """Create `subvol` and set its root to mode 0755, owner `root:root`."""
    subvol.create()
    # Guarantee standard permissions.  This could be made configurable, but
    # in practice, probably any other choice would be wrong.
    for tool, setting in (('chmod', '0755'), ('chown', 'root:root')):
        subvol.run_as_root([tool, setting, subvol.path()])
def builder(subvol: Subvol):
    """Apply each RPM action from `action_to_rpms` to `subvol` via yum.

    Actions with no RPMs are skipped.  When `layer_opts.build_appliance` is
    `None`, `layer_opts.yum_from_snapshot` is run directly as root.
    Otherwise yum runs inside a `systemd-nspawn` container booted from the
    build appliance, with `subvol` bind-mounted at `/mnt`.
    """
    # Local import: only the build-appliance branch needs shell quoting.
    import shlex

    def protected_path_args():
        # Flat `--protected-path PATH` pairs, one pair per protected path.
        return [
            arg
            for d in _protected_path_set(subvol)
            for arg in ('--protected-path', d)
        ]

    for action, rpms in action_to_rpms.items():
        if not rpms:
            continue

        # Future: `yum-from-snapshot` is actually designed to run
        # unprivileged (but we have no nice abstraction for this).
        if layer_opts.build_appliance is None:
            subvol.run_as_root([
                # Since `yum-from-snapshot` variants are generally Python
                # binaries built from this very repo, in @mode/dev, we
                # would run a symlink-PAR from the buck-out tree as `root`.
                # This would leave behind root-owned `__pycache__`
                # directories, which would break Buck's fragile cleanup,
                # and cause us to leak old build artifacts.  This
                # eventually runs the host out of disk space.  Un-deletable
                # *.pyc files can also interfere with e.g.
                # `test-image-layer`, since that test relies on there being
                # just one `create_ops` subvolume in `buck-image-out` with
                # the "received UUID" that was committed to VCS as part of
                # the test sendstream.
                'env', 'PYTHONDONTWRITEBYTECODE=1',
                layer_opts.yum_from_snapshot,
                *protected_path_args(),
                '--install-root', subvol.path(), '--',
                RPM_ACTION_TYPE_TO_YUM_CMD[action],
                # Sort ensures determinism even if `yum` is order-dependent
                '--assumeyes', '--', *sorted(rpms),
            ])
        else:
            # ## Future
            # - implement image feature "manifold_support" with all those
            #   bind-mounts below in mounts = [...]
            # - add features = ["manifold_support"] to fb_build_appliance
            # - call nspawn_in_subvol() instead of run_as_root() below
            svol = Subvol(
                layer_opts.build_appliance,
                already_exists=True,
            )
            mountpoints = mount_item.mountpoints_from_subvol_meta(svol)
            # `systemd-nspawn` separates source and destination with `:`,
            # so colons in the source path are backslash-escaped.
            bind_mount_args = [
                b'--bind-ro=' + svol.path(mp).replace(b':', b'\\:')
                + b':' + b'/' + mp.encode()
                for mp in mountpoints
            ]
            # This command line is interpreted by `sh -c`, so every
            # externally supplied word (protected paths, RPM names) is
            # shell-quoted so that it reaches yum as exactly one argument.
            yum_script = ' '.join([
                '/usr/bin/yum-from-fb-snapshot',
                *(shlex.quote(arg) for arg in protected_path_args()),
                '--install-root', '/mnt', '--',
                RPM_ACTION_TYPE_TO_YUM_CMD[action],
                '--assumeyes', '--',
                # Sort ensures determinism even if `yum` is order-dependent
                *(shlex.quote(rpm) for rpm in sorted(rpms)),
            ])
            subvol.run_as_root([
                # Without this, nspawn would look for the host systemd's
                # cgroup setup, which breaks us in continuous integration
                # containers, which may not have a `systemd` in the host
                # container.
                'env', 'UNIFIED_CGROUP_HIERARCHY=yes',
                'systemd-nspawn',
                '--quiet',
                f'--directory={layer_opts.build_appliance}',
                '--register=no',
                '--keep-unit',
                '--ephemeral',
                b'--bind=' + subvol.path().replace(b':', b'\\:') + b':/mnt',
                '--bind-ro=/dev/fuse',
                '--bind-ro=/etc/fbwhoami',
                '--bind-ro=/etc/smc.tiers',
                '--bind-ro=/var/facebook/rootcanal',
                *bind_mount_args,
                '--capability=CAP_NET_ADMIN',
                'sh',
                '-c',
                (
                    'mkdir -p /mnt/var/cache/yum; '
                    'mount --bind /var/cache/yum /mnt/var/cache/yum; '
                    + yum_script
                ),
            ])
def builder(subvol: Subvol) -> None:
    """Apply each action from `action_to_names_or_rpms` to `subvol` via yum.

    Local RPMs queued for install are first re-queued as downgrades when
    the subvolume already holds a version that is not older.  Then one yum
    invocation is made per non-empty action, either directly on the host
    or through the build appliance when `layer_opts.build_appliance` is
    set.
    """
    # Go through the list of RPMs to install and change the action to
    # downgrade if it is a local RPM with a lower version than what is
    # installed.  This is done in the builder because we need access to the
    # subvol.  We iterate over a copy since the set is edited as we go.
    to_install = action_to_names_or_rpms[RpmAction.install]
    for candidate in to_install.copy():
        if not isinstance(candidate, _LocalRpm):
            continue
        try:
            installed = RpmMetadata.from_subvol(
                subvol, candidate.metadata.name,
            )
        except (RuntimeError, ValueError):
            # This can happen if the RPM DB does not exist in the
            # subvolume or the package is not installed.
            continue
        # NOTE(review): `<= 0` also re-queues an RPM of *equal* version --
        # confirm that this is intended.
        if compare_rpm_versions(candidate.metadata, installed) <= 0:
            to_install.remove(candidate)
            action_to_names_or_rpms[RpmAction.downgrade].add(candidate)

    for action, nors in action_to_names_or_rpms.items():
        if not nors:
            continue

        # Future: `yum-from-snapshot` is actually designed to run
        # unprivileged (but we have no nice abstraction for this).
        if layer_opts.build_appliance is not None:
            rpms, bind_ro_args = _rpms_and_bind_ro_args(nors)
            _yum_using_build_appliance(
                build_appliance=Subvol(
                    layer_opts.build_appliance, already_exists=True,
                ),
                nspawn_args=bind_ro_args,
                install_root=subvol.path(),
                protected_paths=protected_path_set(subvol),
                yum_args=[
                    RPM_ACTION_TYPE_TO_YUM_CMD[action],
                    '--assumeyes',
                    # Sort ensures determinism even if `yum` is
                    # order-dependent
                    *sorted(rpms),
                ],
                preserve_yum_cache=layer_opts.preserve_yum_cache,
            )
            continue

        subvol.run_as_root([
            # Since `yum-from-snapshot` variants are generally Python
            # binaries built from this very repo, in @mode/dev, we would
            # run a symlink-PAR from the buck-out tree as `root`.  This
            # would leave behind root-owned `__pycache__` directories,
            # which would break Buck's fragile cleanup, and cause us to
            # leak old build artifacts.  This eventually runs the host out
            # of disk space.  Un-deletable *.pyc files can also interfere
            # with e.g. `test-image-layer`, since that test relies on there
            # being just one `create_ops` subvolume in `buck-image-out`
            # with the "received UUID" that was committed to VCS as part of
            # the test sendstream.
            'env', 'PYTHONDONTWRITEBYTECODE=1',
            layer_opts.yum_from_snapshot,
            *[
                arg
                for d in protected_path_set(subvol)
                for arg in ('--protected-path', d)
            ],
            '--install-root', subvol.path(), '--',
            RPM_ACTION_TYPE_TO_YUM_CMD[action],
            # Sort ensures determinism even if `yum` is order-dependent
            '--assumeyes', '--',
            *sorted(
                (nor.path if isinstance(nor, _LocalRpm) else nor.encode())
                for nor in nors
            ),
        ])
def provides(self):
    """Yield what the parent layer at `self.path` already provides.

    Protected paths are yielded first as `ProvidesDoNotAccess`.  Every
    other inode found by `find` is yielded as `ProvidesDirectory` (for
    directories) or `ProvidesFile` (for everything else), with paths
    relative to `self.path`.  Asserts that the root `.` was seen.
    """
    parent_subvol = Subvol(self.path, already_exists=True)

    protected_paths = _protected_path_set(parent_subvol)
    for prot_path in protected_paths:
        yield ProvidesDoNotAccess(path=prot_path)

    # Filter out the protected paths at traversal time.  If one of the
    # paths has a very large or very slow mount, traversing it would have a
    # devastating effect on build times, so let's avoid looking inside
    # protected paths entirely.  An alternative would be to `send` and to
    # parse the sendstream, but this is ok too.
    #
    # The result is `-path A -o -path B ...`, i.e. `-o` only *between*
    # the alternatives.
    prune_expr = []
    for p in protected_paths:
        if prune_expr:
            prune_expr.append('-o')
        # `normpath` removes the trailing / for protected dirs
        prune_expr += [
            '-path', os.path.join(self.path, os.path.normpath(p)),
        ]

    # We need to traverse the parent image as root, so that we have
    # permission to access everything.
    find_result = parent_subvol.run_as_root(
        [
            # -P is the analog of --no-dereference in GNU tools
            'find', '-P', self.path,
            '(', *prune_expr, ')', '-prune',
            '-o', '-printf', '%y %p\\0',
        ],
        stdout=subprocess.PIPE,
    )

    provided_root = False
    for record in find_result.stdout.split(b'\0'):
        if not record:  # after the trailing \0
            continue
        filetype, abspath = record.decode().split(' ', 1)
        relpath = os.path.relpath(abspath, self.path)

        # We already "provided" this path above, and it should have been
        # filtered out by `find`.
        assert not _is_path_protected(relpath, protected_paths), relpath

        # Future: This provides all symlinks as files, while we should
        # probably provide symlinks to valid directories inside the image
        # as directories to be consistent with SymlinkToDirItem.
        if filetype in ['b', 'c', 'p', 'f', 'l', 's']:
            yield ProvidesFile(path=relpath)
        elif filetype == 'd':
            yield ProvidesDirectory(path=relpath)
        else:  # pragma: no cover
            raise AssertionError(f'Unknown {filetype} for {abspath}')

        if relpath == '.':
            assert filetype == 'd'
            provided_root = True

    assert provided_root, 'parent layer {} lacks /'.format(self.path)
def build(self, subvol: Subvol):
    """Extract `self.tarball` into `self.into_dir` of `subvol` as root.

    When `_maybe_popen_zstd` yields a process, `tar` reads the archive
    from that process's stdout; otherwise it reads `self.tarball` directly.
    """
    with _maybe_popen_zstd(self.tarball) as maybe_proc:
        # Future: Bug: `tar` unfortunately FOLLOWS existing symlinks when
        # unpacking.  This isn't dire because the compiler's conflict
        # prevention SHOULD prevent us from going out of the subvolume
        # since this TarballItem's provides would collide with whatever is
        # already present.  However, it's hard to state that with complete
        # confidence, especially if we start adding support for following
        # directory symlinks.
        tar_cmd = [
            'tar', '-C', subvol.path(self.into_dir), '-x',
            # Block tar's weird handling of paths containing colons.
            '--force-local',
        ]
        # The uid:gid doing the extraction is root:root, so by default tar
        # would try to restore the file ownership from the archive.  In
        # some cases, we just want all the files to be root-owned.
        if self.force_root_ownership:
            tar_cmd.append('--no-same-owner')
        # The next option is an extra safeguard that is redundant with the
        # compiler's prevention of `provides` conflicts.  It has two
        # consequences:
        #
        #  (1) If a file already exists, `tar` will fail with an error.  It
        #      is **not** an error if a directory already exists --
        #      otherwise, one would never be able to safely untar something
        #      into e.g. `/usr/local/bin`.
        #
        #  (2) Less obviously, the option prevents `tar` from overwriting
        #      the permissions of `directory`, as it otherwise would.
        #
        # Thanks to the compiler's conflict detection, this should not come
        # up, but now you know.  Observe us clobber the permissions without
        # it:
        #
        #   $ mkdir IN OUT
        #   $ touch IN/file
        #   $ chmod og-rwx IN
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwxr-xr-x. 2 lesha users  6 Sep 11 21:50 OUT
        #   $ tar -C IN -czf file.tgz .
        #   $ tar -C OUT -xvf file.tgz
        #   ./
        #   ./file
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwx------. 2 lesha users 17 Sep 11 21:50 OUT
        #
        # Adding `--keep-old-files` preserves `OUT`'s metadata:
        #
        #   $ rm -rf OUT ; mkdir OUT ; ls -ld OUT
        #   drwxr-xr-x. 2 lesha users 6 Sep 11 21:53 OUT
        #   $ tar -C OUT --keep-old-files -xvf file.tgz
        #   ./
        #   ./file
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwxr-xr-x. 2 lesha users 17 Sep 11 21:54 OUT
        tar_cmd.append('--keep-old-files')

        # `-f -` makes `tar` read the archive from its stdin.
        if maybe_proc:
            archive_arg, archive_stdin = '-', maybe_proc.stdout
        else:
            archive_arg, archive_stdin = self.tarball, None
        tar_cmd += ['-f', archive_arg]

        subvol.run_as_root(tar_cmd, stdin=archive_stdin)
def build(self, subvol: Subvol):
    """Create a symlink at `self.dest` in `subvol` pointing at `self.source`."""
    link_path = subvol.path(self.dest)
    # Source is always absolute inside the image subvolume
    link_target = os.path.join('/', self.source)
    ln_cmd = ['ln', '--symbolic', '--no-dereference']
    ln_cmd += [link_target, link_path]
    subvol.run_as_root(ln_cmd)
def build(self, subvol: Subvol):
    """Copy `self.source` to `self.dest` in `subvol`, then set stat options."""
    copied_to = subvol.path(self.dest)
    copy_cmd = ['cp', self.source, copied_to]
    subvol.run_as_root(copy_cmd)
    # Mode and ownership come from this item's own settings.
    self.build_stat_options(subvol, copied_to)