def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None:
                fp.write('  config.vm.provider "virtualbox" do |v|\n'
                         '    v.memory = %d\n'
                         '  end\n' % memory)

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
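

# --- Illustrative sketch (not part of the original module) ------------------
# A minimal example of how vagrant_setup_create() might be driven directly:
# the Namespace attributes mirror exactly what the function reads above
# (args.pack, args.target, args.use_chroot, args.bind_magic_dirs, args.memory,
# args.base_image, args.distribution). The file names are placeholders, and
# this is not the project's actual CLI wiring.
def _example_vagrant_setup_create():
    import argparse

    args = argparse.Namespace(
        pack=['experiment.rpz'],      # pack produced by `reprozip pack`
        target=['vagrant-unpacked'],  # directory that will be created
        use_chroot=True,              # unpack into a chroot inside the VM
        bind_magic_dirs=True,         # recreate /dev and /proc in the chroot
        memory=None,                  # e.g. ['2048'] to set the VM memory
        base_image=[],                # empty: let select_box() pick a box
        distribution=None,
    )
    vagrant_setup_create(args)        # writes setup.sh, data.tgz, Vagrantfile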
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed",
                             len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
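

# --- Illustrative sketch (not part of the original module) ------------------
# The rpz-files.list written above is a NUL-delimited list of member names
# inside the data tarball, which is what `tar --null -T` expects. In this
# sketch b'DATA' stands in for rpz_pack.data_prefix and the paths are
# placeholders.
def _example_rpz_files_list(list_path='rpz-files.list'):
    paths = [b'etc', b'etc/hosts']        # relative paths inside the pack

    with open(list_path, 'wb') as lfp:
        for p in reversed(paths):
            lfp.write(b'DATA/' + p)       # same layout as join_root(prefix, p)
            lfp.write(b'\0')

    with open(list_path, 'rb') as lfp:
        entries = lfp.read().split(b'\0')[:-1]
    # entries == [b'DATA/etc/hosts', b'DATA/etc']; tar consumes the file with
    # `tar ... --null -T rpz-files.list`
    return entries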
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w', encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n'
                 '    chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]

        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed",
                         len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed",
                             len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n    %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
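

# --- Illustrative sketch (not part of the original module) ------------------
# When ownership cannot be restored (not running as root), chroot_create()
# rewrites the uid/gid of every tar member before extraction so the files end
# up owned by the current user. The same idea with plain tarfile; the archive
# and destination names are placeholders.
def _example_extract_as_current_user(archive='example.tar.gz',
                                     dest='unpacked-root'):
    import os
    import tarfile

    with tarfile.open(archive, 'r:gz') as tar:
        members = tar.getmembers()
        uid, gid = os.getuid(), os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
        tar.extractall(dest, members)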
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
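

# --- Illustrative sketch (not part of the original module) ------------------
# join_root(root, path) is used throughout to re-root an absolute path under
# the unpacked root/ directory (for example when rewriting absolute symlink
# targets above). A hypothetical re-implementation of that behavior, assuming
# those semantics; this is not the project's actual implementation.
def _join_root_sketch(root, abs_path):
    import os.path

    # Strip the leading '/' so os.path.join keeps the result under `root`
    return os.path.join(root, abs_path.lstrip('/'))

# _join_root_sketch('/tmp/target/root', '/usr/bin/env')
# -> '/tmp/target/root/usr/bin/env'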
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed",
                            len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
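

# --- Illustrative sketch (not part of the original module) ------------------
# The generated rpz_entrypoint.sh parses its arguments as cmd/run/do/env
# tokens, so an image built from the directory written above can be driven
# like this. The image tag is a placeholder, and the run names/numbers come
# from the pack's configuration.
def _example_invoke_entrypoint(image='my-experiment'):
    import subprocess

    # Execute every run (the entrypoint's default when no argument is given)
    subprocess.check_call(['docker', 'run', '--rm', image])

    # Execute only run 0, overriding its command line first
    subprocess.check_call([
        'docker', 'run', '--rm', image,
        'cmd', './program --verbose', 'run', '0',
    ])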
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            f = Path(f.path)
            if not f.exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    # Makes symlink targets relative
    for m in members:
        if not m.issym():
            continue
        linkname = PosixPath(m.linkname)
        if linkname.is_absolute:
            m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))

                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        record_usage(chroot_missing_pkgs=True)
        logging.warning("According to configuration, some files were left out "
                        "because they belong to the following packages:%s"
                        "\nWill copy files from HOST SYSTEM",
                        ''.join('\n    %s' % pkg
                                for pkg in packages_not_packed))
        missing_files = False
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        f, pkg.name)
                    missing_files = True
                    continue
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)
        if missing_files:
            record_usage(chroot_mising_files=True)

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            f = Path(f.path)
            if not f.exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error(
            "Some packages are missing, you should probably install "
            "them.\nUse 'reprounzip installpkgs -h' for help")

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/') for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    # Makes symlink targets relative
    for m in members:
        if not m.issym():
            continue
        linkname = PosixPath(m.linkname)
        if linkname.is_absolute:
            m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning("According to configuration, some files were "
                           "left out because they belong to the following "
                           "packages:%s\nWill copy files from HOST SYSTEM",
                           ''.join('\n %s' % pkg
                                   for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                record_usage(chroot_missing_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in config.inputs_outputs.values()
                       if f.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
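# Once chroot_create finishes, target/root is a self-contained tree. A
# minimal sketch of how a command from the experiment could then be run
# inside it (an illustration only -- the project's own run step also
# mounts /dev and /proc, restores environment variables and uses rpzsudo;
# chroot(8) requires root privileges):

def chroot_run_sketch(root, command):
    """Runs a shell command inside the unpacked root via chroot(8)."""
    # /bin/sh exists in the root: it was either packed or symlinked to the
    # busybox binary downloaded above.
    return subprocess.call(['chroot', str(root), '/bin/sh', '-c', command])

# Usage (hypothetical command):
#     chroot_run_sketch(root, 'cd /home/user/experiment && ./run.sh')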
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    rpz_pack.copy_data_tar(target / 'data.tgz')

    arch = runs[0]['architecture']

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w', encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(arch),
                      target / 'busybox',
                      'busybox-%s' % arch)
        fp.write('COPY busybox /busybox\n')

        # Installs rpzsudo
        download_file(sudo_url(arch),
                      target / 'rpzsudo',
                      'rpzsudo-%s' % arch)
        fp.write('COPY rpzsudo /rpzsudo\n\n')

        fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
        fp.write('COPY rpz-files.list /rpz-files.list\n')
        fp.write('RUN \\\n'
                 ' chmod +x /busybox /rpzsudo && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(docker_install_pkgs=True)
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write(' %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write(' %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed", len(packages))
        else:
            record_usage(docker_install_pkgs=False)

        # Untar
        paths = set()
        pathlist = []
        # Adds intermediate directories, and checks for existence in the tar
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in rpz_pack.remove_data_prefix(f.path).components:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                try:
                    rpz_pack.get_data(path)
                except KeyError:
                    logging.info("Missing file %s", path)
                else:
                    pathlist.append(path)
        rpz_pack.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        with (target / 'rpz-files.list').open('wb') as lfp:
            for p in reversed(pathlist):
                lfp.write(join_root(rpz_pack.data_prefix, p).path)
                lfp.write(b'\0')
        fp.write(' cd / && '
                 '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                 '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                 '/busybox echo "TAR reports errors, this might or might '
                 'not prevent the execution to run")\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target, pack=pack)
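# After docker_setup_create, the target directory holds everything Docker
# needs (Dockerfile, data.tgz, busybox, rpzsudo, rpz-files.list). A minimal
# sketch of the build step that would normally follow; the image tag
# 'reprounzip_experiment' is made up for the example, and this stands in
# for, rather than reproduces, the project's own build command:

def docker_build_sketch(target, image_name='reprounzip_experiment'):
    """Builds an image from the generated Dockerfile in target."""
    retcode = subprocess.call(['docker', 'build', '-t', image_name,
                               str(target)])
    if retcode != 0:
        logging.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    return image_name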
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logger.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logger.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logger.error("Some packages are missing, you should probably "
                     "install them.\nUse 'reprounzip installpkgs -h' "
                     "for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in config.inputs_outputs.values()
                       if f.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
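# The symlink rewriting above relies on join_root() re-rooting an absolute
# path under the unpacked tree, so links keep resolving inside the
# extracted files instead of escaping to the host. A small worked example
# of that behaviour, as a pure-Python sketch (illustrative only, not the
# project's join_root):
#
#     root     = /tmp/experiment/root
#     linkname = /usr/lib/libfoo.so.1
#     result   = /tmp/experiment/root/usr/lib/libfoo.so.1

def join_root_sketch(root, abs_path):
    """Re-roots an absolute POSIX path under root."""
    import posixpath
    return posixpath.join(str(root), str(abs_path).lstrip('/'))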
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        logging.warning(
            "According to configuration, some files were left out "
            "because they belong to the following packages:%s"
            "\nWill copy files from HOST SYSTEM",
            ''.join('\n %s' % pkg for pkg in packages_not_packed))
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        f, pkg.name)
                    # Nothing to copy for a file that isn't there
                    continue
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)

    # Unpacks files
    if any('..' in m.name or m.name.startswith('/')
           for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers()
               if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
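# The '..'/absolute-path check used when unpacking guards against path
# traversal from an untrusted pack. The same test, factored into a reusable
# helper for clarity (a sketch of the pattern, not a function provided by
# reprounzip):

def safe_data_members(tar):
    """Returns the DATA/ members of a pack, refusing unsafe path names."""
    members = tar.getmembers()
    if any('..' in m.name or m.name.startswith('/') for m in members):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    # Keep only the experiment's files and strip the 'DATA/' prefix
    members = [m for m in members if m.name.startswith('DATA/')]
    for m in members:
        m.name = m.name[5:]
    return members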