def read_files(files, File=File):
    """Wrap a list of filenames into file objects.

    :param files: Iterable of filenames, or None.
    :param File: Callable invoked with a ``PosixPath`` for each filename;
        defaults to the module-level ``File``.
    :returns: A list containing ``File(PosixPath(f))`` for each entry of
        `files`, or an empty list if `files` is None.
    """
    if files is not None:
        return [File(PosixPath(name)) for name in files]
    return []
def directory_run(args):
    """Runs the command in the directory.

    Builds a single shell command line that, for each selected run, changes
    to the run's working directory under the unpacked root, resets the
    environment with ``/usr/bin/env -i`` and executes the run's command (or
    `args.cmdline` if given). The runs are chained with ``&&``.

    :param args: Parsed command-line arguments; this reads `target`,
        `cmdline`, `run` and `x11` (and passes `args` on to
        `fixup_environment`).
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    ldconfig = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                                stdout=subprocess.PIPE)
    try:
        for line in ldconfig.stdout:
            # Slice rather than index: on Python 3, line[0] is an int and
            # would never compare equal to b' ' or b'\t'
            if len(line) < 3 or line[:1] in (b' ', b'\t'):
                continue
            if line.endswith(b':\n'):
                lib_dirs.append(Path(line[:-2]))
    finally:
        # Close the pipe before waiting so a child still writing to it
        # cannot block us forever if we stopped reading early
        ldconfig.stdout.close()
        ldconfig.wait()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
                shell_escape(unicode_(join_root(root, d)))
                for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root,
                               Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Forward the host's X11 settings to the experiment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i, arg in enumerate(argv):
                try:
                    arg_path = Path(arg)
                except UnicodeEncodeError:
                    continue
                if arg_path.is_absolute:
                    rooted = join_root(root, arg_path)
                    if (rooted.exists() or
                            (len(rooted.components) > 3 and
                             rooted.parent.exists())):
                        argv[i] = str(rooted)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)

    metadata_write(target, unpacked_info, 'directory')
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `pack` and `target` are read.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Extract the configuration file, then load it
    config_file = target / 'config.yml'
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(config_file)
    config = load_config_file(config_file, True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Files of packages that were not packed must be present on the host
    missing_files = False
    for pkg in (p for p in packages if not p.packfiles):
        for pkg_file in pkg.files:
            if Path(pkg_file.path).exists():
                continue
            logging.error(
                "Missing file %s (from package %s that wasn't packed) "
                "on host, experiment will probably miss it.",
                pkg_file, pkg.name)
            missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Prepare the archive members for extraction under root
        members = rpz_pack.list_data()
        for member in members:
            # Drop the 'DATA/' prefix
            member.name = str(rpz_pack.remove_data_prefix(member.name))
            if not member.issym():
                continue
            # Absolute symlink targets are re-rooted under the unpacked root
            link_target = PosixPath(member.linkname)
            if link_target.is_absolute:
                member.linkname = join_root(root, link_target).path

        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Keep a copy of the original input files, so upload can restore them
        input_files = []
        for iofile in itervalues(config.inputs_outputs):
            if iofile.read_runs:
                input_files.append(iofile.path)
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                unpacked = join_root(root, ifile)
                if unpacked.exists():
                    inputtar.add(str(unpacked), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-extracted root behind
        rmtree_fixed(root)
        raise
def finalize(self):
    """Builds a new Docker image containing the files queued for copy.

    If `self.docker_copy` is empty, only the build directory is removed.
    Otherwise a Dockerfile is written that starts from the current image,
    copies each queued file and chowns it, ``docker build`` is run, the
    previous image is untagged (unless it is the initial image) and the
    unpacked info is updated with the new image name.

    Exits the process with status 1 if ``docker build`` fails.
    """
    if not self.docker_copy:
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n %s \\\n %s\n' % (
                    shell_escape(str(src)),
                    shell_escape(str(target))))

        # Open the data archive once for all lookups, and always close it
        # (it used to be reopened for every file and never closed)
        tar = tarfile.open(str(self.target / 'data.tgz'), 'r:*')
        try:
            for src, target in self.docker_copy:
                uid = gid = None

                # Keep permissions if the file is already in there
                try:
                    info = tar.getmember(
                        str(join_root(PosixPath(b'DATA'), target)))
                    uid, gid = info.uid, info.gid
                except KeyError:
                    pass

                # Otherwise default on the first run's UID/GID
                if uid is None:
                    uid, gid = self.default_ownership

                # Lastly, use 1000
                if uid is None:
                    uid = gid = 1000

                dockerfile.write(
                    'RUN ["/busybox", "chown", "%d:%d", %s]\n' % (
                        uid, gid,
                        json.dumps(str(target)),
                    ))
        finally:
            tar.close()

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logger.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logger.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            logger.info("Untagging previous image %s",
                        from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd +
                                      ['rmi', from_image])
            if retcode != 0:
                logger.warning(
                    "Can't remove previous image, docker "
                    "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def __init__(self, enabled, target, display=None):
    """Prepares X11 forwarding: picks an X endpoint and its auth cookie.

    When `enabled` is false, only `self.enabled` is set. Otherwise this
    reads the host's Xauthority file, then sets `self.connection_info` (the
    socket family, socket type and address to connect to) and
    `self.xauth_record` (the matching cookie, or None if none was found).

    :param enabled: Whether X11 support is requested.
    :param target: Stored as `self.target`.
    :param display: Display number to use for the experiment; defaults to
        `self.DISPLAY_NUMBER`.
    :raises RuntimeError: If no way to connect to the X server was found.
    """
    self.enabled = enabled
    if not self.enabled:
        return

    self.target = target

    self.xauth = PosixPath('/.reprounzip_xauthority')
    self.display = (int(display) if display is not None
                    else self.DISPLAY_NUMBER)
    logger.debug(
        "X11 support enabled; will create Xauthority file %s "
        "for experiment. Display number is %d",
        self.xauth, self.display)

    # List of addresses that match the $DISPLAY variable
    possible, local_display = self._locate_display()
    tcp_portnum = ((6000 + local_display) if local_display is not None
                   else None)

    if ('XAUTHORITY' in os.environ and
            Path(os.environ['XAUTHORITY']).is_file()):
        xauthority = Path(os.environ['XAUTHORITY'])
    # Note: I'm assuming here that Xauthority has no XDG support
    else:
        xauthority = Path('~').expand_user() / '.Xauthority'

    # Read Xauthority file
    xauth_entries = {}
    if xauthority.is_file():
        with xauthority.open('rb') as fp:
            fp.seek(0, os.SEEK_END)
            size = fp.tell()
            fp.seek(0, os.SEEK_SET)
            while fp.tell() < size:
                entry = Xauth.from_file(fp)
                if (entry.name == 'MIT-MAGIC-COOKIE-1' and
                        entry.number == local_display):
                    if entry.family == Xauth.FAMILY_LOCAL:
                        xauth_entries[(entry.family, None)] = entry
                    elif entry.family in (Xauth.FAMILY_INTERNET,
                                          Xauth.FAMILY_INTERNET6):
                        xauth_entries[(entry.family,
                                       entry.address)] = entry
    # FIXME: this completely ignores addresses

    logger.debug("Possible X endpoints: %s", (possible, ))

    # Select socket and authentication cookie
    self.xauth_record = None
    self.connection_info = None
    for family, address in possible:
        # Checks that we have a cookie
        # (compare by value: FAMILY_LOCAL is a plain constant, identity
        # comparison is not guaranteed to work)
        entry = family, (None if family == Xauth.FAMILY_LOCAL
                         else address)
        if entry not in xauth_entries:
            continue
        if family == Xauth.FAMILY_LOCAL and hasattr(socket, 'AF_UNIX'):
            # Checks that the socket exists
            if not Path(address).exists():
                continue
            self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM,
                                    address)
            self.xauth_record = xauth_entries[entry]
            logger.debug(
                "Will connect to local X display via UNIX "
                "socket %s", address)
            break
        else:
            # Translate the Xauth family into a socket family, but look
            # the cookie up with the Xauth key that was checked above
            # (the socket family is not a valid key of xauth_entries)
            sock_family = self.X2SOCK[family]
            self.connection_info = (sock_family, socket.SOCK_STREAM,
                                    (address, tcp_portnum))
            self.xauth_record = xauth_entries[entry]
            logger.debug("Will connect to X display %s:%d via %s/TCP",
                         address, tcp_portnum,
                         "IPv6" if sock_family == socket.AF_INET6
                         else "IPv4")
            break

    # Didn't find an Xauthority record -- assume no authentication is
    # needed, but still set self.connection_info
    if self.connection_info is None:
        for family, address in possible:
            # Only try UNIX sockets, we'll use 127.0.0.1 otherwise
            if family == Xauth.FAMILY_LOCAL:
                if not hasattr(socket, 'AF_UNIX'):
                    continue
                self.connection_info = (socket.AF_UNIX,
                                        socket.SOCK_STREAM,
                                        address)
                logger.debug(
                    "Will connect to X display via UNIX socket "
                    "%s, no authentication", address)
                break
        else:
            self.connection_info = (socket.AF_INET, socket.SOCK_STREAM,
                                    ('127.0.0.1', tcp_portnum))
            logger.debug(
                "Will connect to X display 127.0.0.1:%d via "
                "IPv4/TCP, no authentication", tcp_portnum)

    if self.connection_info is None:
        raise RuntimeError("Couldn't determine how to connect to local X "
                           "server, DISPLAY is %s" % (
                               repr(os.environ['DISPLAY'])
                               if 'DISPLAY' in os.environ
                               else 'not set'))
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: Parsed command-line arguments; this reads `pack`, `target`,
        `use_chroot`, `bind_magic_dirs`, `memory`, `base_image`,
        `distribution` and `gui`.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s",
                len(packages), e)
            # Without an installer the setup script cannot be written;
            # stop here rather than fail later on an unbound name
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        # Create the destination's directory; the source's
                        # directory necessarily exists already
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(dest.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write(' config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write(' config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None or args.gui:
                fp.write(' config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write(' v.memory = %d\n' % memory)
                if args.gui:
                    fp.write(' v.gui = true\n')
                fp.write(' end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a partially set-up target behind
        target.rmtree(ignore_errors=True)
        raise
def directory_run(args):
    """Runs the command in the directory.

    Builds a single shell command line that, for each selected run, changes
    to the run's working directory under the unpacked root, resets the
    environment with ``/usr/bin/env -i`` and executes the run's command (or
    `args.cmdline` if given). The runs are chained with ``&&``.

    :param args: Parsed command-line arguments; `target`, `cmdline` and
        `run` are read.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Must be absolute: every run starts with a 'cd', after which paths
    # relative to the original working directory no longer resolve
    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    ldconfig = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                                stdout=subprocess.PIPE)
    try:
        for line in ldconfig.stdout:
            # Slice rather than index: on Python 3, line[0] is an int and
            # would never compare equal to b' ' or b'\t'
            if len(line) < 3 or line[:1] in (b' ', b'\t'):
                continue
            if line.endswith(b':\n'):
                lib_dirs.append(Path(line[:-2]))
    finally:
        # Close the pipe before waiting so a child still writing to it
        # cannot block us forever if we stopped reading early
        ldconfig.stdout.close()
        ldconfig.wait()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
                shell_escape(unicode_(join_root(root, d)))
                for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root,
                               Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        # Variable names are escaped too: they come from the pack's
        # configuration and end up in a shell command line
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(run['environ'])
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the configuration from the pack, copies the pack into the
    target directory and writes a Dockerfile that installs the packages
    that were not packed (plus ``sudo``) and untars the packed files.

    :param args: Parsed command-line arguments; `pack`, `target` and
        `base_image` are read.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    try:
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))
    finally:
        tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        target_distribution = None
        base_image = args.base_image[0]
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)
        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n')

        # Installs missing packages
        packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        packages += [Package('sudo', None, packfiles=False)]
        if packages:
            installer = select_installer(pack, runs, target_distribution)
            # Updates package sources
            # (skipped when the installer has nothing to run, so that no
            # empty command ends up in the '&&' chain)
            update_script = installer.update_script()
            if update_script:
                fp.write(' %s && \\\n' % update_script)
            # Installs necessary packages
            fp.write(' %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        try:
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
        finally:
            tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write(' cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
def _showfiles_print_runs(args, runs, key, assigned_files=None):
    """Prints the files listed under `key` for each run.

    :param args: Parsed command-line arguments; `verbosity` is read.
    :param runs: List of run dictionaries from the configuration.
    :param key: Either ``'input_files'`` or ``'output_files'``.
    :param assigned_files: If not None, a dictionary mapping file names to
        the path of the file that replaced them; an extra line is then
        printed for each file.
    """
    for i, run in enumerate(runs):
        if len(runs) > 1:
            print(" Run %d:" % i)
        for name, path in iteritems(run[key]):
            if args.verbosity >= 2:
                print(" %s (%s)" % (name, path))
            else:
                print(" %s" % name)
            if assigned_files is None:
                continue
            if assigned_files.get(name) is not None:
                assigned = PosixPath(assigned_files[name])
            else:
                assigned = "(original)"
            print(" %s" % assigned)


def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.

    :param args: Parsed command-line arguments; `pack` and `verbosity` are
        read.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        runs, packages, other_files = load_config_file(pack / 'config.yml',
                                                       canonical=True)
        # The '.reprounzip' file is a pickled dictionary, it contains the name
        # of the files that replaced each input file (if upload was used)
        # NOTE(review): pickle.load executes arbitrary code if the file was
        # tampered with; only use this on directories you unpacked yourself
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        input_files = unpacked_info.get('input_files', {})
    else:  # pack.is_file()
        # Reads info from a pack file
        runs, packages, other_files = load_config(pack)
        # No assignment information in a pack
        input_files = None

    print("Input files:")
    _showfiles_print_runs(args, runs, 'input_files', input_files)

    print("Output files:")
    _showfiles_print_runs(args, runs, 'output_files')
def run(self, files):
    """Uploads files to replace the experiment's input files.

    With no file specification, lists the input files and what is
    currently assigned to each, then returns.

    :param files: List of ``local_path:input_name`` specifications. An empty
        `local_path` restores the original file from the pack; an
        `input_name` starting with ``/`` is used as a path directly.
    """
    reprounzip.common.record_usage(upload_files=len(files))
    inputs_outputs = self.get_config().inputs_outputs

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for input_name in sorted(n for n, f in inputs_outputs.items()
                                 if f.read_runs):
            assigned = self.input_files.get(input_name)
            if assigned is None:
                assigned = "(original)"
            elif assigned is False:
                assigned = "(not created)"
            elif assigned is True:
                assigned = "(generated)"
            else:
                assert isinstance(assigned, (bytes, str))
            print(" %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)

    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logger.critical("Invalid file specification: %r",
                                filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            if input_name.startswith('/'):
                input_path = PosixPath(input_name)
            else:
                try:
                    input_path = inputs_outputs[input_name].path
                except KeyError:
                    logger.critical("Invalid input file: %r", input_name)
                    sys.exit(1)

            temp = None

            try:
                if not local_path:
                    # Restore original file from pack
                    logger.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(input_name,
                                                             input_path,
                                                             temp)
                    if local_path is None:
                        logger.warning("No original packed, can't restore "
                                       "input file %s", input_name)
                        continue
                else:
                    local_path = Path(local_path)
                    logger.debug("Uploading file %s to %s",
                                 local_path, input_path)
                    if not local_path.exists():
                        logger.critical("Local file %s doesn't exist",
                                        local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)
            finally:
                # Remove the temporary file whatever happens, including
                # when the upload fails
                if temp is not None:
                    temp.remove()

            if temp is not None:
                # The original was restored: nothing is assigned anymore
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()
def run(self, files, all_):
    """Downloads output files from the experiment.

    With neither file specifications nor `all_`, lists the output files and
    returns.

    :param files: List of ``name[:path]`` specifications. With no ``:``, the
        name is also used as the local path; with an empty path, the file
        is handed to `download_and_print` instead of `download`.
    :param all_: If true, every output file that was not named explicitly is
        downloaded too, to a local path equal to its name.
    """
    reprounzip.common.record_usage(download_files=len(files))
    inputs_outputs = self.get_config().inputs_outputs

    # Nothing requested: just list what could be downloaded
    if not (all_ or files):
        print("Output files:")
        listing = [n for n, f in inputs_outputs.items() if f.write_runs]
        listing.sort()
        for output_name in listing:
            print(" %s" % output_name)
        return

    # Resolve the name[:path] syntax
    resolved_files = []
    remaining = {n for n, f in inputs_outputs.items() if f.write_runs}
    for filespec in files:
        parts = filespec.split(':', 1)
        nb_parts = len(parts)
        if nb_parts == 1:
            output_name = local_path = filespec
        elif nb_parts == 2:
            output_name, local_path = parts
        else:
            logger.critical("Invalid file specification: %r",
                            filespec)
            sys.exit(1)
        if local_path:
            local_path = Path(local_path)
        else:
            local_path = None
        remaining.discard(output_name)
        resolved_files.append((output_name, local_path))

    # With all_, also take every file that wasn't explicitly named
    if all_:
        resolved_files.extend((output_name, Path(output_name))
                              for output_name in remaining)

    self.prepare_download(resolved_files)

    success = True
    try:
        # Download files
        for output_name, local_path in resolved_files:
            if not output_name.startswith('/'):
                try:
                    remote_path = inputs_outputs[output_name].path
                except KeyError:
                    logger.critical("Invalid output file: %r",
                                    output_name)
                    sys.exit(1)
            else:
                remote_path = PosixPath(output_name)

            logger.debug("Downloading file %s", remote_path)
            if local_path is not None:
                ret = self.download(remote_path, local_path)
            else:
                ret = self.download_and_print(remote_path)
            if ret is None:
                ret = True
                warnings.warn("download() returned None instead of "
                              "True/False, assuming True",
                              category=DeprecationWarning)
            if not ret:
                success = False
        if not success:
            sys.exit(1)
    finally:
        self.finalize()
def test_lies_under(self):
    """ Tests the lies_under method."""
    # (path, candidate parent, expected result of path.lies_under(parent))
    cases = [
        (b'/tmp', b'/', True),
        (b'/tmp', b'/var', False),
        (b'/tmp/some/file/here', b'/tmp/some', True),
        (b'/tmp/some/file/here', b'/tmp/no', False),
        (b'/tmp/some/file/here', b'/no/tmp/some', False),
        (b'/tmp/some/file/here', b'/no/some', False),
        (b'/tmp/some/file/here', b'/tmp/some/file/here', True),
        (b'/', b'/', True),
        (b'', b'', True),
        (b'test', b'', True),
        (b'', b'test', False),
        (b'test', b'/', False),
    ]
    for path, parent, expected in cases:
        result = PosixPath(path).lies_under(parent)
        if expected:
            self.assertTrue(result)
        else:
            self.assertFalse(result)
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `pack` and `target` are read.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks packages
        missing_files = False
        for pkg in packages:
            if pkg.packfiles:
                continue
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s that wasn't "
                        "packed) on host, experiment will probably miss it.",
                        f, pkg.name)
                    missing_files = True
        if missing_files:
            logging.error(
                "Some packages are missing, you should probably install "
                "them.\nUse 'reprounzip installpkgs -h' for help")

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives that could write outside of the root
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Remove 'DATA/' prefix
            m.name = m.name[5:]
            # Absolute symlink targets are re-rooted under the unpacked root
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, linkname).path
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        # Also runs on sys.exit(), so the pack is never left open
        tar.close()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `pack`, `target` and
        `restore_owner` are read.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            logging.warning(
                "According to configuration, some files were left out "
                "because they belong to the following packages:%s"
                "\nWill copy files from HOST SYSTEM",
                ''.join('\n %s' % pkg for pkg in packages_not_packed))
            for pkg in packages_not_packed:
                for f in pkg.files:
                    f = Path(f.path)
                    if not f.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            f, pkg.name)
                        # Nothing to copy; carry on with the other files
                        # instead of failing on the copy below
                        continue
                    dest = join_root(root, f)
                    dest.parent.mkdir(parents=True)
                    if f.is_link():
                        dest.symlink(f.read_link())
                    else:
                        f.copy(dest)
                    if restore_owner:
                        stat = f.stat()
                        dest.chown(stat.st_uid, stat.st_gid)

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives that could write outside of the root
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Remove 'DATA/' prefix
            m.name = m.name[5:]
        if not restore_owner:
            # Extract everything as the current user
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        # Also runs on sys.exit(), so the pack is never left open
        tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
def load_iofiles(config, runs):
    """Loads the inputs_outputs part of the configuration.

    This tests for duplicates, merge the lists of executions, and optionally
    loads from the runs for reprozip < 0.7 compatibility.

    :param config: mapping holding the configuration; the ``inputs_outputs``
        entry is read if present.
    :param runs: list of run dictionaries; when ``inputs_outputs`` is absent
        from `config`, their ``input_files`` and ``output_files`` entries are
        popped and converted.
    :return: dictionary mapping each file name to its `InputOutputFile`.
    :raises InvalidConfig: if an entry has missing or unknown keys, or if its
        ``read_by_runs``/``written_by_runs`` is not a list of integers.
    """
    files_list = config.get('inputs_outputs') or []

    # reprozip < 0.7 compatibility: read input_files and output_files from runs
    if 'inputs_outputs' not in config:
        for i, run in enumerate(runs):
            for rkey, wkey in (('input_files', 'read_by_runs'),
                               ('output_files', 'written_by_runs')):
                for k, p in run.pop(rkey, {}).items():
                    files_list.append({'name': k, 'path': p, wkey: [i]})

    files = {}  # name:str: InputOutputFile
    paths = {}  # path:PosixPath: name:str
    required_keys = {'name', 'path'}
    optional_keys = {'read_by_runs', 'written_by_runs'}
    uniquenames = UniqueNames()
    for i, f in enumerate(files_list):
        keys = set(f)
        if (not keys.issubset(required_keys | optional_keys) or
                not keys.issuperset(required_keys)):
            raise InvalidConfig("File #%d has invalid keys" % i)
        name = f['name']
        if name.startswith('/'):
            logger.warning(
                "File name looks like a path: %s, prefixing with "
                ".", name)
            name = '.%s' % name
        path = PosixPath(f['path'])

        # Validate the raw values *before* sorting: sorted() always returns
        # a list (hiding a wrong container type) and raises TypeError on
        # values that are not iterable or not mutually comparable.
        run_lists = {}
        for key in ('read_by_runs', 'written_by_runs'):
            value = f.get(key, [])
            if (not isinstance(value, (tuple, list)) or
                    not all(isinstance(e, int) for e in value)):
                raise InvalidConfig("%s should be a list of integers" % key)
            run_lists[key] = sorted(value)
        readers = run_lists['read_by_runs']
        writers = run_lists['written_by_runs']

        if name in files:
            # Same name used for a different path: pick a fresh name
            if files[name].path != path:
                old_name, name = name, uniquenames(name)
                logger.warning(
                    "File name appears multiple times: %s\n"
                    "Using name %s instead",
                    old_name, name)
        else:
            uniquenames.insert(name)

        if path in paths:
            # Path already known: merge the runs into the existing entry
            if paths[path] == name:
                logger.warning("File appears multiple times: %s", name)
            else:
                logger.warning(
                    "Two files have the same path (but different "
                    "names): %s, %s\nUsing name %s",
                    name, paths[path], paths[path])
                name = paths[path]
            files[name].read_runs.update(readers)
            files[name].write_runs.update(writers)
        else:
            paths[path] = name
            files[name] = InputOutputFile(path, readers, writers)

    return files
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with the configuration file,
    a copy of the pack's data archive (``data.tgz``), the ``busybox`` and
    ``rpzsudo`` binaries, a ``Dockerfile`` and the ``rpz-files.list`` file
    that the Dockerfile's ``tar`` command reads.

    :param args: parsed command-line arguments; this function reads
        ``args.pack``, ``args.target``, ``args.base_image``,
        ``args.distribution`` and ``args.install_pkgs``.

    Exits the process if the target exists or no package installer can be
    selected. On any other exception the target directory is removed and the
    exception is re-raised.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        # `config` keeps the whole return value, which is also unpacked into
        # its three components
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            # Base image given explicitly; the distribution is only known if
            # it was given as well
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        # Architecture of the first run selects the helper binaries
        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            # Starts a single RUN instruction; the following writes append
            # '&&'-chained commands to it
            fp.write('RUN \\\n'
                     ' chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                # (`missing_packages` are the packed ones, whose files are
                # taken from the archive below)
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s",
                        len(packages), e)
                    # NOTE(review): SystemExit is not an Exception subclass,
                    # so the cleanup handler below does not run on this path
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(' %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write(' %s && \\\n' % installer.install_script(packages))
                logging.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed",
                    len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()  # every path already considered
            pathlist = []  # paths to extract, in discovery order
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # resolv.conf under /etc, /run or /var is never extracted
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                # Walks from the root down to the file, one component at a
                # time, so parent directories are listed before the file
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The list is NUL-separated, matching tar's --null option below
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # Last command of the RUN instruction; a tar failure only prints
            # a message because of the '||' fallback
            fp.write(' cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built directory behind
        target.rmtree(ignore_errors=True)
        raise
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and the configuration file, and writes a graph
    of programs, files and packages in DOT format.

    :param target: path of the DOT file to write.
    :param configfile: path of the configuration file, used to find which
        package each file belongs to.
    :param database: path of the SQLite trace database.
    :param all_forks: if True, keep the intermediate program created by a
        fork even when it did nothing before exec'ing.

    Exits the process if the configuration file does not exist.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    processes = {}
    all_programs = []
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # The connection is closed even if reading the events fails
    try:
        # This is a bit weird. We need to iterate on all types of events at
        # the same time, ordering by timestamp, so we decorate-sort-undecorate
        # Decoration adds timestamp (for sorting) and tags by event type, one
        # of 'process', 'open' or 'exec'

        # Reads processes from the database
        process_cursor = conn.cursor()
        process_rows = process_cursor.execute(
            ''' SELECT id, parent, timestamp FROM processes ORDER BY id ''')

        # ... and opened files...
        file_cursor = conn.cursor()
        file_rows = file_cursor.execute(
            ''' SELECT name, timestamp, mode, process FROM opened_files ORDER BY id ''')

        # ... as well as executed files.
        exec_cursor = conn.cursor()
        exec_rows = exec_cursor.execute(
            ''' SELECT name, timestamp, process, argv FROM executed_files ORDER BY id ''')

        # Loop on all event lists
        logging.info("Getting all events from database...")
        rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                           ((r[1], 'open', r) for r in file_rows),
                           ((r[1], 'exec', r) for r in exec_rows))

        for ts, event_type, data in rows:
            if event_type == 'process':
                r_id, r_parent, r_timestamp = data
                if r_parent is not None:
                    # A forked process starts with its parent's binary
                    parent = processes[r_parent]
                    binary = parent.binary
                else:
                    parent = None
                    binary = None
                p = Process(r_id, parent, r_timestamp, False, binary,
                            C_INITIAL if r_parent is None else C_FORK)
                processes[r_id] = p
                all_programs.append(p)

            elif event_type == 'open':
                r_name, r_timestamp, r_mode, r_process = data
                r_name = PosixPath(r_name)
                if r_mode != FILE_WDIR:
                    process = processes[r_process]
                    files.add(r_name)
                    edges.add((process, r_name, r_mode, None))

            elif event_type == 'exec':
                r_name, r_timestamp, r_process, r_argv = data
                r_name = PosixPath(r_name)
                process = processes[r_process]
                binaries.add(r_name)
                # Here we split this process in two "programs", unless the
                # previous one hasn't done anything since it was created via
                # fork()
                if not all_forks and not process.acted:
                    process.binary = r_name
                    process.created = C_FORKEXEC
                    process.acted = True
                else:
                    process = Process(process.pid,
                                      process,
                                      r_timestamp,
                                      True,         # Hides exec only once
                                      r_name,
                                      C_EXEC)
                    all_programs.append(process)
                    processes[r_process] = process
                # argv is NUL-separated, possibly with a trailing NUL
                argv = tuple(r_argv.split('\0'))
                if not argv[-1]:
                    argv = argv[:-1]
                edges.add((process, r_name, None, argv))

        process_cursor.close()
        file_cursor.close()
        exec_cursor.close()
    finally:
        conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n /* programs */\n node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            # id() gives each program object a unique node name
            fp.write(' prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write(' prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n node [shape=ellipse];\n\n /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), pkg_files) in enumerate(
                iteritems(package_files)):
            fp.write(' subgraph cluster%d {\n label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in pkg_files:
                fp.write(' "%s";\n' % escape(unicode_(f)))
            fp.write(' }\n')

        fp.write('\n /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write(' "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                # Execution: binary -> program, labelled with the arguments
                fp.write(
                    ' "%s" -> prog%d [color=blue, label="%s"];\n' % (
                        escape(unicode_(f)),
                        id(prog),
                        escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write(' prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write(' "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_includes=None, regex_replaces=None,
             aggregates=None):
    """Main function for the graph subcommand.

    Reads the events from the trace database, filters and groups the files,
    then hands everything to `graph_dot()` or `graph_json()`.

    :param target: path of the output file; must not exist yet.
    :param configfile: path of the configuration file.
    :param database: path of the trace database, passed to `read_events()`.
    :param all_forks: passed through to `read_events()`.
    :param graph_format: 'dot' or 'json' (also accepted in upper case).
    :param level_pkgs: package detail level, parsed by `parse_levels()`.
    :param level_processes: process detail level, parsed by `parse_levels()`.
    :param level_other_files: detail level for files outside packages,
        parsed by `parse_levels()`.
    :param regex_filters: patterns; files matching any of them are dropped.
    :param regex_includes: patterns; if given, files matching none of them
        are dropped.
    :param regex_replaces: ``(pattern, replacement)`` pairs applied in order
        to each file path.
    :param aggregates: path prefixes; a path starting with one of them is
        replaced by that prefix (first match wins).

    Exits the process on an unknown format, an existing target or a missing
    configuration file.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logger.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    if target.exists():
        logger.critical("Output file %s exists", target)
        sys.exit(1)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = config.inputs_outputs
    inputs_outputs_map = dict((f.path, n)
                              for n, f in config.inputs_outputs.items())
    has_thread_flag = config.format_version >= LooseVersion('0.7')
    has_exit_timestamp = config.format_version >= LooseVersion('1.1')

    runs, files, edges = read_events(database, all_forks,
                                     has_thread_flag, has_exit_timestamp)

    # Label the runs
    if len(runs) != len(config.runs):
        logger.warning("Configuration file doesn't list the same number of "
                       "runs we found in the database!")
    else:
        for config_run, run in zip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    # Every per-iteration value is bound through a default argument, so each
    # lambda keeps its own pattern (and replacement) instead of seeing the
    # loop variables' final values.
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    include = [lambda path, r=re.compile(p): r.search(path) is not None
               for p in regex_includes or []]
    replace = [lambda path, r=re.compile(p), repl=repl: r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        """Returns the (possibly rewritten) path, or None to drop the file.
        """
        pathuni = str(path)
        if include and not any(f(pathuni) for f in include):
            logger.debug("IGN(include) %s", pathuni)
            return None
        if any(f(pathuni) for f in ignore):
            logger.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            # Nothing can rewrite the path: keep the original object
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logger.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logger.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logger.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version,
                                      pkg.meta.get('section', None))
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        # Packages are numbered in name order
        packages = sorted(packages.values(), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        # Truncates paths outside packages to the requested depth
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files
                              if f in inputs_outputs_map)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, inputs_outputs_map,
            level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Creates the target directory and fills it with the configuration file,
    a copy of the pack's data archive (``data.tgz``), the ``busybox`` and
    ``rpzsudo`` binaries, a ``Dockerfile``, the ``rpz-files.list`` file read
    by the Dockerfile's ``tar`` command, and the ``rpz_entrypoint.sh`` script
    used as the image's entry point.

    :param args: parsed command-line arguments; this function reads
        ``args.pack``, ``args.target``, ``args.base_image``,
        ``args.distribution`` and ``args.install_pkgs``.

    Exits the process if the target exists or no package installer can be
    selected. On any other exception the target directory is removed and the
    exception is re-raised.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        # `config` keeps the whole return value, which is also unpacked into
        # its three components
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            # Base image given explicitly; the distribution is only known if
            # it was given as well
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        # Architecture of the first run selects the helper binaries
        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            # Starts a single RUN instruction; the following writes append
            # '&&'-chained commands to it
            fp.write('RUN \\\n'
                     ' chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                # (`missing_packages` are the packed ones, whose files are
                # taken from the archive below)
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s",
                        len(packages), e)
                    # NOTE(review): SystemExit is not an Exception subclass,
                    # so the cleanup handler below does not run on this path
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(' %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write(' %s && \\\n' % installer.install_script(packages))
                logger.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed",
                    len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()  # every path already considered
            pathlist = []  # paths to extract, in discovery order
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # resolv.conf under /etc, /run or /var is never extracted
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                # Walks from the root down to the file, one component at a
                # time, so parent directories are listed before the file
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The list is NUL-separated, matching tar's --null option below
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # Last command of the RUN instruction; a tar failure only prints
            # a message because of the '||' fallback
            fp.write(' cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "env STR" sets extra environment variables for the next run
            # "do STR" executes a command as-is
            fp.write('#!/bin/sh\n'
                     '\n'
                     'COMMAND=\n'
                     'ENVVARS=\n'
                     '\n'
                     'if [ $# = 0 ]; then\n'
                     ' exec /busybox sh /rpz_entrypoint.sh')
            # With no argument, the script re-executes itself with one
            # "run N" per run
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                ' case "$1" in\n'
                ' help)\n'
                ' echo "Image built from reprounzip-docker" >&2\n'
                ' echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                ' echo " \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                ' echo " \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                ' echo "By default, all the runs are executed." '
                '>&2\n'
                ' echo "The runs in this image are:" >&2\n')
            # One help line per run, showing its id and command line
            for run in runs:
                fp.write(
                    ' echo " {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(' exit 0\n'
                     ' ;;\n'
                     ' do)\n'
                     ' shift\n'
                     ' $1\n'
                     ' ;;\n'
                     ' env)\n'
                     ' shift\n'
                     ' ENVVARS="$1"\n'
                     ' ;;\n'
                     ' cmd)\n'
                     ' shift\n'
                     ' COMMAND="$1"\n'
                     ' ;;\n'
                     ' run)\n'
                     ' shift\n'
                     ' case "$1" in\n')
            # One case branch per run, matching either its id or its index
            for i, run in enumerate(runs):
                # The recorded binary replaces argv[0]
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(' {name})\n'
                         ' RUNCOMMAND={cmd}\n'
                         ' RUNWD={wd}\n'
                         ' RUNENV={env}\n'
                         ' RUNUID={uid}\n'
                         ' RUNGID={gid}\n'
                         ' ;;\n'.format(
                             name='%s|%d' % (run['id'], i),
                             cmd=shell_escape(cmdline),
                             wd=shell_escape(run['workingdir']),
                             env=shell_escape(' '.join(
                                 '%s=%s' % (shell_escape(k), shell_escape(v))
                                 for k, v in iteritems(run['environ']))),
                             # uid/gid fall back to 1000 when not recorded
                             uid=run.get('uid', 1000),
                             gid=run.get('gid', 1000)))
            fp.write(
                ' *)\n'
                ' echo "RPZ: Unknown run $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' if [ -n "$COMMAND" ]; then\n'
                ' RUNCOMMAND="$COMMAND"\n'
                ' COMMAND=\n'
                ' fi\n'
                ' export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                ' ENVVARS=\n'
                ' ;;\n'
                ' *)\n'
                ' echo "RPZ: Unknown option $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built directory behind
        target.rmtree(ignore_errors=True)
        raise
def run(self, files):
    """Lists the input files, or replaces some of them in the experiment.

    :param files: list of ``local_path:input_name`` specifications. An empty
        ``local_path`` restores the original file from the pack. If the list
        is empty, the input files of each run are printed with their current
        assignment and nothing else happens.

    Exits the process on a malformed specification, an unknown input name or
    a local file that doesn't exist. `finalize()` is always called once
    uploading has started.
    """
    runs = self.get_runs_from_config()

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for i, run in enumerate(runs):
            if len(runs) > 1:
                print(" Run %d:" % i)
            for input_name in run['input_files']:
                if self.input_files.get(input_name) is not None:
                    assigned = PosixPath(self.input_files[input_name])
                else:
                    assigned = "(original)"
                print(" %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)

    # Get the path of each input file
    all_input_files = {}
    for run in runs:
        all_input_files.update(run['input_files'])

    try:
        # Upload files
        for filespec in files:
            # Splits on the last colon so local paths may contain colons
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            try:
                input_path = PosixPath(all_input_files[input_name])
            except KeyError:
                logging.critical("Invalid input file: %r", input_name)
                sys.exit(1)

            temp = None
            try:
                if not local_path:
                    # Restore original file from pack
                    logging.debug("Restoring input file %s", input_path)
                    fd, temp = Path.tempfile(prefix='reprozip_input_')
                    os.close(fd)
                    local_path = self.extract_original_input(input_name,
                                                             input_path,
                                                             temp)
                else:
                    local_path = Path(local_path)
                    logging.debug("Uploading file %s to %s",
                                  local_path, input_path)
                    if not local_path.exists():
                        logging.critical("Local file %s doesn't exist",
                                         local_path)
                        sys.exit(1)

                self.upload_file(local_path, input_path)
            finally:
                # The temporary copy is removed whether or not the
                # extraction and upload succeeded
                if temp is not None:
                    temp.remove()

            if temp is not None:
                # Back to the original: forget any previous assignment. The
                # input might never have been replaced, so a missing key is
                # not an error.
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()