def showfiles(args): """Writes out the input and output files. Works both for a pack file and for an extracted directory. """ pack = Path(args.pack[0]) if not pack.exists(): logging.critical("Pack or directory %s does not exist", pack) sys.exit(1) if pack.is_dir(): # Reads info from an unpacked directory runs, packages, other_files = load_config_file(pack / 'config.yml', canonical=True) # The '.reprounzip' file is a pickled dictionary, it contains the name # of the files that replaced each input file (if upload was used) with pack.open('rb', '.reprounzip') as fp: unpacked_info = pickle.load(fp) input_files = unpacked_info.get('input_files', {}) print("Input files:") for i, run in enumerate(runs): if len(runs) > 1: print(" Run %d:" % i) for input_name, path in iteritems(run['input_files']): print(" %s (%s)" % (input_name, path)) if input_files.get(input_name) is not None: assigned = PosixPath(input_files[input_name]) else: assigned = "(original)" print(" %s" % assigned) print("Output files:") for i, run in enumerate(runs): if len(runs) > 1: print(" Run %d:" % i) for output_name, path in iteritems(run['output_files']): print(" %s (%s)" % (output_name, path)) else: # pack.is_file() # Reads info from a pack file runs, packages, other_files = load_config(pack) print("Input files:") for i, run in enumerate(runs): if len(runs) > 1: print(" Run %d:" % i) for input_name, path in iteritems(run['input_files']): print(" %s (%s)" % (input_name, path)) print("Output files:") for i, run in enumerate(runs): if len(runs) > 1: print(" Run %d:" % i) for output_name, path in iteritems(run['output_files']): print(" %s (%s)" % (output_name, path))
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml', canonical=True)

        # The '.reprounzip' file is a pickled dictionary, it contains the
        # name of the files that replaced each input file (if upload was
        # used)
        with pack.open('rb', '.reprounzip') as fp:
            unpacked_info = pickle.load(fp)
        assigned_input_files = unpacked_info.get('input_files', {})

        print("Input files:")
        for input_name, f in iteritems(config.inputs_outputs):
            if not f.read_runs:
                continue
            print("    %s (%s)" % (input_name, f.path))
            if assigned_input_files.get(input_name) is not None:
                assigned = assigned_input_files[input_name]
            else:
                assigned = "(original)"
            print("      %s" % assigned)

        print("Output files:")
        for output_name, f in iteritems(config.inputs_outputs):
            if f.write_runs:
                print("    %s (%s)" % (output_name, f.path))

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        print("Input files:")
        for input_name, f in iteritems(config.inputs_outputs):
            if f.read_runs:
                print("    %s (%s)" % (input_name, f.path))

        print("Output files:")
        for output_name, f in iteritems(config.inputs_outputs):
            if f.write_runs:
                print("    %s (%s)" % (output_name, f.path))

def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the {in/out}put files to the unpacker
    metadata.

    :param config: The configuration as returned by `load_config()`, which
    will be used to list the input and output files and to determine which
    ones have been packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the runs since the experiment
        was unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}
    path2iofile = {f.path: n
                   for n, f in iteritems(config.inputs_outputs)}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))
    return dct

def __call__(self, **kwargs):
    info = {}
    for arg, argtype in iteritems(self._args):
        if argtype == Signal.REQUIRED:
            try:
                info[arg] = kwargs.pop(arg)
            except KeyError:
                warnings.warn("signal: Missing required argument %s; "
                              "signal ignored" % arg,
                              category=SignalWarning, stacklevel=2)
                return
        else:
            if arg in kwargs:
                info[arg] = kwargs.pop(arg)
            if argtype == Signal.DEPRECATED:
                warnings.warn("signal: Argument %s is deprecated" % arg,
                              category=SignalWarning, stacklevel=2)
    if kwargs:
        arg = next(iter(kwargs))
        warnings.warn("signal: Unexpected argument %s; signal ignored" % arg,
                      category=SignalWarning, stacklevel=2)
        return
    for listener in self._listeners:
        try:
            listener(**info)
        except Exception:
            traceback.print_exc()
            warnings.warn("signal: Got an exception calling a signal",
                          category=SignalWarning)

def metadata_initial_iofiles(config, dct=None):
    """Add the initial state of the input files to the unpacker metadata.

    :param config: The configuration as returned by `load_config()`, which
    will be used to list the input files and to determine which ones have
    been packed (and therefore exist initially).

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the runs since the experiment
        was unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet
    """
    if dct is None:
        dct = {}
    path2iofile = {f.path: n
                   for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs}

    def packed_files():
        yield config.other_files
        for pkg in config.packages:
            if pkg.packfiles:
                yield pkg.files

    for f in itertools.chain.from_iterable(packed_files()):
        f = f.path
        path2iofile.pop(f, None)

    dct['input_files'] = dict((n, False) for n in itervalues(path2iofile))
    return dct

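# Illustrative sketch (not part of the original module) of the effect of
# metadata_initial_iofiles on a toy configuration. The InputOutputFile
# stand-in and all values below are invented; the real objects come from
# load_config().
from collections import namedtuple

InputOutputFile = namedtuple('InputOutputFile',
                             ['path', 'read_runs', 'write_runs'])

example_inputs_outputs = {
    'data': InputOutputFile('/home/user/data.csv', [0], []),   # was packed
    'extra': InputOutputFile('/tmp/absent.bin', [0], []),      # wasn't packed
}
packed_paths = {'/home/user/data.csv'}

# Same bookkeeping as above: inputs whose path was packed are dropped,
# the rest start out as False ("doesn't exist")
path2iofile = {f.path: n for n, f in example_inputs_outputs.items()
               if f.read_runs}
for path in packed_paths:
    path2iofile.pop(path, None)
print({n: False for n in path2iofile.values()})  # {'extra': False}
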
def __call__(self, **kwargs):
    info = {}
    for arg, argtype in iteritems(self._args):
        if argtype == Signal.REQUIRED:
            try:
                info[arg] = kwargs.pop(arg)
            except KeyError:
                warnings.warn("signal: Missing required argument %s; "
                              "signal ignored" % arg,
                              category=SignalWarning, stacklevel=2)
                return
        else:
            if arg in kwargs:
                info[arg] = kwargs.pop(arg)
            if argtype == Signal.DEPRECATED:
                warnings.warn("signal: Argument %s is deprecated" % arg,
                              category=SignalWarning, stacklevel=2)
    if kwargs:
        arg = next(iter(kwargs))
        warnings.warn("signal: Unexpected argument %s; signal ignored" % arg,
                      category=SignalWarning, stacklevel=2)
        return
    for listener in self._listeners:
        try:
            listener(**info)
        except Exception:
            traceback.print_exc()
            warnings.warn("signal: Got an exception calling a signal",
                          category=SignalWarning)

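# Self-contained sketch of the dispatch pattern implemented by __call__
# above: missing required arguments and unexpected arguments abort
# delivery with a warning, and listener exceptions are caught so one bad
# listener cannot break the emitter. This toy Signal is an illustration,
# not the real class; its constructor and subscribe() are assumptions.
import traceback
import warnings


class SignalWarning(UserWarning):
    pass


class Signal(object):
    REQUIRED, OPTIONAL, DEPRECATED = range(3)

    def __init__(self, required=()):
        self._args = dict((name, Signal.REQUIRED) for name in required)
        self._listeners = set()

    def subscribe(self, func):
        self._listeners.add(func)

    def __call__(self, **kwargs):
        info = {}
        for arg, argtype in self._args.items():
            if argtype == Signal.REQUIRED:
                if arg not in kwargs:
                    warnings.warn("signal: Missing required argument %s; "
                                  "signal ignored" % arg,
                                  category=SignalWarning, stacklevel=2)
                    return
                info[arg] = kwargs.pop(arg)
        if kwargs:
            warnings.warn("signal: Unexpected argument %s; signal ignored"
                          % next(iter(kwargs)),
                          category=SignalWarning, stacklevel=2)
            return
        for listener in self._listeners:
            try:
                listener(**info)
            except Exception:
                traceback.print_exc()


pre_run = Signal(required=['target'])
pre_run.subscribe(lambda target: print("about to run in %s" % target))
pre_run(target='/tmp/experiment')  # delivered to the listener
pre_run()                          # SignalWarning: missing required argument
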
def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'chroot')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'chroot')

def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

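# How the chroot command lines above are assembled, shown in isolation
# with made-up paths and environment; shlex.quote is a simplified
# stand-in for the project's shell_escape helper.
from shlex import quote as shell_escape

root = '/tmp/experiment/root'
workingdir = '/home/user'
environ = {'HOME': '/home/user', 'LANG': 'en_US.UTF-8'}
argv = ['./run.sh', 'input file.txt']

cmd = 'cd %s && ' % shell_escape(workingdir)
cmd += '/usr/bin/env -i '
cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                for k, v in sorted(environ.items()))
cmd += ' ' + ' '.join(shell_escape(a) for a in argv)
# The whole inner command is escaped again and handed to /bin/sh inside
# the chroot, running as the recorded uid:gid
cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
    '1000:1000', shell_escape(root), shell_escape(cmd))
print(cmd)
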
def docker_setup_build(args):
    """Builds the container from the Dockerfile
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    if 'initial_image' in unpacked_info:
        logger.critical("Image already built")
        sys.exit(1)

    if args.image_name:
        image = args.image_name[0]
        if not isinstance(image, bytes):
            image = image.encode('ascii')
    else:
        image = make_unique_name(b'reprounzip_image_')

    logger.info("Calling 'docker build'...")
    try:
        retcode = subprocess.call(args.docker_cmd.split() +
                                  ['build', '-t'] + args.docker_option +
                                  [image, '.'],
                                  cwd=target.path)
    except OSError:
        logger.critical("docker executable not found")
        sys.exit(1)
    else:
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)
    logger.info("Initial image created: %s", image.decode('ascii'))

    unpacked_info['initial_image'] = image
    unpacked_info['current_image'] = image

    if 'DOCKER_MACHINE_NAME' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'docker-machine',
            'name': os.environ['DOCKER_MACHINE_NAME'],
        }
    elif 'DOCKER_HOST' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'custom',
            'env': dict((k, v) for k, v in iteritems(os.environ)
                        if k.startswith('DOCKER_')),
        }

    write_dict(target, unpacked_info)

def docker_setup_build(args):
    """Builds the container from the Dockerfile
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    if 'initial_image' in unpacked_info:
        logger.critical("Image already built")
        sys.exit(1)

    if args.image_name:
        image = args.image_name[0]
        if not isinstance(image, bytes):
            image = image.encode('ascii')
    else:
        image = make_unique_name(b'reprounzip_image_')

    logger.info("Calling 'docker build'...")
    try:
        retcode = subprocess.call(args.docker_cmd.split() +
                                  ['build', '-t'] + args.docker_option +
                                  [image, '.'],
                                  cwd=target.path)
    except OSError:
        logger.critical("docker executable not found")
        sys.exit(1)
    else:
        if retcode != 0:
            logger.critical("docker build failed with code %d", retcode)
            sys.exit(1)
    logger.info("Initial image created: %s", image.decode('ascii'))

    unpacked_info['initial_image'] = image
    unpacked_info['current_image'] = image

    if 'DOCKER_MACHINE_NAME' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'docker-machine',
            'name': os.environ['DOCKER_MACHINE_NAME'],
        }
    elif 'DOCKER_HOST' in os.environ:
        unpacked_info['docker_host'] = {
            'type': 'custom',
            'env': dict((k, v) for k, v in iteritems(os.environ)
                        if k.startswith('DOCKER_')),
        }

    write_dict(target, unpacked_info)

def metadata_update_run(config, dct, runs):
    """Update the unpacker metadata after some runs have executed.

    :param runs: An iterable of run numbers that were probably executed.

    This maintains a crude idea of the status of input files by updating
    the files that are outputs of the runs that were just executed. This
    means that files that were uploaded by the user will no longer be
    shown as uploaded (they have been overwritten by the experiment) and
    files that weren't packed exist from now on.

    This is not very reliable because a run might have created a file that
    is not designated as its output anyway, or might have failed and thus
    not created the output (or a bad output).
    """
    runs = set(runs)
    input_files = dct.setdefault('input_files', {})
    for name, fi in iteritems(config.inputs_outputs):
        if fi.read_runs and any(r in runs for r in fi.write_runs):
            input_files[name] = True

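# Illustrative sketch of the state transition metadata_update_run
# performs (invented values): an input file that was uploaded by the
# user, then overwritten by an executed run, flips to True ("generated").
from collections import namedtuple

InputOutputFile = namedtuple('InputOutputFile',
                             ['path', 'read_runs', 'write_runs'])

example_inputs_outputs = {
    'log': InputOutputFile('/srv/log.txt', read_runs=[1], write_runs=[0]),
}
dct = {'input_files': {'log': '/home/user/uploaded_log.txt'}}

executed = {0}
for name, fi in example_inputs_outputs.items():
    if fi.read_runs and any(r in executed for r in fi.write_runs):
        dct['input_files'][name] = True
print(dct)  # {'input_files': {'log': True}}
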
def run(self, files):
    reprounzip.common.record_usage(download_files=len(files))
    output_files = dict(
        (n, f.path)
        for n, f in iteritems(self.get_config().inputs_outputs)
        if f.write_runs)

    # No argument: list all the output files and exit
    if not files:
        print("Output files:")
        for output_name in output_files:
            print("    %s" % output_name)
        return

    self.prepare_download(files)

    try:
        # Download files
        for filespec in files:
            filespec_split = filespec.split(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            output_name, local_path = filespec_split

            try:
                remote_path = output_files[output_name]
            except KeyError:
                logging.critical("Invalid output file: %r", output_name)
                sys.exit(1)

            logging.debug("Downloading file %s", remote_path)
            if not local_path:
                self.download_and_print(remote_path)
            else:
                self.download(remote_path, Path(local_path))
    finally:
        self.finalize()

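# The "name:path" specification accepted by download, in isolation
# (names invented): an empty path after the colon means "print the file
# to stdout" rather than saving it.
for filespec in ['results:out/results.csv', 'stdout_log:']:
    output_name, local_path = filespec.split(':', 1)
    if local_path:
        print("download %s into %s" % (output_name, local_path))
    else:
        print("print %s to standard output" % output_name)
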
def chroot_run(args):
    """Runs the command in the chroot.
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000), run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run "
                         "setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' sh -c %s\n' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'run', b'--name=' + container,
                               '-i', '-t', image,
                               '/bin/sh', '-c', cmds])
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)

def directory_run(args):
    """Runs the command in the directory.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' %
                ':'.join(shell_escape(unicode_(join_root(root, d)))
                         for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and
                             rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logger.warning("Rewrote command-line as: %s",
                               ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')

def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map,
               level_pkgs, level_processes, level_other_files):
    """Writes a JSON file suitable for further processing.
    """
    # Packages
    if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = []
    else:
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    done_edges = set()
    for prog, fi, mode, argv in edges:
        endp_prog = prog_map[prog]
        if fi in package_map:
            if level_pkgs == LVL_PKG_DROP:
                continue
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            e = endp_prog['name'], endp_file, mode
            if e in done_edges:
                continue
            else:
                done_edges.add(e)
        else:
            endp_file = unicode_(fi)
        if mode is None:
            endp_prog['reads'].append(endp_file)
        elif mode & FILE_WRITE:
            endp_prog['writes'].append(endp_file)
        elif mode & FILE_READ:
            endp_prog['reads'].append(endp_file)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        json.dump({'packages': sorted(json_packages,
                                      key=lambda p: p['name']),
                   'other_files': json_other_files,
                   'runs': json_runs,
                   'inputs_outputs': [
                       {'name': k, 'path': unicode_(v.path),
                        'read_by_runs': v.read_runs,
                        'written_by_runs': v.write_runs}
                       for k, v in sorted(iteritems(inputs_outputs))]},
                  fp,
                  ensure_ascii=False, indent=2, sort_keys=True)
    finally:
        fp.close()

def run_from_vistrails():
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    cli_version = 1
    if len(sys.argv) > 1:
        try:
            cli_version = int(sys.argv[1])
        except ValueError:
            logging.info("Compatibility mode: reprounzip-vistrails didn't "
                         "get a version number")
    if cli_version != 1:
        logging.critical("Unknown interface version %d; you are probably "
                         "using a version of reprounzip-vistrails too old "
                         "for your VisTrails package. Consider upgrading.",
                         cli_version)
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args(sys.argv[2:])

    config = load_config(Path(args.directory) / 'config.yml',
                         canonical=True)

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True,
                                  cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst,
                                  cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that are used by this run and were not given
    for name, f in iteritems(config.inputs_outputs):
        if name not in seen_input_names and int(args.run) in f.read_runs:
            upload_command.append(':%s' % name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', args.run, '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.', args.run])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.',
             '%s:%s' % (output_name, filename)])

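# How run_from_vistrails assembles the 'upload' command list, shown on
# its own with invented values: explicitly given inputs are uploaded as
# "filename:name", and inputs of the selected run that were not given
# are reset to their packed originals with an empty local path.
input_file_args = ['data:/tmp/mydata.csv']  # from --input-file
run_inputs = {'data', 'params'}             # inputs read by the run

upload_command = []
seen_input_names = set()
for spec in input_file_args:
    input_name, filename = spec.split(':', 1)
    upload_command.append('%s:%s' % (filename, input_name))
    seen_input_names.add(input_name)
for name in sorted(run_inputs - seen_input_names):
    upload_command.append(':%s' % name)
print(upload_command)  # ['/tmp/mydata.csv:data', ':params']
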
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map,
               level_pkgs, level_processes, level_other_files):
    """Writes a JSON file suitable for further processing.
    """
    # Packages
    if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = []
    else:
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Other files
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Programs
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Connect edges
    done_edges = set()
    for prog, fi, mode, argv in edges:
        endp_prog = prog_map[prog]
        if fi in package_map:
            if level_pkgs == LVL_PKG_DROP:
                continue
            endp_file = package_map[fi].json_endpoint(fi, level_pkgs)
            e = endp_prog['name'], endp_file, mode
            if e in done_edges:
                continue
            else:
                done_edges.add(e)
        else:
            endp_file = unicode_(fi)
        if mode is None:
            endp_prog['reads'].append(endp_file)  # TODO: argv?
        elif mode & FILE_WRITE:
            endp_prog['writes'].append(endp_file)
        elif mode & FILE_READ:
            endp_prog['reads'].append(endp_file)

    json_other_files.sort()

    if PY3:
        fp = target.open('w', encoding='utf-8', newline='\n')
    else:
        fp = target.open('wb')
    try:
        json.dump({'packages': sorted(json_packages,
                                      key=lambda p: p['name']),
                   'other_files': json_other_files,
                   'runs': json_runs,
                   'inputs_outputs': [
                       {'name': k, 'path': unicode_(v.path),
                        'read_by_runs': v.read_runs,
                        'written_by_runs': v.write_runs}
                       for k, v in sorted(iteritems(inputs_outputs))]},
                  fp,
                  ensure_ascii=False, indent=2, sort_keys=True)
    finally:
        fp.close()

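# Overall shape of the document graph_json writes, reconstructed from
# the json.dump call above with invented content; the exact keys inside
# each run entry depend on run.json() and are assumptions here.
import json

example = {
    'packages': [{'name': 'coreutils'}],
    'other_files': ['/home/user/data.csv'],
    'runs': [{'name': 'run0', 'reads': [], 'writes': []}],
    'inputs_outputs': [
        {'name': 'data', 'path': '/home/user/data.csv',
         'read_by_runs': [0], 'written_by_runs': []},
    ],
}
print(json.dumps(example, ensure_ascii=False, indent=2, sort_keys=True))
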
def run(self, files):
    reprounzip.common.record_usage(upload_files=len(files))
    inputs_outputs = self.get_config().inputs_outputs

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for input_name in sorted(n for n, f in iteritems(inputs_outputs)
                                 if f.read_runs):
            assigned = self.input_files.get(input_name)
            if assigned is None:
                assigned = "(original)"
            elif assigned is False:
                assigned = "(not created)"
            elif assigned is True:
                assigned = "(generated)"
            else:
                assert isinstance(assigned, (bytes, unicode_))
            print("    %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)

    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            if input_name.startswith('/'):
                input_path = PosixPath(input_name)
            else:
                try:
                    input_path = inputs_outputs[input_name].path
                except KeyError:
                    logging.critical("Invalid input file: %r", input_name)
                    sys.exit(1)

            temp = None

            if not local_path:
                # Restore original file from pack
                logging.debug("Restoring input file %s", input_path)
                fd, temp = Path.tempfile(prefix='reprozip_input_')
                os.close(fd)
                local_path = self.extract_original_input(input_name,
                                                         input_path,
                                                         temp)
                if local_path is None:
                    temp.remove()
                    logging.warning("No original packed, can't restore "
                                    "input file %s", input_name)
                    continue
            else:
                local_path = Path(local_path)
                logging.debug("Uploading file %s to %s",
                              local_path, input_path)
                if not local_path.exists():
                    logging.critical("Local file %s doesn't exist",
                                     local_path)
                    sys.exit(1)

            self.upload_file(local_path, input_path)

            if temp is not None:
                temp.remove()
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()

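# upload's "local:name" syntax is split from the right, so local paths
# containing ':' keep working, and a name starting with '/' is treated
# as a path inside the experiment (values invented):
for filespec in ['C:\\data\\in.csv:data', '/tmp/in.csv:/opt/exp/in.csv']:
    local_path, input_name = filespec.rsplit(':', 1)
    kind = 'absolute path' if input_name.startswith('/') else 'named input'
    print('%s -> %s (%s)' % (local_path, input_name, kind))
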
def get_package_info(pack, read_data=False):
    """Get information about a package.
    """
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        total_size = 0
        total_paths = 0
        files = 0
        dirs = 0
        symlinks = 0
        hardlinks = 0
        others = 0

        rpz_pack = RPZPack(pack)
        for m in rpz_pack.list_data():
            total_size += m.size
            total_paths += 1
            if m.isfile():
                files += 1
            elif m.isdir():
                dirs += 1
            elif m.issym():
                symlinks += 1
            elif hasattr(m, 'islnk') and m.islnk():
                hardlinks += 1
            else:
                others += 1
        rpz_pack.close()

        information['pack'] = {
            'total_size': total_size,
            'total_paths': total_paths,
            'files': files,
            'dirs': dirs,
            'symlinks': symlinks,
            'hardlinks': hardlinks,
            'others': others,
        }

    total_paths = 0
    packed_packages_files = 0
    unpacked_packages_files = 0
    packed_packages = 0
    for package in packages:
        nb = len(package.files)
        total_paths += nb
        if package.packfiles:
            packed_packages_files += nb
            packed_packages += 1
        else:
            unpacked_packages_files += nb
    nb = len(other_files)
    total_paths += nb

    information['meta'] = {
        'total_paths': total_paths,
        'packed_packages_files': packed_packages_files,
        'unpacked_packages_files': unpacked_packages_files,
        'packages': len(packages),
        'packed_packages': packed_packages,
        'packed_paths': packed_packages_files + nb,
    }

    if runs:
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture for r in runs):
            logging.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution for r in runs):
            logging.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

        information['runs'] = [
            dict((k, run[k])
                 for k in ['id', 'binary', 'argv', 'environ',
                           'workingdir', 'signal', 'exitcode']
                 if k in run)
            for run in runs]

    information['inputs_outputs'] = {
        name: {'path': str(iofile.path),
               'read_runs': iofile.read_runs,
               'write_runs': iofile.write_runs}
        for name, iofile in iteritems(inputs_outputs)}

    # Unpacker compatibility
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    information['unpacker_status'] = unpacker_status

    return information

def _print_package_info(pack, info, verbosity=1):
    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))

    info_pack = info.get('pack')
    if info_pack:
        if 'total_size' in info_pack:
            print("Unpacked size: %s" % hsize(info_pack['total_size']))
        if 'total_paths' in info_pack:
            print("Total packed paths: %d" % info_pack['total_paths'])
        if verbosity >= 3:
            print("    Files: %d" % info_pack['files'])
            print("    Directories: %d" % info_pack['dirs'])
            if info_pack.get('symlinks'):
                print("    Symbolic links: %d" % info_pack['symlinks'])
            if info_pack.get('hardlinks'):
                print("    Hard links: %d" % info_pack['hardlinks'])
            if info_pack.get('others'):
                print("    Unknown (what!?): %d" % info_pack['others'])

    print("\n----- Metadata -----")
    info_meta = info['meta']
    if verbosity >= 3:
        print("Total paths: %d" % info_meta['total_paths'])
        print("Listed packed paths: %d" % info_meta['packed_paths'])
    if info_meta.get('packages'):
        print("Total software packages: %d" % info_meta['packages'])
        print("Packed software packages: %d" %
              info_meta['packed_packages'])
        if verbosity >= 3:
            print("Files from packed software packages: %d" %
                  info_meta['packed_packages_files'])
            print("Files from unpacked software packages: %d" %
                  info_meta['unpacked_packages_files'])
    if 'architecture' in info_meta:
        print("Architecture: %s (current: %s)" %
              (info_meta['architecture'], platform.machine().lower()))
    if 'distribution' in info_meta:
        distribution = ' '.join(t for t in info_meta['distribution'] if t)
        current_distribution = platform.linux_distribution()[0:2]
        current_distribution = ' '.join(t for t in current_distribution
                                        if t)
        print("Distribution: %s (current: %s)" %
              (distribution, current_distribution or "(not Linux)"))

    if 'runs' in info:
        runs = info['runs']
        print("Runs (%d):" % len(runs))
        for run in runs:
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) == 1 and run['id'] == "run0":
                print("    %s" % cmdline)
            else:
                print("    %s: %s" % (run['id'], cmdline))
            if verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])
                if run.get('walltime') is not None:
                    print("        walltime: %s" % run['walltime'])

    inputs_outputs = info.get('inputs_outputs')
    if inputs_outputs:
        if verbosity < 2:
            print("Inputs/outputs files (%d): %s" %
                  (len(inputs_outputs),
                   ", ".join(sorted(inputs_outputs))))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in sorted(iteritems(inputs_outputs)):
                t = []
                if f['read_runs']:
                    t.append("in")
                if f['write_runs']:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f['path']))

    unpacker_status = info.get('unpacker_status')
    if unpacker_status:
        print("\n----- Unpackers -----")
        for s, n in [(COMPAT_OK, "Compatible"),
                     (COMPAT_MAYBE, "Unknown"),
                     (COMPAT_NO, "Incompatible")]:
            if s != COMPAT_OK and verbosity < 2:
                continue
            if s not in unpacker_status:
                continue
            upks = unpacker_status[s]
            print("%s (%d):" % (n, len(upks)))
            for upk_name, msg in upks:
                if msg is not None:
                    print("    %s (%s)" % (upk_name, msg))
                else:
                    print("    %s" % upk_name)

def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logger.critical("Error: Can't use X11 forwarding if you're "
                        "detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logger.debug("Running from image %s", image.decode('ascii'))
    else:
        logger.critical("Image doesn't exist yet, have you run "
                        "setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p', '%s:%s/%s' % (port_host, port_container,
                                                 proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host),
                             args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip),
                             args.x11_display)

            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logger.warning(
                    "You requested X11 forwarding but the Docker "
                    "container appears to be running remotely. It is "
                    "probable that it won't be able to connect to the "
                    "local display. Creating a remote SSH tunnel and "
                    "running with --tunneled-x11 might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Passing arguments to Docker image:")
        for c in cmd:
            logger.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logger.info("Start container %s (detached)",
                    container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logger.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logger.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd,
                                 request_tty=True)
    # The image prints out the exit status(es) itself
    if retcode != 0:
        logger.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logger.info("Committing container %s to image %s",
                container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logger.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() +
                              ['rm', container])
    if retcode != 0:
        logger.error("Error deleting container %s",
                     container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logger.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)

def run(self, files):
    reprounzip.common.record_usage(upload_files=len(files))
    input_files = dict(
        (n, f.path)
        for n, f in iteritems(self.get_config().inputs_outputs)
        if f.read_runs)

    # No argument: list all the input files and exit
    if not files:
        print("Input files:")
        for input_name in input_files:
            if self.input_files.get(input_name) is not None:
                assigned = self.input_files[input_name]
            else:
                assigned = "(original)"
            print("    %s: %s" % (input_name, assigned))
        return

    self.prepare_upload(files)

    try:
        # Upload files
        for filespec in files:
            filespec_split = filespec.rsplit(':', 1)
            if len(filespec_split) != 2:
                logging.critical("Invalid file specification: %r",
                                 filespec)
                sys.exit(1)
            local_path, input_name = filespec_split

            try:
                input_path = input_files[input_name]
            except KeyError:
                logging.critical("Invalid input file: %r", input_name)
                sys.exit(1)

            temp = None

            if not local_path:
                # Restore original file from pack
                logging.debug("Restoring input file %s", input_path)
                fd, temp = Path.tempfile(prefix='reprozip_input_')
                os.close(fd)
                local_path = self.extract_original_input(input_name,
                                                         input_path, temp)
                if local_path is None:
                    temp.remove()
                    logging.warning("No original packed, can't restore "
                                    "input file %s", input_name)
                    continue
            else:
                local_path = Path(local_path)
                logging.debug("Uploading file %s to %s",
                              local_path, input_path)
                if not local_path.exists():
                    logging.critical("Local file %s doesn't exist",
                                     local_path)
                    sys.exit(1)

            self.upload_file(local_path, input_path)

            if temp is not None:
                temp.remove()
                self.input_files.pop(input_name, None)
            else:
                self.input_files[input_name] = local_path.absolute().path
    finally:
        self.finalize()

def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.
    """
    def parse_run(runs, s):
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs) or
                    (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs) or
                    (show_outputs and file_filter.run in fio.write_runs))

    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    if pack.is_dir():
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml',
                                  canonical=True)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        # The '.reprounzip' file is a pickled dictionary, it contains the
        # name of the files that replaced each input file (if upload was
        # used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})

        if show_inputs:
            shown = False
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if not shown:
                        print("Input files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)

                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print("      %s" % assigned)
            if not shown:
                print("Input files: none")

        if show_outputs:
            shown = False
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if not shown:
                        print("Output files:")
                        shown = True
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
            if not shown:
                print("Output files: none")

    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

        # Filter files by run
        if args.run is not None:
            file_filter.run = parse_run(config.runs, args.run)

        if any(f.read_runs for f in itervalues(config.inputs_outputs)):
            print("Input files:")
            for input_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.read_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (input_name, f.path))
                    else:
                        print("    %s" % input_name)
        else:
            print("Input files: none")

        if any(f.write_runs for f in itervalues(config.inputs_outputs)):
            print("Output files:")
            for output_name, f in sorted(iteritems(config.inputs_outputs)):
                if f.write_runs and file_filter(f):
                    if args.verbosity >= 2:
                        print("    %s (%s)" % (output_name, f.path))
                    else:
                        print("    %s" % output_name)
        else:
            print("Output files: none")

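# The function-attribute trick used by file_filter above, in isolation:
# storing the selected run on the function object emulates `nonlocal`,
# which doesn't exist on Python 2 (values invented).
def example_filter(read_runs, write_runs):
    if example_filter.run is None:
        return bool(read_runs or write_runs)
    return example_filter.run in read_runs or example_filter.run in write_runs

example_filter.run = None
print(example_filter([0], []))  # True: no run filter set
example_filter.run = 1
print(example_filter([0], []))  # False: file not used by run 1
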
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're "
                         "detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run "
                         "setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host),
                             args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip),
                             args.x11_display)

            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker "
                    "container appears to be running remotely. It is "
                    "probable that it won't be able to connect to the "
                    "local display. Creating a remote SSH tunnel and "
                    "running with --tunneled-x11 might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run',
                                      b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run',
                                  b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)

def get_package_info(pack, read_data=False):
    """Get information about a package.
    """
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        total_size = 0
        total_paths = 0
        files = 0
        dirs = 0
        symlinks = 0
        hardlinks = 0
        others = 0

        rpz_pack = RPZPack(pack)
        for m in rpz_pack.list_data():
            total_size += m.size
            total_paths += 1
            if m.isfile():
                files += 1
            elif m.isdir():
                dirs += 1
            elif m.issym():
                symlinks += 1
            elif hasattr(m, 'islnk') and m.islnk():
                hardlinks += 1
            else:
                others += 1
        rpz_pack.close()

        information['pack'] = {
            'total_size': total_size,
            'total_paths': total_paths,
            'files': files,
            'dirs': dirs,
            'symlinks': symlinks,
            'hardlinks': hardlinks,
            'others': others,
        }

    total_paths = 0
    packed_packages_files = 0
    unpacked_packages_files = 0
    packed_packages = 0
    for package in packages:
        nb = len(package.files)
        total_paths += nb
        if package.packfiles:
            packed_packages_files += nb
            packed_packages += 1
        else:
            unpacked_packages_files += nb
    nb = len(other_files)
    total_paths += nb

    information['meta'] = {
        'total_paths': total_paths,
        'packed_packages_files': packed_packages_files,
        'unpacked_packages_files': unpacked_packages_files,
        'packages': len(packages),
        'packed_packages': packed_packages,
        'packed_paths': packed_packages_files + nb,
    }

    if runs:
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture for r in runs):
            logger.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution for r in runs):
            logger.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

        information['runs'] = [
            dict((k, run[k])
                 for k in ['id', 'binary', 'argv', 'environ',
                           'workingdir', 'signal', 'exitcode']
                 if k in run)
            for run in runs]

    information['inputs_outputs'] = {
        name: {'path': str(iofile.path),
               'read_runs': iofile.read_runs,
               'write_runs': iofile.write_runs}
        for name, iofile in iteritems(inputs_outputs)}

    # Unpacker compatibility
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    information['unpacker_status'] = unpacker_status

    return information

def run(self, files, all_):
    reprounzip.common.record_usage(download_files=len(files))
    output_files = {n: f.path
                    for n, f in iteritems(self.get_config().inputs_outputs)
                    if f.write_runs}

    # No argument: list all the output files and exit
    if not (all_ or files):
        print("Output files:")
        for output_name in output_files:
            print("    %s" % output_name)
        return

    # Parse the name[:path] syntax
    resolved_files = []
    all_files = set(output_files)
    for filespec in files:
        filespec_split = filespec.split(':', 1)
        if len(filespec_split) == 1:
            output_name = local_path = filespec
        elif len(filespec_split) == 2:
            output_name, local_path = filespec_split
        else:
            logging.critical("Invalid file specification: %r",
                             filespec)
            sys.exit(1)
        local_path = Path(local_path) if local_path else None
        all_files.discard(output_name)
        resolved_files.append((output_name, local_path))

    # If all_ is set, add all the files that weren't explicitly named
    if all_:
        for output_name in all_files:
            resolved_files.append((output_name, Path(output_name)))

    self.prepare_download(resolved_files)

    success = True
    try:
        # Download files
        for output_name, local_path in resolved_files:
            try:
                remote_path = output_files[output_name]
            except KeyError:
                logging.critical("Invalid output file: %r", output_name)
                sys.exit(1)

            logging.debug("Downloading file %s", remote_path)
            if local_path is None:
                ret = self.download_and_print(remote_path)
            else:
                ret = self.download(remote_path, local_path)
            if ret is None:
                ret = True
                warnings.warn("download() returned None instead of "
                              "True/False, assuming True",
                              category=DeprecationWarning)
            if not ret:
                success = False
        if not success:
            sys.exit(1)
    finally:
        self.finalize()

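# Resolution of download's name[:path] syntax, including the --all case
# (names invented): a bare name downloads to a file of the same name,
# and --all adds every output that wasn't explicitly listed.
output_files = {'results': '/srv/results.csv', 'log': '/srv/log.txt'}
files, all_ = ['results:out.csv'], True

resolved, remaining = [], set(output_files)
for filespec in files:
    parts = filespec.split(':', 1)
    name, local = (parts[0], parts[0]) if len(parts) == 1 else parts
    remaining.discard(name)
    resolved.append((name, local or None))
if all_:
    resolved.extend((name, name) for name in sorted(remaining))
print(resolved)  # [('results', 'out.csv'), ('log', 'log')]
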
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file
    # that gets executed. A process is a system-level task, identified by
    # its pid (pids don't get reused in the database).
    # What I call a program is the pair (process, binary), so forking
    # creates a new program (with the same binary) and exec'ing creates a
    # new program as well (with the same process).
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that
    # program doesn't do anything worth showing on the graph, it will be
    # erased, unless all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify "
                         "an alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at
    # the same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one
    # of 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the
            # previous one hasn't done anything since it was created via
            # fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n'
                 '    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n'
                 '    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in \
                enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, '
                         'label="%s"];\n' % (
                             escape(unicode_(f)),
                             id(prog),
                             escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')

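# The decorate-sort-undecorate merge used above, in isolation: three
# event streams, each already sorted by timestamp, are merged into one
# chronological stream (event tuples invented).
import heapq

process_rows = [(1, None, 10), (2, 1, 40)]     # id, parent, timestamp
file_rows = [('/etc/passwd', 20, 1, 1)]        # name, ts, mode, process
exec_rows = [('/bin/sh', 30, 1, '/bin/sh\0')]  # name, ts, process, argv

rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                   ((r[1], 'open', r) for r in file_rows),
                   ((r[1], 'exec', r) for r in exec_rows))
for ts, event_type, data in rows:
    print(ts, event_type)  # 10 process, 20 open, 30 exec, 40 process
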
def generate(target, configfile, database, all_forks=False, graph_format='dot', level_pkgs='file', level_processes='thread', level_other_files='all', regex_filters=None, regex_replaces=None, aggregates=None): """Main function for the graph subcommand. """ try: graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT, 'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format] except KeyError: logging.critical("Unknown output format %r", graph_format) sys.exit(1) level_pkgs, level_processes, level_other_files, file_depth = \ parse_levels(level_pkgs, level_processes, level_other_files) # Reads package ownership from the configuration if not configfile.is_file(): logging.critical("Configuration file does not exist!\n" "Did you forget to run 'reprozip trace'?\n" "If not, you might want to use --dir to specify an " "alternate location.") sys.exit(1) config = load_config(configfile, canonical=False) inputs_outputs = dict((f.path, n) for n, f in iteritems(config.inputs_outputs)) has_thread_flag = config.format_version >= LooseVersion('0.7') runs, files, edges = read_events(database, all_forks, has_thread_flag) # Label the runs if len(runs) != len(config.runs): logging.warning("Configuration file doesn't list the same number of " "runs we found in the database!") else: for config_run, run in izip(config.runs, runs): run.name = config_run['id'] # Apply regexes ignore = [lambda path, r=re.compile(p): r.search(path) is not None for p in regex_filters or []] replace = [lambda path, r=re.compile(p): r.sub(repl, path) for p, repl in regex_replaces or []] def filefilter(path): pathuni = unicode_(path) if any(f(pathuni) for f in ignore): logging.debug("IGN %s", pathuni) return None if not (replace or aggregates): return path for fi in replace: pathuni_ = fi(pathuni) if pathuni_ != pathuni: logging.debug("SUB %s -> %s", pathuni, pathuni_) pathuni = pathuni_ for prefix in aggregates or []: if pathuni.startswith(prefix): logging.debug("AGG %s -> %s", pathuni, prefix) pathuni = prefix break return PosixPath(pathuni) files_new = set() for fi in files: fi = filefilter(fi) if fi is not None: files_new.add(fi) files = files_new edges_new = OrderedSet() for prog, fi, mode, argv in edges: fi = filefilter(fi) if fi is not None: edges_new.add((prog, fi, mode, argv)) edges = edges_new # Puts files in packages package_map = {} if level_pkgs == LVL_PKG_IGNORE: packages = [] other_files = files else: logging.info("Organizes packages...") file2package = dict((f.path, pkg) for pkg in config.packages for f in pkg.files) packages = {} other_files = [] for fi in files: pkg = file2package.get(fi) if pkg is not None: package = packages.get(pkg.name) if package is None: package = Package(pkg.name, pkg.version) packages[pkg.name] = package package.files.add(fi) package_map[fi] = package else: other_files.append(fi) packages = sorted(itervalues(packages), key=lambda pkg: pkg.name) for i, pkg in enumerate(packages): pkg.id = i # Filter other files if level_other_files == LVL_OTHER_ALL and file_depth is not None: other_files = set(PosixPath(*f.components[:file_depth + 1]) for f in other_files) edges = OrderedSet((prog, f if f in package_map else PosixPath(*f.components[:file_depth + 1]), mode, argv) for prog, f, mode, argv in edges) else: if level_other_files == LVL_OTHER_IO: other_files = set(f for f in other_files if f in inputs_outputs) edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges if f in package_map or f in other_files] elif level_other_files == LVL_OTHER_NO: other_files = set() edges = [(prog, f, mode, argv) for 
prog, f, mode, argv in edges if f in package_map] args = (target, runs, packages, other_files, package_map, edges, inputs_outputs, level_pkgs, level_processes, level_other_files) if graph_format == FORMAT_DOT: graph_dot(*args) elif graph_format == FORMAT_JSON: graph_json(*args) else: assert False
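# Illustrative sketch (not part of reprounzip): why the regex lambdas above
# compile the pattern into a default argument. Default arguments are
# evaluated once, when the lambda is created, so each closure keeps its own
# compiled regex; closing over the loop variable directly would leave every
# lambda looking at the last pattern only (Python's late binding).
import re

patterns = [r'\.pyc$', r'^/tmp/']
late = [lambda path: re.search(p, path) is not None for p in patterns]
bound = [lambda path, r=re.compile(p): r.search(path) is not None
         for p in patterns]
print([f('/tmp/x.txt') for f in late])   # [True, True]: both use '^/tmp/'
print([f('/tmp/x.txt') for f in bound])  # [False, True]: one regex each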
def docker_setup_create(args): """Sets up the experiment to be run in a Docker-built container. """ pack = Path(args.pack[0]) target = Path(args.target[0]) if target.exists(): logging.critical("Target directory exists") sys.exit(1) signals.pre_setup(target=target, pack=pack) target.mkdir() try: # Unpacks configuration file rpz_pack = RPZPack(pack) rpz_pack.extract_config(target / 'config.yml') # Loads config runs, packages, other_files = config = load_config( target / 'config.yml', True) if args.base_image: record_usage(docker_explicit_base=True) base_image = args.base_image[0] if args.distribution: target_distribution = args.distribution[0] else: target_distribution = None else: target_distribution, base_image = select_image(runs) logging.info("Using base image %s", base_image) logging.debug("Distribution: %s", target_distribution or "unknown") rpz_pack.copy_data_tar(target / 'data.tgz') arch = runs[0]['architecture'] # Writes Dockerfile logging.info("Writing %s...", target / 'Dockerfile') with (target / 'Dockerfile').open('w', encoding='utf-8', newline='\n') as fp: fp.write('FROM %s\n\n' % base_image) # Installs busybox download_file(busybox_url(arch), target / 'busybox', 'busybox-%s' % arch) fp.write('COPY busybox /busybox\n') # Installs rpzsudo download_file(sudo_url(arch), target / 'rpzsudo', 'rpzsudo-%s' % arch) fp.write('COPY rpzsudo /rpzsudo\n\n') fp.write('COPY data.tgz /reprozip_data.tgz\n\n') fp.write('COPY rpz-files.list /rpz-files.list\n') fp.write('RUN \\\n' ' chmod +x /busybox /rpzsudo && \\\n') if args.install_pkgs: # Install every package through package manager missing_packages = [] else: # Only install packages that were not packed missing_packages = [pkg for pkg in packages if pkg.packfiles] packages = [pkg for pkg in packages if not pkg.packfiles] if packages: record_usage(docker_install_pkgs=True) try: installer = select_installer(pack, runs, target_distribution) except CantFindInstaller as e: logging.error("Need to install %d packages but couldn't " "select a package installer: %s", len(packages), e) sys.exit(1) # Updates package sources update_script = installer.update_script() if update_script: fp.write(' %s && \\\n' % update_script) # Installs necessary packages fp.write(' %s && \\\n' % installer.install_script(packages)) logging.info("Dockerfile will install the %d software " "packages that were not packed", len(packages)) else: record_usage(docker_install_pkgs=False) # Untar paths = set() pathlist = [] # Add intermediate directories, and check for existence in the tar logging.info("Generating file list...") missing_files = chain.from_iterable(pkg.files for pkg in missing_packages) data_files = rpz_pack.data_filenames() listoffiles = list(chain(other_files, missing_files)) for f in listoffiles: if f.path.name == 'resolv.conf' and ( f.path.lies_under('/etc') or f.path.lies_under('/run') or f.path.lies_under('/var')): continue path = PosixPath('/') for c in rpz_pack.remove_data_prefix(f.path).components: path = path / c if path in paths: continue paths.add(path) if path in data_files: pathlist.append(path) else: logging.info("Missing file %s", path) rpz_pack.close() # FIXME : for some reason we need reversed() here, I'm not sure why # Need to read more of tar's docs. 
# TAR bug: --no-overwrite-dir removes --keep-old-files with (target / 'rpz-files.list').open('wb') as lfp: for p in reversed(pathlist): lfp.write(join_root(rpz_pack.data_prefix, p).path) lfp.write(b'\0') fp.write(' cd / && ' '(tar zpxf /reprozip_data.tgz -U --recursive-unlink ' '--numeric-owner --strip=1 --null -T /rpz-files.list || ' '/busybox echo "TAR reports errors, this might or might ' 'not prevent the execution to run")\n') # Setup entry point fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n' 'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n') # Write entry point script logging.info("Writing %s...", target / 'rpz_entrypoint.sh') with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8', newline='\n') as fp: # The entrypoint gets some arguments from the run command # By default, it just does all the runs # "run N" executes the run with that number # "cmd STR" sets a replacement command-line for the next run # "do STR" executes a command as-is fp.write( '#!/bin/sh\n' '\n' 'COMMAND=\n' 'ENVVARS=\n' '\n' 'if [ $# = 0 ]; then\n' ' exec /busybox sh /rpz_entrypoint.sh') for nb in irange(len(runs)): fp.write(' run %d' % nb) fp.write( '\n' 'fi\n' '\n' 'while [ $# != 0 ]; do\n' ' case "$1" in\n' ' help)\n' ' echo "Image built from reprounzip-docker" >&2\n' ' echo "Usage: docker run <image> [cmd word [word ' '...]] [run <R>]" >&2\n' ' echo " \\`cmd ...\\` changes the command for ' 'the next \\`run\\` option" >&2\n' ' echo " \\`run <name|number>\\` runs the ' 'specified run" >&2\n' ' echo "By default, all the runs are executed." ' '>&2\n' ' echo "The runs in this image are:" >&2\n') for run in runs: fp.write( ' echo " {name}: {cmdline}" >&2\n'.format( name=run['id'], cmdline=' '.join(shell_escape(a) for a in run['argv']))) fp.write( ' exit 0\n' ' ;;\n' ' do)\n' ' shift\n' ' $1\n' ' ;;\n' ' env)\n' ' shift\n' ' ENVVARS="$1"\n' ' ;;\n' ' cmd)\n' ' shift\n' ' COMMAND="$1"\n' ' ;;\n' ' run)\n' ' shift\n' ' case "$1" in\n') for i, run in enumerate(runs): cmdline = ' '.join([run['binary']] + run['argv'][1:]) fp.write( ' {name})\n' ' RUNCOMMAND={cmd}\n' ' RUNWD={wd}\n' ' RUNENV={env}\n' ' RUNUID={uid}\n' ' RUNGID={gid}\n' ' ;;\n'.format( name='%s|%d' % (run['id'], i), cmd=shell_escape(cmdline), wd=shell_escape(run['workingdir']), env=shell_escape(' '.join( '%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(run['environ']))), uid=run.get('uid', 1000), gid=run.get('gid', 1000))) fp.write( ' *)\n' ' echo "RPZ: Unknown run $1" >&2\n' ' exit 1\n' ' ;;\n' ' esac\n' ' if [ -n "$COMMAND" ]; then\n' ' RUNCOMMAND="$COMMAND"\n' ' COMMAND=\n' ' fi\n' ' export RUNWD; export RUNENV; export ENVVARS; ' 'export RUNCOMMAND\n' ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c ' '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS ' '$RUNCOMMAND"\n' ' ENVVARS=\n' ' ;;\n' ' *)\n' ' echo "RPZ: Unknown option $1" >&2\n' ' exit 1\n' ' ;;\n' ' esac\n' ' shift\n' 'done\n') # Meta-data for reprounzip write_dict(target, metadata_initial_iofiles(config)) signals.post_setup(target=target, pack=pack) except Exception: target.rmtree(ignore_errors=True) raise
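# Illustrative sketch (not part of reprounzip): the rpz-files.list format
# written above. It is a NUL-delimited list of archive member names, the
# format `tar --null -T <list>` expects; since NUL cannot appear in a POSIX
# path, no quoting or escaping is needed, even for names containing spaces
# or newlines. The file name below is hypothetical.
def write_nul_list(fileobj, names):
    """Write an iterable of byte paths as a NUL-delimited tar -T list."""
    for name in names:
        fileobj.write(name)
        fileobj.write(b'\0')

with open('files.list', 'wb') as fp:
    write_nul_list(fp, [b'DATA/etc/passwd', b'DATA/bin/sh'])
# then, inside the container, something like:
#   tar zpxf /reprozip_data.tgz --null -T files.list --strip=1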
def directory_run(args): """Runs the command in the directory. """ target = Path(args.target[0]) unpacked_info = metadata_read(target, 'directory') cmdline = args.cmdline # Loads config config = load_config_file(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) root = (target / 'root').absolute() # Gets library paths lib_dirs = [] p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'], stdout=subprocess.PIPE) try: for l in p.stdout: if len(l) < 3 or l[0] in (b' ', b'\t'): continue if l.endswith(b':\n'): lib_dirs.append(Path(l[:-2])) finally: p.communicate() lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join( shell_escape(unicode_(join_root(root, d))) for d in lib_dirs)) cmds = [lib_dirs] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape( unicode_(join_root(root, Path(run['workingdir'])))) cmd += '/usr/bin/env -i ' environ = run['environ'] environ = fixup_environment(environ, args) if args.x11: if 'DISPLAY' in os.environ: environ['DISPLAY'] = os.environ['DISPLAY'] if 'XAUTHORITY' in os.environ: environ['XAUTHORITY'] = os.environ['XAUTHORITY'] cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(environ) if k != 'PATH') cmd += ' ' # PATH # Get the original PATH components path = [PosixPath(d) for d in run['environ'].get('PATH', '').split(':')] # The same paths but in the directory dir_path = [join_root(root, d) for d in path if d.root == '/'] # Rebuild string path = ':'.join(unicode_(d) for d in dir_path + path) cmd += 'PATH=%s ' % shell_escape(path) # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = run['argv'] # Rewrites command-line arguments that are absolute filenames rewritten = False for i in irange(len(argv)): try: p = Path(argv[i]) except UnicodeEncodeError: continue if p.is_absolute: rp = join_root(root, p) if (rp.exists() or (len(rp.components) > 3 and rp.parent.exists())): argv[i] = str(rp) rewritten = True if rewritten: logging.warning("Rewrote command-line as: %s", ' '.join(shell_escape(a) for a in argv)) else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) cmds.append(cmd) cmds = ' && '.join(cmds) signals.pre_run(target=target) retcode = interruptible_call(cmds, shell=True) stderr.write("\n*** Command finished, status: %d\n" % retcode) signals.post_run(target=target, retcode=retcode) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) metadata_write(target, unpacked_info, 'directory')
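# Illustrative sketch (not part of reprounzip): the `ldconfig -v -N` parsing
# above. Directory headers are the unindented lines ending with ':'; the
# indented lines list the libraries inside each directory and are skipped.
def parse_ldconfig(lines):
    """Yield library directories from `ldconfig -v -N` output (bytes lines)."""
    for line in lines:
        if len(line) < 3 or line[0:1] in (b' ', b'\t'):
            continue
        line = line.rstrip(b'\n')
        if line.endswith(b':'):
            yield line[:-1]

sample = [b'/lib/x86_64-linux-gnu:\n',
          b'\tlibc.so.6 -> libc-2.31.so\n',
          b'/usr/lib:\n']
print(list(parse_ldconfig(sample)))
# [b'/lib/x86_64-linux-gnu', b'/usr/lib']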
def generate(target, configfile, database): """Go over the trace and generate the graph file. """ # Reads package ownership from the configuration if not configfile.is_file(): logger.critical("Configuration file does not exist!\n" "Did you forget to run 'reprozip trace'?\n" "If not, you might want to use --dir to specify an " "alternate location.") sys.exit(1) config = load_config(configfile, canonical=False) has_thread_flag = config.format_version >= LooseVersion('0.7') if PY3: # On PY3, connect() only accepts unicode conn = sqlite3.connect(str(database)) else: conn = sqlite3.connect(database.path) conn.row_factory = sqlite3.Row vertices = [] edges = [] # Create the user entity, which initiates the runs vertices.append({'ID': 'user', 'type': 'Agent', 'subtype': 'User', 'label': 'User'}) run = -1 # Read processes cur = conn.cursor() rows = cur.execute( ''' SELECT id, parent, timestamp, is_thread, exitcode FROM processes; ''' if has_thread_flag else ''' SELECT id, parent, timestamp, 0 as is_thread, exitcode FROM processes; ''') for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows: if r_parent is None: # Create run entity run += 1 vertices.append({'ID': 'run%d' % run, 'type': 'Activity', 'subtype': 'Run', 'label': "Run #%d" % run, 'date': r_timestamp}) # User -> run edges.append({'ID': 'user_run%d' % run, 'type': 'UserRuns', 'label': "User runs command", 'sourceID': 'user', 'targetID': 'run%d' % run}) # Run -> process edges.append({'ID': 'run_start%d' % run, 'type': 'RunStarts', 'label': "Run #%d command", 'sourceID': 'run%d' % run, 'targetID': 'process%d' % r_id}) # Create process entity vertices.append({'ID': 'process%d' % r_id, 'type': 'Agent', 'subtype': 'Thread' if r_isthread else 'Process', 'label': 'Process #%d' % r_id, 'date': r_timestamp}) # TODO: add process end time (use master branch?)
# Add process creation activity if r_parent is not None: # Process creation activity vertex = {'ID': 'fork%d' % r_id, 'type': 'Activity', 'subtype': 'Fork', 'label': "#%d creates %s #%d" % ( r_parent, "thread" if r_isthread else "process", r_id), 'date': r_timestamp} if has_thread_flag: vertex['thread'] = 'true' if r_isthread else 'false' vertices.append(vertex) # Parent -> creation edges.append({'ID': 'fork_p_%d' % r_id, 'type': 'PerformsFork', 'label': "Performs fork", 'sourceID': 'process%d' % r_parent, 'targetID': 'fork%d' % r_id}) # Creation -> child edges.append({'ID': 'fork_c_%d' % r_id, 'type': 'ForkCreates', 'label': "Fork creates", 'sourceID': 'fork%d' % r_id, 'targetID': 'process%d' % r_id}) cur.close() file2package = dict((f.path.path, pkg) for pkg in config.packages for f in pkg.files) inputs_outputs = dict((f.path.path, (bool(f.write_runs), bool(f.read_runs))) for n, f in iteritems(config.inputs_outputs)) # Read opened files cur = conn.cursor() rows = cur.execute( ''' SELECT name, is_directory FROM opened_files GROUP BY name; ''') for r_name, r_directory in rows: # Create file entity vertex = {'ID': r_name, 'type': 'Entity', 'subtype': 'Directory' if r_directory else 'File', 'label': r_name} if r_name in file2package: vertex['package'] = file2package[r_name].name if r_name in inputs_outputs: out_, in_ = inputs_outputs[r_name] if in_: vertex['input'] = True if out_: vertex['output'] = True vertices.append(vertex) cur.close() # Read file opens cur = conn.cursor() rows = cur.execute( ''' SELECT id, name, timestamp, mode, process FROM opened_files; ''') for r_id, r_name, r_timestamp, r_mode, r_process in rows: # Create file access activity vertices.append({'ID': 'access%d' % r_id, 'type': 'Activity', 'subtype': ('FileWrites' if r_mode & FILE_WRITE else 'FileReads'), 'label': ("File write: %s" if r_mode & FILE_WRITE else "File read: %s") % r_name, 'date': r_timestamp, 'mode': r_mode}) # Process -> access edges.append({'ID': 'proc_access%d' % r_id, 'type': 'PerformsFileAccess', 'label': "Process does file access", 'sourceID': 'process%d' % r_process, 'targetID': 'access%d' % r_id}) # Access -> file edges.append({'ID': 'access_file%d' % r_id, 'type': 'AccessFile', 'label': "File access touches", 'sourceID': 'access%d' % r_id, 'targetID': r_name}) cur.close() # Read executions cur = conn.cursor() rows = cur.execute( ''' SELECT id, name, timestamp, process, argv FROM executed_files; ''') for r_id, r_name, r_timestamp, r_process, r_argv in rows: argv = r_argv.split('\0') if not argv[-1]: argv = argv[:-1] cmdline = ' '.join(shell_escape(a) for a in argv) # Create execution activity vertices.append({'ID': 'exec%d' % r_id, 'type': 'Activity', 'subtype': 'ProcessExecutes', 'label': "Process #%d executes file %s" % (r_process, r_name), 'date': r_timestamp, 'cmdline': cmdline, 'process': r_process, 'file': r_name}) # Process -> execution edges.append({'ID': 'proc_exec%d' % r_id, 'type': 'ProcessExecution', 'label': "Process does exec()", 'sourceID': 'process%d' % r_process, 'targetID': 'exec%d' % r_id}) # Execution -> file edges.append({'ID': 'exec_file%d' % r_id, 'type': 'ExecutionFile', 'label': "Execute file", 'sourceID': 'exec%d' % r_id, 'targetID': r_name}) cur.close() # Write the file from the created lists with target.open('w', encoding='utf-8', newline='\n') as out: out.write('<?xml version="1.0"?>\n\n' '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema' '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n' ' <vertices>\n') for vertex in vertices: if 'date' not in 
vertex: vertex['date'] = '-1' tags = {} for k in ('ID', 'type', 'label', 'date'): if k not in vertex: vertex.update(tags) raise ValueError("Vertex is missing tag '%s': %r" % ( k, vertex)) tags[k] = vertex.pop(k) out.write(' <vertex>\n ' + '\n '.join('<{k}>{v}</{k}>'.format(k=k, v=xml_escape(v)) for k, v in iteritems(tags))) if vertex: out.write('\n <attributes>\n') for k, v in iteritems(vertex): out.write(' <attribute>\n' ' <name>{k}</name>\n' ' <value>{v}</value>\n' ' </attribute>\n' .format(k=xml_escape(k), v=xml_escape(v))) out.write(' </attributes>') out.write('\n </vertex>\n') out.write(' </vertices>\n' ' <edges>\n') for edge in edges: for k in ('ID', 'type', 'label', 'sourceID', 'targetID'): if k not in edge: raise ValueError("Edge is missing tag '%s': %r" % ( k, edge)) if 'value' not in edge: edge['value'] = '' out.write(' <edge>\n ' + '\n '.join('<{k}>{v}</{k}>'.format(k=k, v=xml_escape(v)) for k, v in iteritems(edge)) + '\n </edge>\n') out.write(' </edges>\n' '</provenancedata>\n') conn.close()
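# Illustrative sketch (not part of reprounzip): the vertex serialization
# above, reduced to a standalone function. The four required tags become
# elements in a fixed order; whatever remains in the dict is emitted as
# name/value attribute pairs (the real writer additionally wraps them in an
# <attributes> element and indents differently).
from xml.sax.saxutils import escape as xml_escape

def vertex_to_xml(vertex):
    vertex = dict(vertex)
    vertex.setdefault('date', '-1')
    parts = ['  <vertex>']
    for k in ('ID', 'type', 'label', 'date'):
        parts.append('    <{k}>{v}</{k}>'.format(
            k=k, v=xml_escape(str(vertex.pop(k)))))
    for k, v in sorted(vertex.items()):
        parts.append('    <attribute><name>%s</name><value>%s</value>'
                     '</attribute>' % (xml_escape(k), xml_escape(str(v))))
    parts.append('  </vertex>')
    return '\n'.join(parts)

print(vertex_to_xml({'ID': 'process1', 'type': 'Agent', 'subtype': 'Process',
                     'label': 'Process #1', 'date': 12.5}))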
def run(self, files, all_): reprounzip.common.record_usage(download_files=len(files)) inputs_outputs = self.get_config().inputs_outputs # No argument: list all the output files and exit if not (all_ or files): print("Output files:") for output_name in sorted(n for n, f in iteritems(inputs_outputs) if f.write_runs): print(" %s" % output_name) return # Parse the name[:path] syntax resolved_files = [] all_files = set(n for n, f in iteritems(inputs_outputs) if f.write_runs) for filespec in files: filespec_split = filespec.split(':', 1) if len(filespec_split) == 1: output_name = local_path = filespec elif len(filespec_split) == 2: output_name, local_path = filespec_split else: logging.critical("Invalid file specification: %r", filespec) sys.exit(1) local_path = Path(local_path) if local_path else None all_files.discard(output_name) resolved_files.append((output_name, local_path)) # If all_ is set, add all the files that weren't explicitly named if all_: for output_name in all_files: resolved_files.append((output_name, Path(output_name))) self.prepare_download(resolved_files) success = True try: # Download files for output_name, local_path in resolved_files: if output_name.startswith('/'): remote_path = PosixPath(output_name) else: try: remote_path = inputs_outputs[output_name].path except KeyError: logging.critical("Invalid output file: %r", output_name) sys.exit(1) logging.debug("Downloading file %s", remote_path) if local_path is None: ret = self.download_and_print(remote_path) else: ret = self.download(remote_path, local_path) if ret is None: ret = True warnings.warn( "download() returned None instead of " "True/False, assuming True", category=DeprecationWarning) if not ret: success = False if not success: sys.exit(1) finally: self.finalize()
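# Illustrative sketch (not part of reprounzip): the name[:path] syntax parsed
# above. A bare name downloads to a local file of the same name, 'name:path'
# chooses the destination, and a trailing ':' (empty path) means "print to
# stdout".
def parse_filespec(filespec):
    parts = filespec.split(':', 1)
    if len(parts) == 1:
        return filespec, filespec    # 'arg' -> download to ./arg
    name, local = parts
    return name, (local or None)     # None -> download_and_print()

print(parse_filespec('arg'))           # ('arg', 'arg')
print(parse_filespec('arg:out.txt'))   # ('arg', 'out.txt')
print(parse_filespec('arg:'))          # ('arg', None)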
def functional_tests(raise_warnings, interactive, run_vagrant, run_docker): # Tests on Python < 2.7.3: need to use separate reprozip Python (with known # working version of Python) if sys.version_info < (2, 7, 3): bug13676 = True if 'REPROZIP_PYTHON' not in os.environ: sys.stderr.write("Error: using reprozip with Python %s!\n" % sys.version.split(' ', 1)[0]) sys.exit(1) else: bug13676 = False rpz = [os.environ.get('REPROZIP_PYTHON', sys.executable)] rpuz = [os.environ.get('REPROUNZIP_PYTHON', sys.executable)] # Can't match on the SignalWarning category here because of a Python bug # http://bugs.python.org/issue22543 if raise_warnings: rpz.extend(['-W', 'error:signal']) rpuz.extend(['-W', 'error:signal']) if 'COVER' in os.environ: rpz.extend(['-m'] + os.environ['COVER'].split(' ')) rpuz.extend(['-m'] + os.environ['COVER'].split(' ')) reprozip_main = tests.parent / 'reprozip/reprozip/main.py' reprounzip_main = tests.parent / 'reprounzip/reprounzip/main.py' verbose = ['-v'] * 3 rpz.extend([reprozip_main.absolute().path] + verbose) rpuz.extend([reprounzip_main.absolute().path] + verbose) print("Command lines are:\n%r\n%r" % (rpz, rpuz)) # ######################################## # testrun /bin/echo # output = check_output(rpz + ['testrun', '/bin/echo', 'outputhere']) assert any(b' 1 | /bin/echo outputhere ' in l for l in output.splitlines()) output = check_output( rpz + ['testrun', '-a', '/fake/path/echo', '/bin/echo', 'outputhere']) assert any(b' 1 | (/bin/echo) /fake/path/echo outputhere ' in l for l in output.splitlines()) # ######################################## # testrun multiple commands # check_call(rpz + [ 'testrun', 'bash', '-c', 'cat ../../../../../etc/passwd;' 'cd /var/lib;' 'cat ../../etc/group' ]) check_call(rpz + ['trace', 'bash', '-c', 'cat /etc/passwd;echo']) check_call( rpz + ['trace', '--continue', 'sh', '-c', 'cat /etc/group;/usr/bin/id']) check_call(rpz + ['pack']) if not bug13676: check_call(rpuz + ['graph', 'graph.dot']) check_call(rpuz + ['graph', 'graph2.dot', 'experiment.rpz']) sudo = ['sudo', '-E'] # -E to keep REPROZIP_USAGE_STATS # ######################################## # 'simple' program: trace, pack, info, unpack # # Build build('simple', ['simple.c']) # Trace check_call(rpz + [ 'trace', '-d', 'rpz-simple', './simple', (tests / 'simple_input.txt').path, 'simple_output.txt' ]) orig_output_location = Path('simple_output.txt').absolute() assert orig_output_location.is_file() with orig_output_location.open(encoding='utf-8') as fp: assert fp.read().strip() == '42' orig_output_location.remove() # Read config with Path('rpz-simple/config.yml').open(encoding='utf-8') as fp: conf = yaml.safe_load(fp) other_files = set(Path(f).absolute() for f in conf['other_files']) expected = [Path('simple'), (tests / 'simple_input.txt')] assert other_files.issuperset([f.resolve() for f in expected]) # Check input and output files input_files = conf['runs'][0]['input_files'] assert (dict((k, Path(f).name) for k, f in iteritems(input_files)) == { 'arg': b'simple_input.txt' }) output_files = conf['runs'][0]['output_files'] print(dict((k, Path(f).name) for k, f in iteritems(output_files))) # Here we don't test for dict equality, since we might have C coverage # files in the mix assert Path(output_files['arg']).name == b'simple_output.txt' # Pack check_call(rpz + ['pack', '-d', 'rpz-simple', 'simple.rpz']) Path('simple').remove() # Info check_call(rpuz + ['info', 'simple.rpz']) # Show files check_call(rpuz + ['showfiles', 'simple.rpz']) # Lists packages check_call(rpuz + ['installpkgs', 
'--summary', 'simple.rpz']) # Unpack directory check_call(rpuz + ['directory', 'setup', 'simple.rpz', 'simpledir']) # Run directory check_call(rpuz + ['directory', 'run', 'simpledir']) output_in_dir = join_root(Path('simpledir/root'), orig_output_location) with output_in_dir.open(encoding='utf-8') as fp: assert fp.read().strip() == '42' # Delete with wrong command (should fail) p = subprocess.Popen(rpuz + ['chroot', 'destroy', 'simpledir'], stderr=subprocess.PIPE) stdout, stderr = p.communicate() assert p.poll() != 0 stderr = stderr.splitlines() assert b"Wrong unpacker used" in stderr[0] assert stderr[1].startswith(b"usage: ") # Delete directory check_call(rpuz + ['directory', 'destroy', 'simpledir']) # Unpack chroot check_call( sudo + rpuz + ['chroot', 'setup', '--bind-magic-dirs', 'simple.rpz', 'simplechroot']) try: # Run chroot check_call(sudo + rpuz + ['chroot', 'run', 'simplechroot']) output_in_chroot = join_root(Path('simplechroot/root'), orig_output_location) with output_in_chroot.open(encoding='utf-8') as fp: assert fp.read().strip() == '42' # Get output file check_call(sudo + rpuz + ['chroot', 'download', 'simplechroot', 'arg:output1.txt']) with Path('output1.txt').open(encoding='utf-8') as fp: assert fp.read().strip() == '42' # Replace input file check_call(sudo + rpuz + [ 'chroot', 'upload', 'simplechroot', '%s:arg' % (tests / 'simple_input2.txt') ]) check_call(sudo + rpuz + ['chroot', 'upload', 'simplechroot']) # Run again check_call(sudo + rpuz + ['chroot', 'run', 'simplechroot']) output_in_chroot = join_root(Path('simplechroot/root'), orig_output_location) with output_in_chroot.open(encoding='utf-8') as fp: assert fp.read().strip() == '36' # Delete with wrong command (should fail) p = subprocess.Popen(rpuz + ['directory', 'destroy', 'simplechroot'], stderr=subprocess.PIPE) stdout, stderr = p.communicate() assert p.poll() != 0 stderr = stderr.splitlines() assert b"Wrong unpacker used" in stderr[0] assert stderr[1].startswith(b"usage:") finally: # Delete chroot check_call(sudo + rpuz + ['chroot', 'destroy', 'simplechroot']) if not (tests / 'vagrant').exists(): check_call([ 'sudo', 'sh', '-c', 'mkdir %(d)s; chmod 777 %(d)s' % { 'd': tests / 'vagrant' } ]) # Unpack Vagrant-chroot check_call(rpuz + [ 'vagrant', 'setup/create', '--use-chroot', 'simple.rpz', (tests / 'vagrant/simplevagrantchroot').path ]) print("\nVagrant project set up in simplevagrantchroot") try: if run_vagrant: check_call(rpuz + [ 'vagrant', 'run', '--no-stdin', (tests / 'vagrant/simplevagrantchroot').path ]) # Destroy check_call(rpuz + [ 'vagrant', 'destroy', (tests / 'vagrant/simplevagrantchroot').path ]) elif interactive: print("Test and press enter") sys.stdin.readline() finally: if (tests / 'vagrant/simplevagrantchroot').exists(): (tests / 'vagrant/simplevagrantchroot').rmtree() # Unpack Vagrant without chroot check_call(rpuz + [ 'vagrant', 'setup/create', '--dont-use-chroot', 'simple.rpz', (tests / 'vagrant/simplevagrant').path ]) print("\nVagrant project set up in simplevagrant") try: if run_vagrant: check_call(rpuz + [ 'vagrant', 'run', '--no-stdin', (tests / 'vagrant/simplevagrant').path ]) # Destroy check_call( rpuz + ['vagrant', 'destroy', (tests / 'vagrant/simplevagrant').path]) elif interactive: print("Test and press enter") sys.stdin.readline() finally: if (tests / 'vagrant/simplevagrant').exists(): (tests / 'vagrant/simplevagrant').rmtree() # Unpack Docker check_call(rpuz + ['docker', 'setup/create', 'simple.rpz', 'simpledocker']) print("\nDocker project set up in simpledocker") try: if 
run_docker: check_call(rpuz + ['docker', 'setup/build', 'simpledocker']) check_call(rpuz + ['docker', 'run', 'simpledocker']) # Get output file check_call( rpuz + ['docker', 'download', 'simpledocker', 'arg:doutput1.txt']) with Path('doutput1.txt').open(encoding='utf-8') as fp: assert fp.read().strip() == '42' # Replace input file check_call(rpuz + [ 'docker', 'upload', 'simpledocker', '%s:arg' % (tests / 'simple_input2.txt') ]) check_call(rpuz + ['docker', 'upload', 'simpledocker']) check_call(rpuz + ['showfiles', 'simpledocker']) # Run again check_call(rpuz + ['docker', 'run', 'simpledocker']) # Get output file check_call( rpuz + ['docker', 'download', 'simpledocker', 'arg:doutput2.txt']) with Path('doutput2.txt').open(encoding='utf-8') as fp: assert fp.read().strip() == '36' # Destroy check_call(rpuz + ['docker', 'destroy', 'simpledocker']) elif interactive: print("Test and press enter") sys.stdin.readline() finally: if Path('simpledocker').exists(): Path('simpledocker').rmtree() # ######################################## # 'threads' program: testrun # # Build build('threads', ['threads.c'], ['-lpthread']) # Trace check_call(rpz + ['testrun', './threads']) # ######################################## # 'segv' program: testrun # # Build build('segv', ['segv.c']) # Trace check_call(rpz + ['testrun', './segv']) # ######################################## # 'exec_echo' program: trace, pack, run --cmdline # # Build build('exec_echo', ['exec_echo.c']) # Trace check_call(rpz + ['trace', './exec_echo', 'originalexecechooutput']) # Pack check_call(rpz + ['pack', 'exec_echo.rpz']) # Unpack chroot check_call(sudo + rpuz + ['chroot', 'setup', 'exec_echo.rpz', 'echochroot']) try: # Run original command-line output = check_output(sudo + rpuz + ['chroot', 'run', 'echochroot']) assert output == b'originalexecechooutput\n' # Prints out command-line output = check_output(sudo + rpuz + ['chroot', 'run', 'echochroot', '--cmdline']) assert any(b'./exec_echo originalexecechooutput' == s.strip() for s in output.split(b'\n')) # Run with different command-line output = check_output(sudo + rpuz + [ 'chroot', 'run', 'echochroot', '--cmdline', './exec_echo', 'changedexecechooutput' ]) assert output == b'changedexecechooutput\n' finally: check_call(sudo + rpuz + ['chroot', 'destroy', 'echochroot']) # ######################################## # 'exec_echo' program: testrun # This is built with -m32 so that we transition: # python (x64) -> exec_echo (i386) -> echo (x64) # if sys.maxsize > 2**32: # Build build('exec_echo32', ['exec_echo.c'], ['-m32']) # Trace check_call(rpz + ['testrun', './exec_echo32 42']) else: print("Can't try exec_echo transitions: not running on 64bits") # ######################################## # Tracing non-existing program # check_call(rpz + ['testrun', './doesntexist']) # ######################################## # 'connect' program: testrun # # Build build('connect', ['connect.c']) # Trace stderr = check_errout(rpz + ['testrun', './connect']) stderr = stderr.split(b'\n') assert not any(b'program exited with non-zero code' in l for l in stderr) assert any( re.search(br'process connected to [0-9.]+:80', l) for l in stderr) # ######################################## # Copies back coverage report # coverage = Path('.coverage') if coverage.exists(): coverage.copyfile(tests.parent / '.coverage.runpy')
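# Illustrative sketch (not part of the test suite): the expect-failure
# pattern used twice above ("delete with wrong command"), extracted as a
# helper. It runs a command, asserts a non-zero exit status, and returns the
# stderr lines for content checks.
import subprocess

def expect_failure(cmd):
    p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    _, stderr_data = p.communicate()
    assert p.returncode != 0, "command unexpectedly succeeded: %r" % (cmd,)
    return stderr_data.splitlines()

# e.g.:
#   lines = expect_failure(rpuz + ['chroot', 'destroy', 'simpledir'])
#   assert b"Wrong unpacker used" in lines[0]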
def docker_run(args): """Runs the experiment in the container. """ target = Path(args.target[0]) unpacked_info = read_dict(target / '.reprounzip') cmdline = args.cmdline # Loads config runs, packages, other_files = load_config(target / 'config.yml', True) selected_runs = get_runs(runs, args.run, cmdline) # Destroy previous container if 'ran_container' in unpacked_info: container = unpacked_info.pop('ran_container') logging.info("Destroying previous container %s", container.decode('ascii')) retcode = subprocess.call(['docker', 'rm', '-f', container]) if retcode != 0: logging.error("Error deleting previous container %s", container.decode('ascii')) write_dict(target / '.reprounzip', unpacked_info) # Use the initial image directly if 'current_image' in unpacked_info: image = unpacked_info['current_image'] logging.debug("Running from image %s", image.decode('ascii')) else: logging.critical("Image doesn't exist yet, have you run setup/build?") sys.exit(1) # Name of new container container = make_unique_name(b'reprounzip_run_') hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # Get the local bridge IP ip_str = get_iface_addr('docker0') # X11 handler x11 = X11Handler(args.x11, ('internet', ip_str), args.x11_display) cmds = [] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape(run['workingdir']) cmd += '/usr/bin/env -i ' environ = x11.fix_env(run['environ']) cmd += ' '.join('%s=%s' % (k, shell_escape(v)) for k, v in iteritems(environ)) cmd += ' ' # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = [run['binary']] + run['argv'][1:] else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) uid = run.get('uid', 1000) cmd = 'sudo -u \'#%d\' /bin/busybox sh -c %s\n' % (uid, shell_escape(cmd)) cmds.append(cmd) cmds = x11.init_cmds + cmds cmds = ' && '.join(cmds) signals.pre_run(target=target) # Creates forwarders forwarders = [] for port, connector in x11.port_forward: forwarders.append( LocalForwarder(connector, port)) # Run command in container logging.info("Starting container %s", container.decode('ascii')) retcode = interruptible_call(['docker', 'run', b'--name=' + container, '-h', hostname, '-i', '-t', image, '/bin/busybox', 'sh', '-c', cmds]) if retcode != 0: logging.critical("docker run failed with code %d", retcode) sys.exit(1) # Get exit status from "docker inspect" out = subprocess.check_output(['docker', 'inspect', container]) outjson = json.loads(out.decode('ascii')) if (outjson[0]["State"]["Running"] is not False or outjson[0]["State"]["Paused"] is not False): logging.error("Invalid container state after execution:\n%s", json.dumps(outjson[0]["State"])) retcode = outjson[0]["State"]["ExitCode"] sys.stderr.write("\n*** Command finished, status: %d\n" % retcode) # Store container name (so we can download output files) unpacked_info['ran_container'] = container write_dict(target / '.reprounzip', unpacked_info) signals.post_run(target=target, retcode=retcode)
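# Illustrative sketch (not part of reprounzip): extracting the exit status
# from `docker inspect` output as done above. The command prints a JSON list
# with one object per container; the State object carries the Running/Paused
# flags and the ExitCode. The helper name is hypothetical.
import json

def container_exitcode(inspect_output):
    state = json.loads(inspect_output.decode('ascii'))[0]['State']
    if state['Running'] is not False or state['Paused'] is not False:
        raise RuntimeError("container still running: %r" % (state,))
    return state['ExitCode']

sample = b'[{"State": {"Running": false, "Paused": false, "ExitCode": 0}}]'
print(container_exitcode(sample))  # 0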
def vagrant_run(args): """Runs the experiment in the virtual machine. """ target = Path(args.target[0]) unpacked_info = read_dict(target) use_chroot = unpacked_info['use_chroot'] cmdline = args.cmdline check_vagrant_version() # Loads config config = load_config(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # Port forwarding ports = parse_ports(args.expose_port) # If the requested ports are not a subset of the ones already set on the # VM, we have to update the Vagrantfile and issue `vagrant reload`, which # will reboot the machine req_ports = set(ports) set_ports = set(unpacked_info.get('ports', [])) if not req_ports.issubset(set_ports): # Build new set of forwarded ports: the ones already set + the one just # requested # The ones we request now override the previous config all_ports = dict( (host, (guest, proto)) for host, guest, proto in set_ports) for host, guest, proto in req_ports: all_ports[host] = guest, proto unpacked_info['ports'] = sorted( (host, guest, proto) for host, (guest, proto) in iteritems(all_ports)) write_vagrantfile(target, unpacked_info) logger.info("Some requested ports are not yet forwarded, running " "'vagrant reload'") retcode = subprocess.call(['vagrant', 'reload', '--no-provision'], cwd=target.path) if retcode != 0: logger.critical("vagrant reload failed with code %d, aborting", retcode) sys.exit(1) write_dict(target, unpacked_info) # X11 handler if unpacked_info['gui']: x11 = LocalX11Handler() else: x11 = X11Handler(args.x11, ('local', hostname), args.x11_display) cmds = [] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape(run['workingdir']) if use_chroot: cmd += '/busybox env -i ' else: cmd += '/usr/bin/env -i ' environ = x11.fix_env(run['environ']) environ = fixup_environment(environ, args) cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(environ)) cmd += ' ' # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = [run['binary']] + run['argv'][1:] else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) uid = run.get('uid', 1000) gid = run.get('gid', 1000) if use_chroot: userspec = '%s:%s' % (uid, gid) cmd = ('chroot --userspec=%s /experimentroot ' '/bin/sh -c %s' % (userspec, shell_escape(cmd))) else: cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd)) cmds.append(cmd) if use_chroot: cmds = [ 'chroot /experimentroot /bin/sh -c %s' % shell_escape(c) for c in x11.init_cmds ] + cmds else: cmds = x11.init_cmds + cmds cmds = ' && '.join(cmds) # Sets the hostname to the original experiment's machine's # FIXME: not reentrant: this restores the Vagrant machine's hostname after # the run, which might cause issues if several "reprounzip vagrant run" are # running at once cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname + cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES') cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds) # Gets vagrant SSH parameters info = machine_setup(target) signals.pre_run(target=target) interactive = not (args.no_stdin or os.environ.get('REPROUNZIP_NON_INTERACTIVE')) retcode = run_interactive(info, interactive, cmds, not args.no_pty, x11.port_forward) stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) write_dict(target, unpacked_info) signals.post_run(target=target, 
retcode=retcode)
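# Illustrative sketch (not part of reprounzip): the port-set merge above.
# Already-forwarded ports are kept, newly requested ones override any
# previous mapping for the same host port, and the result is sorted so the
# Vagrantfile stays stable across runs.
def merge_ports(existing, requested):
    """existing/requested: iterables of (host, guest, proto) triples."""
    all_ports = dict((host, (guest, proto))
                     for host, guest, proto in existing)
    for host, guest, proto in requested:
        all_ports[host] = guest, proto
    return sorted((host, guest, proto)
                  for host, (guest, proto) in all_ports.items())

print(merge_ports([(8080, 80, 'tcp')],
                  [(8080, 8000, 'tcp'), (2222, 22, 'tcp')]))
# [(2222, 22, 'tcp'), (8080, 8000, 'tcp')]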
def docker_run(args): """Runs the experiment in the container. """ target = Path(args.target[0]) unpacked_info = read_dict(target) cmdline = args.cmdline # Sanity check if args.detach and args.x11: logging.critical("Error: Can't use X11 forwarding if you're detaching") raise UsageError # Loads config config = load_config(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) # Get current image name if 'current_image' in unpacked_info: image = unpacked_info['current_image'] logging.debug("Running from image %s", image.decode('ascii')) else: logging.critical("Image doesn't exist yet, have you run setup/build?") sys.exit(1) # Name of new container if args.detach: container = make_unique_name(b'reprounzip_detached_') else: container = make_unique_name(b'reprounzip_run_') hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # Port forwarding port_options = [] for port_host, port_container, proto in parse_ports(args.expose_port): port_options.extend(['-p', '%s:%s%s' % (port_host, port_container, proto)]) # X11 handler if args.x11: local_ip = get_local_addr() docker_host = local_ip if os.environ.get('DOCKER_HOST'): m = _dockerhost_re.match(os.environ['DOCKER_HOST']) if m is not None: docker_host = m.group(1) if args.tunneled_x11: x11 = X11Handler(True, ('internet', docker_host), args.x11_display) else: x11 = X11Handler(True, ('internet', local_ip), args.x11_display) if (docker_host != local_ip and docker_host != 'localhost' and not docker_host.startswith('127.') and not docker_host.startswith('192.168.99.')): ssh_cmdline = ' '.join( '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port} for port, connector in x11.port_forward) logging.warning( "You requested X11 forwarding but the Docker container " "appears to be running remotely. It is probable that it " "won't be able to connect to the local display. 
Creating " "a remote SSH tunnel and running with --tunneled-x11 " "might help (%s).", ssh_cmdline) else: x11 = X11Handler(False, ('local', hostname), args.x11_display) cmd = [] for run_number in selected_runs: run = runs[run_number] env_set, env_unset = x11.env_fixes(run['environ']) a_env_set, a_env_unset = parse_environment_args(args) env_set.update(a_env_set) env_unset.extend(a_env_unset) if env_set or env_unset: cmd.append('env') env = [] for k in env_unset: env.append('-u') env.append(shell_escape(k)) for k, v in iteritems(env_set): env.append('%s=%s' % (shell_escape(k), shell_escape(v))) cmd.append(' '.join(env)) # FIXME : Use exec -a or something if binary != argv[0] if cmdline is not None: cmd.append('cmd') cmd.append(' '.join(shell_escape(a) for a in cmdline)) cmd.append('run') cmd.append('%d' % run_number) cmd = list(chain.from_iterable([['do', shell_escape(c)] for c in x11.init_cmds] + [cmd])) if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Passing arguments to Docker image:") for c in cmd: logging.debug(c) signals.pre_run(target=target) # Creates forwarders forwarders = [] for port, connector in x11.port_forward: forwarders.append(LocalForwarder(connector, port)) if args.detach: logging.info("Start container %s (detached)", container.decode('ascii')) retcode = interruptible_call(args.docker_cmd.split() + ['run', b'--name=' + container, '-h', hostname, '-d', '-t'] + port_options + args.docker_option + [image] + cmd) if retcode != 0: logging.critical("docker run failed with code %d", retcode) subprocess.call(['docker', 'rm', '-f', container]) sys.exit(1) return # Run command in container logging.info("Starting container %s", container.decode('ascii')) retcode = interruptible_call(args.docker_cmd.split() + ['run', b'--name=' + container, '-h', hostname, '-i', '-t'] + port_options + args.docker_option + [image] + cmd) if retcode != 0: logging.critical("docker run failed with code %d", retcode) subprocess.call(['docker', 'rm', '-f', container]) sys.exit(1) # Get exit status from "docker inspect" out = subprocess.check_output(args.docker_cmd.split() + ['inspect', container]) outjson = json.loads(out.decode('ascii')) if (outjson[0]["State"]["Running"] is not False or outjson[0]["State"]["Paused"] is not False): logging.error("Invalid container state after execution:\n%s", json.dumps(outjson[0]["State"])) retcode = outjson[0]["State"]["ExitCode"] stderr.write("\n*** Command finished, status: %d\n" % retcode) # Commit to create new image new_image = make_unique_name(b'reprounzip_image_') logging.info("Committing container %s to image %s", container.decode('ascii'), new_image.decode('ascii')) subprocess.check_call(args.docker_cmd.split() + ['commit', container, new_image]) # Update image name unpacked_info['current_image'] = new_image write_dict(target, unpacked_info) # Remove the container logging.info("Destroying container %s", container.decode('ascii')) retcode = subprocess.call(args.docker_cmd.split() + ['rm', container]) if retcode != 0: logging.error("Error deleting container %s", container.decode('ascii')) # Untag previous image, unless it is the initial_image if image != unpacked_info['initial_image']: logging.info("Untagging previous image %s", image.decode('ascii')) subprocess.check_call(args.docker_cmd.split() + ['rmi', image]) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) write_dict(target, unpacked_info) signals.post_run(target=target, retcode=retcode)
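# Illustrative sketch (not part of reprounzip): the argument protocol
# consumed by rpz_entrypoint.sh (see docker_setup_create above), as built by
# the loop in docker_run. Arguments come in (keyword, value) pairs: 'do CMD'
# runs a command as-is, 'cmd LINE' stages a replacement command line for the
# next run, and 'run N' executes that run. The helper name is hypothetical.
def build_entrypoint_args(run_numbers, cmdline=None, init_cmds=()):
    args = []
    for c in init_cmds:
        args.extend(['do', c])
    for n in run_numbers:
        if cmdline is not None:
            args.extend(['cmd', cmdline])
        args.extend(['run', '%d' % n])
    return args

print(build_entrypoint_args([0, 1], cmdline='./simple in.txt out.txt'))
# ['cmd', './simple in.txt out.txt', 'run', '0',
#  'cmd', './simple in.txt out.txt', 'run', '1']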
def _print_package_info(pack, info, verbosity=1): print("Pack file: %s" % pack) print("\n----- Pack information -----") print("Compressed size: %s" % hsize(pack.size())) info_pack = info.get('pack') if info_pack: if 'total_size' in info_pack: print("Unpacked size: %s" % hsize(info_pack['total_size'])) if 'total_paths' in info_pack: print("Total packed paths: %d" % info_pack['total_paths']) if verbosity >= 3: print(" Files: %d" % info_pack['files']) print(" Directories: %d" % info_pack['dirs']) if info_pack.get('symlinks'): print(" Symbolic links: %d" % info_pack['symlinks']) if info_pack.get('hardlinks'): print(" Hard links: %d" % info_pack['hardlinks']) if info_pack.get('others'): print(" Unknown (what!?): %d" % info_pack['others']) print("\n----- Metadata -----") info_meta = info['meta'] if verbosity >= 3: print("Total paths: %d" % info_meta['total_paths']) print("Listed packed paths: %d" % info_meta['packed_paths']) if info_meta.get('packages'): print("Total software packages: %d" % info_meta['packages']) print("Packed software packages: %d" % info_meta['packed_packages']) if verbosity >= 3: print("Files from packed software packages: %d" % info_meta['packed_packages_files']) print("Files from unpacked software packages: %d" % info_meta['unpacked_packages_files']) if 'architecture' in info_meta: print("Architecture: %s (current: %s)" % (info_meta['architecture'], platform.machine().lower())) if 'distribution' in info_meta: distribution = ' '.join(t for t in info_meta['distribution'] if t) current_distribution = [distro.id(), distro.version()] current_distribution = ' '.join(t for t in current_distribution if t) print("Distribution: %s (current: %s)" % ( distribution, current_distribution or "(not Linux)")) if 'runs' in info: runs = info['runs'] print("Runs (%d):" % len(runs)) for run in runs: cmdline = ' '.join(shell_escape(a) for a in run['argv']) if len(runs) == 1 and run['id'] == "run0": print(" %s" % cmdline) else: print(" %s: %s" % (run['id'], cmdline)) if verbosity >= 2: print(" wd: %s" % run['workingdir']) if 'signal' in run: print(" signal: %d" % run['signal']) else: print(" exitcode: %d" % run['exitcode']) if run.get('walltime') is not None: print(" walltime: %s" % run['walltime']) inputs_outputs = info.get('inputs_outputs') if inputs_outputs: if verbosity < 2: print("Inputs/outputs files (%d): %s" % ( len(inputs_outputs), ", ".join(sorted(inputs_outputs)))) else: print("Inputs/outputs files (%d):" % len(inputs_outputs)) for name, f in sorted(iteritems(inputs_outputs)): t = [] if f['read_runs']: t.append("in") if f['write_runs']: t.append("out") print(" %s (%s): %s" % (name, ' '.join(t), f['path'])) unpacker_status = info.get('unpacker_status') if unpacker_status: print("\n----- Unpackers -----") for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"), (COMPAT_NO, "Incompatible")]: if s != COMPAT_OK and verbosity < 2: continue if s not in unpacker_status: continue upks = unpacker_status[s] print("%s (%d):" % (n, len(upks))) for upk_name, msg in upks: if msg is not None: print(" %s (%s)" % (upk_name, msg)) else: print(" %s" % upk_name)
def do_vistrails(target, pack=None, **kwargs): """Create a VisTrails workflow that runs the experiment. This is called from signals after an experiment has been set up by any unpacker. """ record_usage(do_vistrails=True) config = load_config(target / 'config.yml', canonical=True) # Writes VisTrails workflow bundle = target / 'vistrails.vt' logging.info("Writing VisTrails workflow %s...", bundle) vtdir = Path.tempdir(prefix='reprounzip_vistrails_') ids = IdScope() try: with vtdir.open('w', 'vistrail', encoding='utf-8', newline='\n') as fp: wf = Workflow(fp, ids) # Directory module, referring to this directory d = wf.add_module('%s:Directory' % rpz_id, rpz_version) wf.add_function(d, 'directory', [(directory_sig, str(target.resolve()))]) connect_from = d for i, run in enumerate(config.runs): inputs = sorted(n for n, f in iteritems(config.inputs_outputs) if i in f.read_runs) outputs = sorted(n for n, f in iteritems(config.inputs_outputs) if i in f.write_runs) ports = itertools.chain((('input', p) for p in inputs), (('output', p) for p in outputs)) # Run module r = wf.add_module('%s:Run' % rpz_id, rpz_version) wf.add_function(r, 'cmdline', [ (string_sig, ' '.join(shell_escape(arg) for arg in run['argv']))]) wf.add_function(r, 'run_number', [(integer_sig, i)]) # Port specs for input/output files for type_, name in ports: wf.add_port_spec(r, name, type_, [file_pkg_mod]) # Draw connection wf.connect(connect_from, experiment_sig, 'experiment', r, experiment_sig, 'experiment') connect_from = r wf.close() with bundle.open('wb') as fp: z = zipfile.ZipFile(fp, 'w') with vtdir.in_dir(): for path in Path('.').recursedir(): z.write(str(path)) z.close() finally: vtdir.rmtree()
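# Illustrative sketch (not part of reprounzip-vistrails): how the input and
# output port names are derived per run above. A file is an input port of
# run i if run i reads it, and an output port if run i writes it. The
# namedtuple stands in for the configuration's file objects.
from collections import namedtuple

def ports_for_run(inputs_outputs, i):
    """inputs_outputs: mapping name -> object with read_runs/write_runs."""
    inputs = sorted(n for n, f in inputs_outputs.items()
                    if i in f.read_runs)
    outputs = sorted(n for n, f in inputs_outputs.items()
                     if i in f.write_runs)
    return inputs, outputs

IOFile = namedtuple('IOFile', ['read_runs', 'write_runs'])
iofiles = {'arg': IOFile({0}, set()), 'out': IOFile(set(), {0})}
print(ports_for_run(iofiles, 0))  # (['arg'], ['out'])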
def vagrant_run(args): """Runs the experiment in the virtual machine. """ target = Path(args.target[0]) unpacked_info = read_dict(target) use_chroot = unpacked_info.get('use_chroot', True) cmdline = args.cmdline check_vagrant_version() # Loads config config = load_config(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # X11 handler x11 = X11Handler(args.x11, ('local', hostname), args.x11_display) cmds = [] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape(run['workingdir']) if use_chroot: cmd += '/busybox env -i ' else: cmd += '/usr/bin/env -i ' environ = x11.fix_env(run['environ']) environ = fixup_environment(environ, args) cmd += ' '.join('%s=%s' % (k, shell_escape(v)) for k, v in iteritems(environ)) cmd += ' ' # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = [run['binary']] + run['argv'][1:] else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) uid = run.get('uid', 1000) gid = run.get('gid', 1000) if use_chroot: userspec = '%s:%s' % (uid, gid) cmd = ('chroot --userspec=%s /experimentroot ' '/bin/sh -c %s' % ( userspec, shell_escape(cmd))) else: cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd)) cmds.append(cmd) if use_chroot: cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c) for c in x11.init_cmds] + cmds else: cmds = x11.init_cmds + cmds cmds = ' && '.join(cmds) # Sets the hostname to the original experiment's machine's # FIXME: not reentrant: this restores the Vagrant machine's hostname after # the run, which might cause issues if several "reprounzip vagrant run" are # running at once cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname + cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES') cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds) # Gets vagrant SSH parameters info = machine_setup(target, unpacked_info['use_chroot']) signals.pre_run(target=target) interactive = not (args.no_stdin or os.environ.get('REPROUNZIP_NON_INTERACTIVE')) retcode = run_interactive(info, interactive, cmds, not args.no_pty, x11.port_forward) stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) write_dict(target, unpacked_info) signals.post_run(target=target, retcode=retcode)
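# Illustrative sketch (not part of reprounzip): the hostname save/restore
# wrapper built above, as a standalone function. The experiment's original
# hostname is set for the duration of the command and the machine's own
# hostname restored afterwards, while preserving the command's exit status.
def wrap_with_hostname(cmds, hostname):
    return ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')

print(wrap_with_hostname('./simple in.txt out.txt', 'reprounzip'))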
class X11Handler(object): """X11 handler. This selects a way to connect to the local X server and an authentication mechanism. It provides `fix_env()` to set the X environment variables for the experiment, `init_cmds` to set up X before running the experiment's main commands, and `port_forward` which describes the reverse port tunnels from the experiment to the local X server. """ DISPLAY_NUMBER = 15 SOCK2X = { socket.AF_INET: Xauth.FAMILY_INTERNET, socket.AF_INET6: Xauth.FAMILY_INTERNET6 } X2SOCK = dict((v, k) for k, v in iteritems(SOCK2X)) def __init__(self, enabled, target, display=None): self.enabled = enabled if not self.enabled: return self.target = target self.xauth = PosixPath('/.reprounzip_xauthority') self.display = display if display is not None else self.DISPLAY_NUMBER logging.debug( "X11 support enabled; will create Xauthority file %s " "for experiment. Display number is %d", self.xauth, self.display) # List of addresses that match the $DISPLAY variable possible, local_display = self._locate_display() tcp_portnum = ((6000 + local_display) if local_display is not None else None) if ('XAUTHORITY' in os.environ and Path(os.environ['XAUTHORITY']).is_file()): xauthority = Path(os.environ['XAUTHORITY']) # Note: I'm assuming here that Xauthority has no XDG support else: xauthority = Path('~').expand_user() / '.Xauthority' # Read Xauthority file xauth_entries = {} if xauthority.is_file(): with xauthority.open('rb') as fp: fp.seek(0, os.SEEK_END) size = fp.tell() fp.seek(0, os.SEEK_SET) while fp.tell() < size: entry = Xauth.from_file(fp) if (entry.name == 'MIT-MAGIC-COOKIE-1' and entry.number == local_display): if entry.family == Xauth.FAMILY_LOCAL: xauth_entries[(entry.family, None)] = entry elif (entry.family == Xauth.FAMILY_INTERNET or entry.family == Xauth.FAMILY_INTERNET6): xauth_entries[(entry.family, entry.address)] = entry # FIXME: this completely ignores addresses logging.debug("Possible X endpoints: %s", (possible, )) # Select socket and authentication cookie self.xauth_record = None self.connection_info = None for family, address in possible: # Checks that we have a cookie entry = family, (None if family is Xauth.FAMILY_LOCAL else address) if entry not in xauth_entries: continue if family == Xauth.FAMILY_LOCAL and hasattr(socket, 'AF_UNIX'): # Checks that the socket exists if not Path(address).exists(): continue self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM, address) self.xauth_record = xauth_entries[(family, None)] logging.debug( "Will connect to local X display via UNIX " "socket %s", address) break else: # Checks that we have a cookie family = self.X2SOCK[family] self.connection_info = (family, socket.SOCK_STREAM, (address, tcp_portnum)) self.xauth_record = xauth_entries[(family, address)] logging.debug("Will connect to X display %s:%d via %s/TCP", address, tcp_portnum, "IPv6" if family == socket.AF_INET6 else "IPv4") break # Didn't find an Xauthority record -- assume no authentication is # needed, but still set self.connection_info if self.connection_info is None: for family, address in possible: # Only try UNIX sockets, we'll use 127.0.0.1 otherwise if family == Xauth.FAMILY_LOCAL: if not hasattr(socket, 'AF_UNIX'): continue self.connection_info = (socket.AF_UNIX, socket.SOCK_STREAM, address) logging.debug( "Will connect to X display via UNIX socket " "%s, no authentication", address) break else: self.connection_info = (socket.AF_INET, socket.SOCK_STREAM, ('127.0.0.1', tcp_portnum)) logging.debug( "Will connect to X display 127.0.0.1:%d via IPv4/TCP, " "no authentication", tcp_portnum) if self.connection_info is None: raise RuntimeError("Couldn't determine how to connect to local X " "server, DISPLAY is %s" % (repr(os.environ['DISPLAY']) if 'DISPLAY' in os.environ else 'not set')) @classmethod def _locate_display(cls): """Reads $DISPLAY and figures out possible sockets. """ # We default to ":0", Xming for instance doesn't set $DISPLAY display = os.environ.get('DISPLAY', ':0') # It might be the full path to a UNIX socket if display.startswith('/'): return [(Xauth.FAMILY_LOCAL, display)], None local_addr, local_display = display.rsplit(':', 1) local_display = int(local_display.split('.', 1)[0]) # Let's order the socket families: IPv4 first, then v6, then others def sort_families(gai, order={socket.AF_INET: 0, socket.AF_INET6: 1}): return sorted(gai, key=lambda x: order.get(x[0], 999999)) # Network addresses of the local machine local_addresses = [] for family, socktype, proto, canonname, sockaddr in \ sort_families(socket.getaddrinfo(socket.gethostname(), 6000)): try: family = cls.SOCK2X[family] except KeyError: continue local_addresses.append((family, sockaddr[0])) logging.debug("Local addresses: %s", (local_addresses, )) # Determine possible addresses for $DISPLAY if not local_addr: possible = [(Xauth.FAMILY_LOCAL, '/tmp/.X11-unix/X%d' % local_display)] possible += local_addresses else: local_possible = False possible = [] for family, socktype, proto, canonname, sockaddr in \ sort_families(socket.getaddrinfo(local_addr, 6000)): try: family = cls.SOCK2X[family] except KeyError: continue if (family, sockaddr[0]) in local_addresses: local_possible = True possible.append((family, sockaddr[0])) if local_possible: possible = [ (Xauth.FAMILY_LOCAL, '/tmp/.X11-unix/X%d' % local_display) ] + possible return possible, local_display @property def port_forward(self): """Builds the port forwarding info, for `run_interactive()`. Just requests port 6000 + display number on the remote host to be forwarded to the X socket identified by `self.connection_info`. """ if not self.enabled: return [] @contextlib.contextmanager def connect(src_addr): logging.info("Got remote X connection from %s", (src_addr, )) logging.debug("Connecting to X server: %s", (self.connection_info, )) sock = socket.socket(*self.connection_info[:2]) sock.connect(self.connection_info[2]) yield sock sock.close() logging.info("X connection from %s closed", (src_addr, )) return [(6000 + self.display, connect)] def fix_env(self, env): """Sets ``$XAUTHORITY`` and ``$DISPLAY`` in the environment. """ if not self.enabled: return env new_env = dict(env) new_env['XAUTHORITY'] = str(self.xauth) if self.target[0] == 'local': new_env['DISPLAY'] = '127.0.0.1:%d' % self.display elif self.target[0] == 'internet': new_env['DISPLAY'] = '%s:%d' % (self.target[1], self.display) return new_env @property def init_cmds(self): """Gets the commands to set up X on the server before the experiment. """ if not self.enabled or self.xauth_record is None: return [] if self.target[0] == 'local': xauth_record = Xauth(Xauth.FAMILY_LOCAL, self.target[1], self.display, self.xauth_record.name, self.xauth_record.data) elif self.target[0] == 'internet': xauth_record = Xauth(Xauth.FAMILY_INTERNET, socket.inet_aton(self.target[1]), self.display, self.xauth_record.name, self.xauth_record.data) else: raise RuntimeError("Invalid target display type") buf = xauth_record.as_bytes() xauth = ''.join( ('\\x%02x' % ord(buf[i:i + 1])) for i in xrange(len(buf))) return ['echo -ne "%s" > %s' % (xauth, self.xauth)]
def docker_run(args): """Runs the experiment in the container. """ target = Path(args.target[0]) unpacked_info = read_dict(target) cmdline = args.cmdline # Sanity check if args.detach and args.x11: logging.critical("Error: Can't use X11 forwarding if you're detaching") raise UsageError # Loads config config = load_config(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) # Get current image name if 'current_image' in unpacked_info: image = unpacked_info['current_image'] logging.debug("Running from image %s", image.decode('ascii')) else: logging.critical("Image doesn't exist yet, have you run setup/build?") sys.exit(1) # Name of new container if args.detach: container = make_unique_name(b'reprounzip_detached_') else: container = make_unique_name(b'reprounzip_run_') hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # X11 handler if args.x11: local_ip = get_local_addr() docker_host = local_ip if os.environ.get('DOCKER_HOST'): m = _dockerhost_re.match(os.environ['DOCKER_HOST']) if m is not None: docker_host = m.group(1) if args.tunneled_x11: x11 = X11Handler(True, ('internet', docker_host), args.x11_display) else: x11 = X11Handler(True, ('internet', local_ip), args.x11_display) if (docker_host != local_ip and docker_host != 'localhost' and not docker_host.startswith('127.') and not docker_host.startswith('192.168.99.')): ssh_cmdline = ' '.join('-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port} for port, connector in x11.port_forward) logging.warning( "You requested X11 forwarding but the Docker container " "appears to be running remotely. It is probable that it " "won't be able to connect to the local display. Creating " "a remote SSH tunnel and running with --tunneled-x11 " "might help (%s).", ssh_cmdline) else: x11 = X11Handler(False, ('local', hostname), args.x11_display) cmds = [] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape(run['workingdir']) cmd += '/busybox env -i ' environ = x11.fix_env(run['environ']) environ = fixup_environment(environ, args) cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(environ)) cmd += ' ' # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = [run['binary']] + run['argv'][1:] else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) uid = run.get('uid', 1000) gid = run.get('gid', 1000) cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % ( uid, gid, shell_escape(cmd)) cmds.append(cmd) cmds = x11.init_cmds + cmds cmds = ' && '.join(cmds) signals.pre_run(target=target) # Creates forwarders forwarders = [] for port, connector in x11.port_forward: forwarders.append(LocalForwarder(connector, port)) if args.detach: logging.info("Start container %s (detached)", container.decode('ascii')) retcode = interruptible_call([ 'docker', 'run', b'--name=' + container, '-h', hostname, '-d', '-t' ] + args.docker_option + [image, '/busybox', 'sh', '-c', cmds]) if retcode != 0: logging.critical("docker run failed with code %d", retcode) subprocess.call(['docker', 'rm', '-f', container]) sys.exit(1) return # Run command in container logging.info("Starting container %s", container.decode('ascii')) retcode = interruptible_call( ['docker', 'run', b'--name=' + container, '-h', hostname, '-i', '-t'] + args.docker_option + [image, '/busybox', 'sh', '-c', cmds]) if retcode != 0: logging.critical("docker run failed with code %d", retcode) subprocess.call(['docker', 'rm', '-f', container]) sys.exit(1) # Get exit status from 
"docker inspect" out = subprocess.check_output(['docker', 'inspect', container]) outjson = json.loads(out.decode('ascii')) if (outjson[0]["State"]["Running"] is not False or outjson[0]["State"]["Paused"] is not False): logging.error("Invalid container state after execution:\n%s", json.dumps(outjson[0]["State"])) retcode = outjson[0]["State"]["ExitCode"] stderr.write("\n*** Command finished, status: %d\n" % retcode) # Commit to create new image new_image = make_unique_name(b'reprounzip_image_') logging.info("Committing container %s to image %s", container.decode('ascii'), new_image.decode('ascii')) subprocess.check_call(['docker', 'commit', container, new_image]) # Update image name unpacked_info['current_image'] = new_image write_dict(target, unpacked_info) # Remove the container logging.info("Destroying container %s", container.decode('ascii')) retcode = subprocess.call(['docker', 'rm', container]) if retcode != 0: logging.error("Error deleting container %s", container.decode('ascii')) # Untag previous image, unless it is the initial_image if image != unpacked_info['initial_image']: logging.info("Untagging previous image %s", image.decode('ascii')) subprocess.check_call(['docker', 'rmi', image]) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) write_dict(target, unpacked_info) signals.post_run(target=target, retcode=retcode)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        record_usage(chroot_missing_pkgs=True)
        logging.warning("According to configuration, some files were left "
                        "out because they belong to the following "
                        "packages:%s\nWill copy files from HOST SYSTEM",
                        ''.join('\n    %s' % pkg
                                for pkg in packages_not_packed))
        missing_files = False
        for pkg in packages_not_packed:
            for f in pkg.files:
                path = Path(f.path)
                if not path.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        path, pkg.name)
                    missing_files = True
                    continue
                dest = join_root(root, path)
                dest.parent.mkdir(parents=True)
                if path.is_link():
                    dest.symlink(path.read_link())
                else:
                    path.copy(dest)
                if restore_owner:
                    stat = path.stat()
                    dest.chown(stat.st_uid, stat.st_gid)
        if missing_files:
            record_usage(chroot_missing_files=True)

    # Unpacks files
    members = rpz_pack.list_data()
    for m in members:
        # Remove 'DATA/' prefix
        m.name = str(rpz_pack.remove_data_prefix(m.name))
    if not restore_owner:
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    rpz_pack.extract_data(root, members)
    rpz_pack.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(config.runs[0]['architecture']),
                          busybox_path,
                          'busybox-%s' % config.runs[0]['architecture'])
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    input_files = [f.path for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs]
    if input_files:
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for ifile in input_files:
            inputtar.add(str(join_root(root, ifile)), str(ifile))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')
signals.post_setup(target=target, pack=pack)
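# --- Illustrative sketch: the join_root() semantics assumed throughout the
# unpackers -- re-rooting an absolute path under the extraction root. String
# version for illustration only; the real helper operates on rpaths objects.
import posixpath


def _example_join_root(root, abs_path):
    return posixpath.join(root, abs_path.lstrip('/'))

assert (_example_join_root('/tmp/exp/root', '/bin/sh') ==
        '/tmp/exp/root/bin/sh')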
def print_info(args):
    """Writes out some information about a pack file.
    """
    pack = Path(args.pack[0])

    # Loads config
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    pack_total_size = 0
    pack_total_paths = 0
    pack_files = 0
    pack_dirs = 0
    pack_symlinks = 0
    pack_others = 0
    rpz_pack = RPZPack(pack)
    for m in rpz_pack.list_data():
        pack_total_size += m.size
        pack_total_paths += 1
        if m.isfile():
            pack_files += 1
        elif m.isdir():
            pack_dirs += 1
        elif m.issym():
            pack_symlinks += 1
        else:
            pack_others += 1
    rpz_pack.close()

    meta_total_paths = 0
    meta_packed_packages_files = 0
    meta_unpacked_packages_files = 0
    meta_packages = len(packages)
    meta_packed_packages = 0
    for package in packages:
        nb = len(package.files)
        meta_total_paths += nb
        if package.packfiles:
            meta_packed_packages_files += nb
            meta_packed_packages += 1
        else:
            meta_unpacked_packages_files += nb
    nb = len(other_files)
    meta_total_paths += nb
    meta_packed_paths = meta_packed_packages_files + nb

    if runs:
        meta_architecture = runs[0]['architecture']
        if any(r['architecture'] != meta_architecture for r in runs):
            logging.warning("Runs have different architectures")
        meta_distribution = runs[0]['distribution']
        if any(r['distribution'] != meta_distribution for r in runs):
            logging.warning("Runs have different distributions")
        meta_distribution = ' '.join(t for t in meta_distribution if t)

    current_architecture = platform.machine().lower()
    current_distribution = platform.linux_distribution()[0:2]
    current_distribution = ' '.join(t for t in current_distribution if t)

    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))
    print("Unpacked size: %s" % hsize(pack_total_size))
    print("Total packed paths: %d" % pack_total_paths)
    if args.verbosity >= 3:
        print("    Files: %d" % pack_files)
        print("    Directories: %d" % pack_dirs)
        print("    Symbolic links: %d" % pack_symlinks)
        if pack_others:
            print("    Unknown (what!?): %d" % pack_others)

    print("\n----- Metadata -----")
    if args.verbosity >= 3:
        print("Total paths: %d" % meta_total_paths)
        print("Listed packed paths: %d" % meta_packed_paths)
    if packages:
        print("Total software packages: %d" % meta_packages)
        print("Packed software packages: %d" % meta_packed_packages)
        if args.verbosity >= 3:
            print("Files from packed software packages: %d" %
                  meta_packed_packages_files)
            print("Files from unpacked software packages: %d" %
                  meta_unpacked_packages_files)
    if runs:
        print("Architecture: %s (current: %s)" % (meta_architecture,
                                                  current_architecture))
        print("Distribution: %s (current: %s)" % (
              meta_distribution, current_distribution or "(not Linux)"))
        print("Executions (%d):" % len(runs))
        for i, run in enumerate(runs):
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) > 1:
                print("    %d: %s" % (i, cmdline))
            else:
                print("    %s" % cmdline)
            if args.verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])

    if inputs_outputs:
        if args.verbosity < 2:
            print("Inputs/outputs files (%d): %s" % (
                  len(inputs_outputs), ", ".join(inputs_outputs)))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in iteritems(inputs_outputs):
                t = []
                if f.read_runs:
                    t.append("in")
                if f.write_runs:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f.path))

    # Unpacker compatibility
    print("\n----- Unpackers -----")
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                 (COMPAT_NO, "Incompatible")]:
        if s != COMPAT_OK and args.verbosity < 2:
            continue
        if s not in unpacker_status:
            continue
        upks = unpacker_status[s]
        print("%s (%d):" % (n, len(upks)))
        for upk_name, msg in upks:
            if msg is not None:
                print("    %s (%s)" % (upk_name, msg))
            else:
                print("    %s" % upk_name)
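# --- Illustrative sketch: the `test_compatibility` protocol consumed above.
# An unpacker may provide a constant or a callable; a callable may return a
# status alone or a (status, message) pair. The COMPAT_* constants are the
# ones used in the surrounding module; this particular checker is
# hypothetical.
def _example_test_compatibility(pack, config=None):
    import platform
    if platform.system() != 'Linux':
        return COMPAT_NO, "only runs on Linux hosts"
    return COMPAT_OK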
def vagrant_run(args): """Runs the experiment in the virtual machine. """ target = Path(args.target[0]) unpacked_info = read_dict(target) use_chroot = unpacked_info['use_chroot'] cmdline = args.cmdline check_vagrant_version() # Loads config config = load_config(target / 'config.yml', True) runs = config.runs selected_runs = get_runs(runs, args.run, cmdline) hostname = runs[selected_runs[0]].get('hostname', 'reprounzip') # Port forwarding ports = parse_ports(args.expose_port) # If the requested ports are not a subset of the ones already set on the # VM, we have to update the Vagrantfile and issue `vagrant reload`, which # will reboot the machine req_ports = set(ports) set_ports = set(unpacked_info.get('ports', [])) if not req_ports.issubset(set_ports): # Build new set of forwarded ports: the ones already set + the one just # requested # The ones we request now override the previous config all_ports = dict((host, (guest, proto)) for host, guest, proto in set_ports) for host, guest, proto in req_ports: all_ports[host] = guest, proto unpacked_info['ports'] = sorted( (host, guest, proto) for host, (guest, proto) in iteritems(all_ports)) write_vagrantfile(target, unpacked_info) logger.info("Some requested ports are not yet forwarded, running " "'vagrant reload'") retcode = subprocess.call(['vagrant', 'reload', '--no-provision'], cwd=target.path) if retcode != 0: logger.critical("vagrant reload failed with code %d, aborting", retcode) sys.exit(1) write_dict(target, unpacked_info) # X11 handler if unpacked_info['gui']: x11 = LocalX11Handler() else: x11 = X11Handler(args.x11, ('local', hostname), args.x11_display) cmds = [] for run_number in selected_runs: run = runs[run_number] cmd = 'cd %s && ' % shell_escape(run['workingdir']) if use_chroot: cmd += '/busybox env -i ' else: cmd += '/usr/bin/env -i ' environ = x11.fix_env(run['environ']) environ = fixup_environment(environ, args) cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(environ)) cmd += ' ' # FIXME : Use exec -a or something if binary != argv[0] if cmdline is None: argv = [run['binary']] + run['argv'][1:] else: argv = cmdline cmd += ' '.join(shell_escape(a) for a in argv) uid = run.get('uid', 1000) gid = run.get('gid', 1000) if use_chroot: userspec = '%s:%s' % (uid, gid) cmd = ('chroot --userspec=%s /experimentroot ' '/bin/sh -c %s' % ( userspec, shell_escape(cmd))) else: cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd)) cmds.append(cmd) if use_chroot: cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c) for c in x11.init_cmds] + cmds else: cmds = x11.init_cmds + cmds cmds = ' && '.join(cmds) # Sets the hostname to the original experiment's machine's # FIXME: not reentrant: this restores the Vagrant machine's hostname after # the run, which might cause issues if several "reprounzip vagrant run" are # running at once cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname + cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES') cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds) # Gets vagrant SSH parameters info = machine_setup(target) signals.pre_run(target=target) interactive = not (args.no_stdin or os.environ.get('REPROUNZIP_NON_INTERACTIVE')) retcode = run_interactive(info, interactive, cmds, not args.no_pty, x11.port_forward) stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode) # Update input file status metadata_update_run(config, unpacked_info, selected_runs) write_dict(target, unpacked_info) signals.post_run(target=target, 
retcode=retcode)
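# --- Illustrative sketch: the port-merging rule implemented above, in
# isolation. Newly requested forwards override earlier ones that use the
# same host port; everything else is kept.
def _example_merge_ports(existing, requested):
    all_ports = dict((host, (guest, proto))
                     for host, guest, proto in existing)
    for host, guest, proto in requested:
        all_ports[host] = guest, proto
    return sorted((host, guest, proto)
                  for host, (guest, proto) in all_ports.items())

assert (_example_merge_ports([(8000, 80, 'tcp')], [(8000, 8080, 'tcp')]) ==
        [(8000, 8080, 'tcp')])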
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call a program is the pair (process, binary), so forking creates
    # a new program (with the same binary) and exec'ing creates a new program
    # as well (with the same process).
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still the old binary). If that
    # program doesn't do anything worth showing on the graph, it will be
    # erased, unless all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate.
    # Decoration adds the timestamp (for sorting) and tags by event type, one
    # of 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process into two "programs", unless the
            # previous one hasn't done anything since it was created via
            # fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    exec_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizing packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n'
                 '    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(
                iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))
        fp.write('\n')

        # Edges
        logging.info("Writing edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
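# --- Illustrative sketch: the shape of the DOT file generate() writes, for
# a hypothetical trace where a shell fork+exec's one child that reads a data
# file (green = read, red = write, blue = exec, per the edge-writing code
# above; node ids and paths are made up).
_EXAMPLE_DOT = '''\
digraph G {
    /* programs */
    node [shape=box];
    prog1 [label="/bin/sh (42)"];
    prog2 [label="/usr/bin/python (43)"];
    prog1 -> prog2 [label="fork+exec"];

    node [shape=ellipse];

    /* other files */
    "/home/user/data.csv"

    "/home/user/data.csv" -> prog2 [color=green];
}
'''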
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    use_chroot = read_dict(target / '.reprounzip').get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname
    # after the run, which might cause issues if several
    # "reprounzip vagrant run" are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = get_ssh_parameters(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    sys.stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args): """Sets up the experiment to be run in a Docker-built container. """ pack = Path(args.pack[0]) target = Path(args.target[0]) if target.exists(): logger.critical("Target directory exists") sys.exit(1) signals.pre_setup(target=target, pack=pack) target.mkdir() try: # Unpacks configuration file rpz_pack = RPZPack(pack) rpz_pack.extract_config(target / 'config.yml') # Loads config runs, packages, other_files = config = load_config( target / 'config.yml', True) if args.base_image: record_usage(docker_explicit_base=True) base_image = args.base_image[0] if args.distribution: target_distribution = args.distribution[0] else: target_distribution = None else: target_distribution, base_image = select_image(runs) logger.info("Using base image %s", base_image) logger.debug("Distribution: %s", target_distribution or "unknown") rpz_pack.copy_data_tar(target / 'data.tgz') arch = runs[0]['architecture'] # Writes Dockerfile logger.info("Writing %s...", target / 'Dockerfile') with (target / 'Dockerfile').open('w', encoding='utf-8', newline='\n') as fp: fp.write('FROM %s\n\n' % base_image) # Installs busybox download_file(busybox_url(arch), target / 'busybox', 'busybox-%s' % arch) fp.write('COPY busybox /busybox\n') # Installs rpzsudo download_file(sudo_url(arch), target / 'rpzsudo', 'rpzsudo-%s' % arch) fp.write('COPY rpzsudo /rpzsudo\n\n') fp.write('COPY data.tgz /reprozip_data.tgz\n\n') fp.write('COPY rpz-files.list /rpz-files.list\n') fp.write('RUN \\\n' ' chmod +x /busybox /rpzsudo && \\\n') if args.install_pkgs: # Install every package through package manager missing_packages = [] else: # Only install packages that were not packed missing_packages = [pkg for pkg in packages if pkg.packfiles] packages = [pkg for pkg in packages if not pkg.packfiles] if packages: record_usage(docker_install_pkgs=True) try: installer = select_installer(pack, runs, target_distribution) except CantFindInstaller as e: logger.error("Need to install %d packages but couldn't " "select a package installer: %s", len(packages), e) sys.exit(1) # Updates package sources update_script = installer.update_script() if update_script: fp.write(' %s && \\\n' % update_script) # Installs necessary packages fp.write(' %s && \\\n' % installer.install_script(packages)) logger.info("Dockerfile will install the %d software " "packages that were not packed", len(packages)) else: record_usage(docker_install_pkgs=False) # Untar paths = set() pathlist = [] # Add intermediate directories, and check for existence in the tar logger.info("Generating file list...") missing_files = chain.from_iterable(pkg.files for pkg in missing_packages) data_files = rpz_pack.data_filenames() listoffiles = list(chain(other_files, missing_files)) for f in listoffiles: if f.path.name == 'resolv.conf' and ( f.path.lies_under('/etc') or f.path.lies_under('/run') or f.path.lies_under('/var')): continue path = PosixPath('/') for c in rpz_pack.remove_data_prefix(f.path).components: path = path / c if path in paths: continue paths.add(path) if path in data_files: pathlist.append(path) else: logger.info("Missing file %s", path) rpz_pack.close() # FIXME : for some reason we need reversed() here, I'm not sure why # Need to read more of tar's docs. 
# TAR bug: --no-overwrite-dir removes --keep-old-files with (target / 'rpz-files.list').open('wb') as lfp: for p in reversed(pathlist): lfp.write(join_root(rpz_pack.data_prefix, p).path) lfp.write(b'\0') fp.write(' cd / && ' '(tar zpxf /reprozip_data.tgz -U --recursive-unlink ' '--numeric-owner --strip=1 --null -T /rpz-files.list || ' '/busybox echo "TAR reports errors, this might or might ' 'not prevent the execution to run")\n') # Setup entry point fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n' 'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n') # Write entry point script logger.info("Writing %s...", target / 'rpz_entrypoint.sh') with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8', newline='\n') as fp: # The entrypoint gets some arguments from the run command # By default, it just does all the runs # "run N" executes the run with that number # "cmd STR" sets a replacement command-line for the next run # "do STR" executes a command as-is fp.write( '#!/bin/sh\n' '\n' 'COMMAND=\n' 'ENVVARS=\n' '\n' 'if [ $# = 0 ]; then\n' ' exec /busybox sh /rpz_entrypoint.sh') for nb in irange(len(runs)): fp.write(' run %d' % nb) fp.write( '\n' 'fi\n' '\n' 'while [ $# != 0 ]; do\n' ' case "$1" in\n' ' help)\n' ' echo "Image built from reprounzip-docker" >&2\n' ' echo "Usage: docker run <image> [cmd "word [word ' '...]"] [run <R>]" >&2\n' ' echo " \\`cmd ...\\` changes the command for ' 'the next \\`run\\` option" >&2\n' ' echo " \\`run <name|number>\\` runs the ' 'specified run" >&2\n' ' echo "By default, all the runs are executed." ' '>&2\n' ' echo "The runs in this image are:" >&2\n') for run in runs: fp.write( ' echo " {name}: {cmdline}" >&2\n'.format( name=run['id'], cmdline=' '.join(shell_escape(a) for a in run['argv']))) fp.write( ' exit 0\n' ' ;;\n' ' do)\n' ' shift\n' ' $1\n' ' ;;\n' ' env)\n' ' shift\n' ' ENVVARS="$1"\n' ' ;;\n' ' cmd)\n' ' shift\n' ' COMMAND="$1"\n' ' ;;\n' ' run)\n' ' shift\n' ' case "$1" in\n') for i, run in enumerate(runs): cmdline = ' '.join([run['binary']] + run['argv'][1:]) fp.write( ' {name})\n' ' RUNCOMMAND={cmd}\n' ' RUNWD={wd}\n' ' RUNENV={env}\n' ' RUNUID={uid}\n' ' RUNGID={gid}\n' ' ;;\n'.format( name='%s|%d' % (run['id'], i), cmd=shell_escape(cmdline), wd=shell_escape(run['workingdir']), env=shell_escape(' '.join( '%s=%s' % (shell_escape(k), shell_escape(v)) for k, v in iteritems(run['environ']))), uid=run.get('uid', 1000), gid=run.get('gid', 1000))) fp.write( ' *)\n' ' echo "RPZ: Unknown run $1" >&2\n' ' exit 1\n' ' ;;\n' ' esac\n' ' if [ -n "$COMMAND" ]; then\n' ' RUNCOMMAND="$COMMAND"\n' ' COMMAND=\n' ' fi\n' ' export RUNWD; export RUNENV; export ENVVARS; ' 'export RUNCOMMAND\n' ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c ' '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS ' '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n' ' ENVVARS=\n' ' ;;\n' ' *)\n' ' echo "RPZ: Unknown option $1" >&2\n' ' exit 1\n' ' ;;\n' ' esac\n' ' shift\n' 'done\n') # Meta-data for reprounzip write_dict(target, metadata_initial_iofiles(config)) signals.post_setup(target=target, pack=pack) except Exception: target.rmtree(ignore_errors=True) raise
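# --- Illustrative sketch: invoking the entry point generated above (image
# name is hypothetical; the argument grammar is the one the script
# implements and documents in its own help text):
#
#   docker run <image>                        # execute every run, in order
#   docker run <image> run 0                  # execute run 0 only
#   docker run <image> cmd "wc -l input.txt" run 0
#                                             # override run 0's command line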
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks packages
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably "
                      "install them.\n"
                      "Use 'reprounzip installpkgs -h' for help")

    # Unpacks files
    members = rpz_pack.list_data()
    for m in members:
        # Remove 'DATA/' prefix
        m.name = str(rpz_pack.remove_data_prefix(m.name))
        # Makes symlink targets relative
        if m.issym():
            linkname = PosixPath(m.linkname)
            if linkname.is_absolute:
                m.linkname = join_root(root, PosixPath(m.linkname)).path
    logging.info("Extracting files...")
    rpz_pack.extract_data(root, members)
    rpz_pack.close()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for line in p.stdout:
            # Directory headers start at column 0 and end with ':';
            # line[:1] (not line[0]) so the comparison also works on Python 3
            if len(line) < 3 or line[:1] in (b' ', b'\t'):
                continue
            if line.endswith(b':\n'):
                lib_dirs.append(Path(line[:-2]))
    finally:
        p.wait()

    # Original input files, so upload can restore them
    input_files = [f.path for n, f in iteritems(config.inputs_outputs)
                   if f.read_runs]
    if input_files:
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for ifile in input_files:
            inputtar.add(str(join_root(root, ifile)), str(ifile))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target, pack=pack)
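# --- Illustrative sketch: the usual command-line flow that reaches
# directory_create(); the pack and target paths are hypothetical.
#
#   reprounzip directory setup experiment.rpz ./unpacked
#   reprounzip directory run ./unpacked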