def __init__(self, target, input_files, files, unpacked_info, docker_cmd='docker'):
    """Initialize the uploader with Docker state and default ownership.

    Reads ``config.yml`` under *target*; when it declares runs, the first
    run's ``uid``/``gid`` become the default ownership applied to uploaded
    files, otherwise ``(None, None)``.  The rest of the setup is delegated
    to ``FileUploader``.
    """
    self.unpacked_info = unpacked_info
    self.docker_cmd = docker_cmd
    config = load_config(target / 'config.yml', True)
    if not config.runs:
        # No run recorded: no uid/gid to inherit
        self.default_ownership = None, None
    else:
        first = config.runs[0]
        self.default_ownership = first.get('uid'), first.get('gid')
    FileUploader.__init__(self, target, input_files, files)
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.

    For each run in the unpacked experiment's config, writes a CLTools
    module under ~/.vistrails and a 'vistrails.vt' bundle (a ZIP of a
    temporary directory containing the 'vistrail' XML file) under *target*.
    """
    record_usage(do_vistrails=True)

    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    # Loads the configuration the unpacker extracted
    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        # Generates the CLTools module wrapping this run's command-line
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            # Fill in the workflow template with this experiment's details
            with vtdir.open('w', 'vistrail',
                            encoding='utf-8', newline='\n') as fp:
                vistrail = VISTRAILS_TEMPLATE
                cmdline = ' '.join(shell_escape(arg)
                                   for arg in run['argv'])
                vistrail = vistrail.format(
                    date='2014-11-12 15:31:18',
                    unpacker=unpacker,
                    directory=escape_xml(str(target.absolute())),
                    cmdline=escape_xml(cmdline),
                    module_name=module_name,
                    run=i)
                fp.write(vistrail)

            # A .vt bundle is simply a ZIP of the directory's contents
            with bundle.open('wb') as fp:
                z = zipfile.ZipFile(fp, 'w')
                with vtdir.in_dir():
                    for path in Path('.').recursedir():
                        z.write(str(path))
                z.close()
        finally:
            # Always remove the temporary directory, even on error
            vtdir.rmtree()
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for run_nb, run in enumerate(runs):
        # Generate the CLTools module for this run
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        workdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            with workdir.open('w', 'vistrail',
                              encoding='utf-8', newline='\n') as fp:
                cmdline = ' '.join(shell_escape(arg) for arg in run['argv'])
                contents = VISTRAILS_TEMPLATE.format(
                    date='2014-11-12 15:31:18',
                    unpacker=unpacker,
                    directory=escape_xml(str(target.absolute())),
                    cmdline=escape_xml(cmdline),
                    module_name=module_name,
                    run=run_nb)
                fp.write(contents)

            # Zip up the temporary directory into the .vt bundle
            with bundle.open('wb') as fp:
                archive = zipfile.ZipFile(fp, 'w')
                with workdir.in_dir():
                    for entry in Path('.').recursedir():
                        archive.write(str(entry))
                archive.close()
        finally:
            workdir.rmtree()
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and the configuration, merges all recorded
    events in timestamp order, and writes a Graphviz DOT file to *target*
    showing programs, files (grouped by package) and their read/write/exec
    relationships.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    # Rebind 'packages' as a file-path -> package lookup table
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    # heapq.merge yields the three (already timestamp-sorted) streams in
    # global timestamp order
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)
        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            # Directory-change events are not drawn on the graph
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))
        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the
            # previous one hasn't done anything since it was created via
            # fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,  # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            # argv is NUL-separated in the database; drop a trailing empty
            # component
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n /* programs */\n node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            # id() gives a unique, stable node name for each program object
            fp.write(' prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write(' prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n node [shape=ellipse];\n\n /* system packages */\n')
        # Files from packages
        logging.info("Writing packages...")
        # NOTE: the loop variable 'files' shadows the outer OrderedSet here;
        # the outer set is no longer needed at this point
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write(' subgraph cluster%d {\n label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write(' "%s";\n' % escape(unicode_(f)))
            fp.write(' }\n')

        fp.write('\n /* other files */\n')
        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write(' "%s"\n' % escape(unicode_(f)))

        fp.write('\n')
        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                # mode None marks an exec edge; blue, labeled with argv
                fp.write(' "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write(' prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write(' "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
def docker_run(args):
    """Runs the experiment in the container.

    Builds a shell command from the selected runs (with X11 forwarding
    fix-ups if requested), runs it in a new container from the current
    image, then commits the container as a new image so file changes
    persist, and cleans up the container and previous image.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        # The Docker daemon may be remote (DOCKER_HOST); parse out its address
        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Warn when the daemon looks remote: X connections back to the
            # local display will probably fail without a tunnel
            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    # Build one shell command per selected run
    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        # Run as the original experiment's uid/gid via rpzsudo
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    # (the list keeps the forwarder objects alive for the duration of the run)
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Builds one shell command per selected run (wrapped in chroot or sudo
    depending on how the machine was set up), temporarily sets the VM's
    hostname to the original experiment's, runs the commands over vagrant
    SSH, and records the run in the unpacked-directory metadata.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    # Default to chroot mode when the key was not recorded
    use_chroot = unpacked_info.get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run"
    # are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    # BUGFIX: pass the already-defaulted use_chroot instead of
    # unpacked_info['use_chroot'], which raised KeyError when the key was
    # absent even though a default of True was declared above
    info = machine_setup(target, use_chroot)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def run_from_vistrails():
    """Entry point called by the VisTrails package to drive reprounzip.

    Parses an interface version plus unpacker/directory/run arguments,
    then shells out to ``reprounzip.main`` to upload the declared input
    files, execute the run, and download the requested output files.
    """
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    # First positional argument is the interface version; older VisTrails
    # packages don't pass one, which we accept as version 1
    cli_version = 1
    if len(sys.argv) > 1:
        try:
            cli_version = int(sys.argv[1])
        except ValueError:
            logging.info("Compatibility mode: reprounzip-vistrails didn't get "
                         "a version number")
    if cli_version != 1:
        logging.critical("Unknown interface version %d; you are probably "
                         "using a version of reprounzip-vistrails too old for "
                         "your VisTrails package. Consider upgrading.",
                         cli_version)
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    # sys.argv[1] was the version number, so parse from argv[2:]
    args = parser.parse_args(sys.argv[2:])
    config = load_config(Path(args.directory) / 'config.yml', canonical=True)

    python = sys.executable

    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        """Run reprounzip with the given arguments.

        When *add* is given, it is appended verbatim (unescaped) and the
        command runs through the shell; otherwise the argument list is
        executed directly.
        """
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True,
                                  cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst,
                                  cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that are used by this run and were not given
    for name, f in iteritems(config.inputs_outputs):
        if name not in seen_input_names and int(args.run) in f.read_runs:
            upload_command.append(':%s' % name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', args.run, '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.', args.run])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.',
             '%s:%s' % (output_name, filename)])
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.

    Reads processes, opened files and executions from the trace database
    and writes them to *target* as an XML provenance document made of
    <vertices> (agents, activities, entities) and <edges> linking them.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)
    # Databases from format 0.7+ record whether a process is a thread
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})
    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # A parentless process starts a new run
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)
            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    # Lookup tables: file path -> owning package, and path -> (write, read)
    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path,
                           (bool(f.write_runs), bool(f.read_runs)))
                          for n, f in iteritems(config.inputs_outputs))

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        # argv is NUL-separated; drop the trailing empty component
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                   r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  ' <vertices>\n')
        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            # The mandatory keys become XML tags; whatever remains in the
            # dict afterwards is emitted as <attributes>
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    # Put the already-popped keys back so the error message
                    # shows the complete vertex
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write(' <vertex>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in iteritems(tags)))
            if vertex:
                out.write('\n <attributes>\n')
                for k, v in iteritems(vertex):
                    out.write(' <attribute>\n'
                              ' <name>{k}</name>\n'
                              ' <value>{v}</value>\n'
                              ' </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write(' </attributes>')
            out.write('\n </vertex>\n')
        out.write(' </vertices>\n'
                  ' <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write(' <edge>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in iteritems(edge)) +
                      '\n </edge>\n')
        out.write(' </edges>\n'
                  '</provenancedata>\n')

    conn.close()
def run_from_vistrails():
    """Drive reprounzip from VisTrails: upload inputs, run, fetch outputs.

    Parses unpacker/directory/run plus repeated --input-file and
    --output-file options, then shells out to ``reprounzip.main``.
    """
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    arg_parser = argparse.ArgumentParser()
    for positional in ('unpacker', 'directory', 'run'):
        arg_parser.add_argument(positional)
    arg_parser.add_argument('--input-file', action='append', default=[])
    arg_parser.add_argument('--output-file', action='append', default=[])
    arg_parser.add_argument('--cmdline', action='store')
    args = arg_parser.parse_args()

    runs, packages, other_files = load_config(
        Path(args.directory) / 'config.yml', canonical=True)
    run = runs[int(args.run)]

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        # With `add`, the extra text is appended verbatim and the command
        # goes through the shell; otherwise exec the argument list directly
        if not add:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst, cwd=args.directory)
        else:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            full = ' '.join(shell_escape(a) for a in (rpuz + lst)) + ' ' + add
            subprocess.check_call(full, shell=True, cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Input files named on the command line...
    seen_input_names = set()
    upload_command = []
    for spec in args.input_file:
        input_name, filename = spec.split(':', 1)
        seen_input_names.add(input_name)
        upload_command.append('%s:%s' % (filename, input_name))
    # ...and a reset entry for every declared input that was not given
    upload_command.extend(':%s' % name
                          for name in run['input_files']
                          if name not in seen_input_names)
    cmd(['upload', '.'] + upload_command)

    # Execute the experiment, optionally with a replacement command line
    if not args.cmdline:
        cmd(['run', '.'])
    else:
        cmd(['run', '.', '--cmdline'], add=args.cmdline)

    # Retrieve the requested output files
    for spec in args.output_file:
        output_name, filename = spec.split(':', 1)
        cmd(['download', '.', '%s:%s' % (output_name, filename)])
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Unpacks the configuration from the .rpz pack into *target*, selects a
    base image, and writes a Dockerfile (installing busybox, rpzsudo, any
    software packages that weren't packed, and the experiment data) plus an
    rpz_entrypoint.sh script that can replay the runs inside the image.
    On any error the partially-created target directory is removed.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        # Base image: explicit from the command line, or guessed from the
        # packed distribution
        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     ' chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(' %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write(' %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the
            # tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # resolv.conf is host-specific; don't restore it
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # Write the NUL-separated file list consumed by tar's -T option
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write(' cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                ' exec /busybox sh /rpz_entrypoint.sh')
            # No arguments: re-exec ourselves with one "run N" per run
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                ' case "$1" in\n'
                ' help)\n'
                ' echo "Image built from reprounzip-docker" >&2\n'
                ' echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                ' echo " \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                ' echo " \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                ' echo "By default, all the runs are executed." '
                '>&2\n'
                ' echo "The runs in this image are:" >&2\n')
            # List each run's name and command-line in the help output
            for run in runs:
                fp.write(
                    ' echo " {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                ' exit 0\n'
                ' ;;\n'
                ' do)\n'
                ' shift\n'
                ' $1\n'
                ' ;;\n'
                ' env)\n'
                ' shift\n'
                ' ENVVARS="$1"\n'
                ' ;;\n'
                ' cmd)\n'
                ' shift\n'
                ' COMMAND="$1"\n'
                ' ;;\n'
                ' run)\n'
                ' shift\n'
                ' case "$1" in\n')
            # Each run is selectable by name or by index
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    ' {name})\n'
                    ' RUNCOMMAND={cmd}\n'
                    ' RUNWD={wd}\n'
                    ' RUNENV={env}\n'
                    ' RUNUID={uid}\n'
                    ' RUNGID={gid}\n'
                    ' ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                ' *)\n'
                ' echo "RPZ: Unknown run $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' if [ -n "$COMMAND" ]; then\n'
                ' RUNCOMMAND="$COMMAND"\n'
                ' COMMAND=\n'
                ' fi\n'
                ' export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                ' ENVVARS=\n'
                ' ;;\n'
                ' *)\n'
                ' echo "RPZ: Unknown option $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built unpacked directory behind
        target.rmtree(ignore_errors=True)
        raise
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Builds one shell command line per selected run (cd to working directory,
    env -i with the recorded environment, the recorded argv, uid/gid), wraps
    each in chroot or sudo depending on how the VM was set up, and executes
    the whole sequence over SSH inside the Vagrant machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # The hostname of the first selected run is restored inside the VM below
    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict(
            (host, (guest, proto)) for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        # Persist the updated port list only once the reload succeeded
        write_dict(target, unpacked_info)

    # X11 handler: a GUI VM has a local display; otherwise forward over SSH
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands must run first (inside the chroot if there is one)
    if use_chroot:
        cmds = [
            'chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
            for c in x11.init_cmds
        ] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.

    Reads the trace database and configuration, applies path
    filters/replacements/aggregation, groups files into packages, and emits
    the graph in DOT or JSON format to `target`.

    :param regex_filters: list of regex patterns; matching paths are dropped
    :param regex_replaces: list of (pattern, replacement) pairs applied to
        each path
    :param aggregates: list of path prefixes collapsed into a single node
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    # BUG FIX: `repl` must be bound as a default argument like `r` is;
    # otherwise every lambda late-binds to the LAST replacement string of the
    # comprehension, so with multiple --regex-replace options every pattern
    # substituted the same (wrong) replacement.
    replace = [lambda path, r=re.compile(p), repl=repl: r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        """Applies filters, replacements and aggregation to a single path.

        Returns None when the path is filtered out, otherwise the (possibly
        rewritten) path as a PosixPath.
        """
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        # Truncate paths to the requested depth; packaged files keep their
        # full path so the package grouping above stays valid
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs,
            level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
def docker_run(args):
    """Runs the experiment in the container.

    Builds one /busybox shell command per selected run, starts a new
    container from the current image (optionally detached), then commits the
    container to a new image so file changes survive for later runs/downloads.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check: X11 forwarding needs the interactive (foreground) path
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        # The Docker daemon may be remote (DOCKER_HOST); the X connection
        # then originates from that host, not from us
        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        # rpzsudo switches to the recorded uid/gid before executing
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"; `docker run`'s own status only
    # reflects the client, not the experiment's exit code
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image, preserving filesystem changes made by the run
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUG FIX: without this exit, `installer` stays unbound and
            # installer.update_script() below raises NameError
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def docker_run(args):
    """Runs the experiment in the container.

    Removes any container left over from a previous run, then starts a fresh
    container from the current image and executes each selected run under
    `sudo -u` with the recorded working directory, environment and argv.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)
    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        # NOTE(review): the trailing '\n' ends up inside the ' && '-joined
        # command string passed to sh -c -- looks unintended; verify
        cmd = 'sudo -u \'#%d\' sh -c %s\n' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'run', b'--name=' + container,
                               '-i', '-t', image,
                               '/bin/sh', '-c', cmds])
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Composes one shell command per selected run (working directory,
    environment, argv, uid/gid), wrapped for chroot or sudo, and executes the
    joined sequence inside the Vagrant VM over SSH.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Hostname of the first selected run, restored in the VM below
    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict((host, (guest, proto))
                         for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        # Persist the new port list only after a successful reload
        write_dict(target, unpacked_info)

    # X11 handler: GUI VMs have a local display, otherwise forward over SSH
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands go first (inside the chroot if there is one)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def docker_run(args):
    """Runs the experiment in the container.

    Destroys any leftover container, runs the selected runs in a new
    container (with X11 forwarded through the docker0 bridge address), and
    records the container name so output files can be downloaded later.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)
    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Get the local bridge IP, so the container can reach our X proxy
    ip_str = get_iface_addr('docker0')

    # X11 handler
    x11 = X11Handler(args.x11, ('internet', ip_str), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        # NOTE(review): trailing '\n' is embedded in the ' && '-joined
        # command string -- looks unintended; verify
        cmd = 'sudo -u \'#%d\' /bin/busybox sh -c %s\n' % (uid,
                                                           shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t', image,
                                  '/bin/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        sys.exit(1)

    # Get exit status from "docker inspect"; the client's status does not
    # reflect the experiment's exit code
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUG FIX: without this exit, `installer` stays unbound and
            # installer.update_script() below raises NameError
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
                # The guest needs a working resolver inside the chroot
                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')
            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built unpacking directory behind
        target.rmtree(ignore_errors=True)
        raise
def docker_run(args):
    """Runs the experiment in the container.

    Builds the command line understood by the image's entrypoint
    ("env ..."/"cmd ..."/"run N" tokens), starts the container (detached or
    interactive), reads the experiment's exit status back via ``docker
    inspect``, commits the container to a new image, and cleans up.

    Fixes over previous revision:
      * ``-p`` port mappings now use the ``host:container/proto`` form Docker
        expects (was missing the ``/`` before the protocol).
      * the experiment's exit status is no longer overwritten by the exit
        status of ``docker rm``, so ``signals.post_run`` receives the real
        result of the run.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check: a detached container can't do X11 forwarding since the
    # local forwarders would outlive this process's supervision
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding: docker expects "-p host:container/proto"
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p', '%s:%s/%s' % (port_host, port_container,
                                                 proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Warn when the daemon looks remote: the container probably can't
            # reach the local display without an SSH tunnel
            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    # Builds the token stream consumed by the image's entrypoint script
    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug("Passing arguments to Docker image:")
        for c in cmd:
            logging.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders for the X11 connections
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd)
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(args.docker_cmd.split() +
                                  ['inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container; use a separate variable so the experiment's
    # exit status (retcode) isn't clobbered before post_run
    logging.info("Destroying container %s", container.decode('ascii'))
    rm_retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if rm_retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Fix over previous revision: failing to select a package installer now
    exits instead of falling through with ``installer`` unbound (which
    raised NameError later when writing the setup script).
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file (tar.extract creates the target directory)
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s",
                len(packages), e)
            # Can't continue: the setup script below needs `installer`
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Builds one shell command per selected run (cd + env + argv), wraps each
    in chroot or sudo depending on how the target was set up, joins them
    with '&&', temporarily sets the VM's hostname to the experiment's, and
    executes the whole thing over Vagrant's SSH connection.
    """
    target = Path(args.target[0])
    # The setup step recorded whether a chroot was built; default to True
    use_chroot = read_dict(target / '.reprounzip').get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        # Start from a clean environment (env -i) then re-set the run's
        # recorded environment, with X11 fixes applied
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Run under the original uid/gid (1000 if not recorded)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    # X11 initialization commands run first, inside the chroot if any
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = get_ssh_parameters(target)

    signals.pre_run(target=target)

    # Interactive unless --no-stdin or the env var opts out
    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    sys.stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's configuration and data, writes a Dockerfile (which
    installs busybox, rpzsudo, any packages that weren't packed, and untars
    the experiment data), and writes the rpz_entrypoint.sh script that the
    image uses to execute runs. On any failure the target directory is
    removed before re-raising.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]

            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # Skip resolv.conf: the container gets its own
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Clean up the partially-created target before propagating
        target.rmtree(ignore_errors=True)
        raise
def do_vistrails(target, pack=None, **kwargs):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.

    Builds a Directory module plus one Run module per run, chained through
    'experiment' ports, then zips the vistrail into <target>/vistrails.vt.
    The temporary build directory is removed even on failure.
    """
    record_usage(do_vistrails=True)
    config = load_config(target / 'config.yml', canonical=True)

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    ids = IdScope()
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            wf = Workflow(fp, ids)

            # Directory module, refering to this directory
            d = wf.add_module('%s:Directory' % rpz_id, rpz_version)
            wf.add_function(d, 'directory',
                            [(directory_sig, str(target.resolve()))])

            connect_from = d

            for i, run in enumerate(config.runs):
                # Input/output file names for this run, from config
                inputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                if i in f.read_runs)
                outputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                 if i in f.write_runs)
                ports = itertools.chain((('input', p) for p in inputs),
                                        (('output', p) for p in outputs))

                # Run module
                r = wf.add_module('%s:Run' % rpz_id, rpz_version)
                wf.add_function(r, 'cmdline', [
                    (string_sig,
                     ' '.join(shell_escape(arg) for arg in run['argv']))])
                wf.add_function(r, 'run_number', [(integer_sig, i)])

                # Port specs for input/output files
                for type_, name in ports:
                    wf.add_port_spec(r, name, type_, [file_pkg_mod])

                # Draw connection
                wf.connect(connect_from, experiment_sig, 'experiment',
                           r, experiment_sig, 'experiment')
                connect_from = r

            wf.close()

        # Zip the temporary directory's contents into the .vt bundle
        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
def docker_run(args):
    """Runs the experiment in the container.

    Builds the token stream understood by the image's entrypoint
    ("env ..."/"cmd ..."/"run N"), starts the container (detached or
    interactive), commits it to a new image, removes the container, and
    untags the previous image. The image itself prints each run's exit
    status, so no ``docker inspect`` step is needed here.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check: X11 forwarders can't be supervised if we detach
    if args.detach and args.x11:
        logger.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logger.debug("Running from image %s", image.decode('ascii'))
    else:
        logger.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding: "-p host:container/proto"
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p', '%s:%s/%s' % (port_host, port_container,
                                                 proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Warn if the daemon looks remote: it probably can't reach the
            # local display without an SSH tunnel
            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logger.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Passing arguments to Docker image:")
        for c in cmd:
            logger.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders for the X11 connections
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logger.info("Start container %s (detached)",
                    container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logger.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logger.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd,
                                 request_tty=True)

    # The image prints out the exit status(es) itself
    if retcode != 0:
        logger.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logger.info("Committing container %s to image %s",
                container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logger.info("Destroying container %s", container.decode('ascii'))
    # NOTE(review): this rebinds `retcode` to the exit status of `docker rm`,
    # so post_run below receives the cleanup status rather than the run's
    # (which the image only prints) — confirm this is intended
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logger.error("Error deleting container %s",
                     container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logger.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Fix over previous revision: failing to select a package installer now
    exits instead of falling through with ``installer`` unbound (which
    raised NameError later when writing the setup script).
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    # Optional VM memory size (MB), last value wins
    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # Can't continue: the setup script below needs `installer`
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None:
                fp.write('  config.vm.provider "virtualbox" do |v|\n'
                         '    v.memory = %d\n'
                         '  end\n' % memory)
            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Clean up the partially-created target before propagating
        target.rmtree(ignore_errors=True)
        raise
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and configuration under `directory` and writes
    a Graphviz DOT file to `target`.

    :param target: Path of the ``.dot`` file to write.
    :param directory: Directory containing ``trace.sqlite3`` and
        ``config.yml`` (as produced by ``reprozip trace``).
    :param all_forks: If True, keep every forked process as its own node
        instead of collapsing no-op fork+exec intermediates.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates
    # a new program (with the same binary) and exec'ing creates a new program
    # as well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased,
    # unless all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    # Maps each packed file path to its owning package for quick lookup
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists: merge the three streams by timestamp (each
    # stream is already time-ordered, so heapq.merge keeps global order)
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            # Directory change events are not file accesses; skip them
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the
            # previous one hasn't done anything since it was created via
            # fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            # argv is stored NUL-separated; drop the trailing empty entry
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    # Fix: exec_cursor was previously never closed, unlike the other two
    exec_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs: one box per (process, binary) couple, with fork/exec
        # edges between them
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n'
                 '    /* system packages */\n')
        # Files from packages, clustered per (name, version)
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(
                iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')
        # Other files, not owned by any package
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges: blue = exec, red = write, green = read
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's configuration and data tarball into the target
    directory, writes a Dockerfile that installs the non-packed system
    packages and untars the packed files into the image, and records
    reprounzip metadata. Cleans up the target directory on any failure.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        # Base image: explicit --base-image wins, else derived from the
        # traced runs
        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s",
                        len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(
                    packages))
                logging.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed",
                    len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in
            # the tar
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    # Each prefix is visited once; record whether the tar
                    # actually contains it
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # NUL-separated list consumed by `tar --null -T`
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's configuration and data into the target directory
    and writes a Dockerfile that installs non-packed packages and untars
    the packed files into the image.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    # NOTE(review): this writes under `target` before target.mkdir() below;
    # presumably extract_config creates the directory — confirm.
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    # Base image: explicit --base-image wins, else derived from the runs
    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    rpz_pack.copy_data_tar(target / 'data.tgz')

    arch = runs[0]['architecture']

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w', encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(arch),
                      target / 'busybox',
                      'busybox-%s' % arch)
        fp.write('COPY busybox /busybox\n')

        # Installs rpzsudo
        download_file(sudo_url(arch),
                      target / 'rpzsudo',
                      'rpzsudo-%s' % arch)
        fp.write('COPY rpzsudo /rpzsudo\n\n')

        fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
        fp.write('COPY rpz-files.list /rpz-files.list\n')
        fp.write('RUN \\\n'
                 '    chmod +x /busybox /rpzsudo && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(docker_install_pkgs=True)
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed", len(packages))
        else:
            record_usage(docker_install_pkgs=False)

        # Untar
        paths = set()
        pathlist = []
        # Adds intermediate directories, and checks for existence in the tar
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in rpz_pack.remove_data_prefix(f.path).components:
                path = path / c
                # Each prefix is probed once against the pack's contents
                if path in paths:
                    continue
                paths.add(path)
                try:
                    rpz_pack.get_data(path)
                except KeyError:
                    logging.info("Missing file %s", path)
                else:
                    pathlist.append(path)
        rpz_pack.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        # NUL-separated list consumed by `tar --null -T`
        with (target / 'rpz-files.list').open('wb') as lfp:
            for p in reversed(pathlist):
                lfp.write(join_root(rpz_pack.data_prefix, p).path)
                lfp.write(b'\0')
        fp.write('    cd / && '
                 '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                 '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                 '/busybox echo "TAR reports errors, this might or might '
                 'not prevent the execution to run")\n')

    # Meta-data for reprounzip
    # NOTE(review): unlike the newer variant, there is no try/except cleanup
    # of `target` on failure here, and the metadata written is empty.
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target, pack=pack)
def generate(target, configfile, database, all_forks=False,
             graph_format='dot', level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.

    Reads events from the trace `database`, applies filtering/aggregation
    options, and writes a graph to `target` in DOT or JSON format.

    :param all_forks: Keep no-op forked programs as separate nodes.
    :param graph_format: 'dot' or 'json' (case-insensitive).
    :param level_pkgs, level_processes, level_other_files: Detail levels,
        parsed by parse_levels().
    :param regex_filters: Regexes; matching file paths are dropped.
    :param regex_replaces: (pattern, replacement) pairs applied to paths.
    :param aggregates: Path prefixes that matching paths collapse into.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON,
                        'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    # Patterns are compiled once and bound as default arguments so each
    # lambda keeps its own regex.
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    # Fix: `repl` must also be bound as a default argument; the previous
    # version captured the loop variable by closure, so every lambda used
    # the LAST replacement string when several --regex-replace were given.
    replace = [lambda path, r=re.compile(p), repl=repl: r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        """Apply ignore/replace/aggregate rules; None means 'drop'."""
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        # Truncate non-package paths to the requested depth
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            # Keep only declared input/output files
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            # Drop all non-package files
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Oldest variant: reads the .rpz pack directly with tarfile, copies the
    whole pack into the image, and lists files to extract on the tar
    command line rather than in a -T list file.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    # NOTE(review): extraction happens before target.mkdir() below;
    # presumably tar.extract creates the directory — confirm.
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    # Base image: explicit --base-image wins, else derived from the runs
    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w', encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n'
                 '    chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]

        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]

        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                # Each prefix is probed once against the pack's DATA/ tree
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.

    Builds lists of vertices (user, runs, processes, forks, files, file
    accesses, executions) and edges linking them from the trace database,
    then serializes them as a custom provenance-data XML document to
    `target`.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)
    # Trace format 0.7+ records whether a process row is a thread
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    assert database.is_file()
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})

    run = -1

    # Read processes; older traces lack is_thread, so it is synthesized as 0
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # A parentless process starts a new run
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent, "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)
            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    # Lookup tables keyed on the string form of each file path
    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path, (bool(f.write_runs),
                                         bool(f.read_runs)))
                          for n, f in config.inputs_outputs.items())

    # Read opened files: one entity per distinct file name
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens: one activity per access event
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        # argv is stored NUL-separated; drop the trailing empty entry
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  '  <vertices>\n')
        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            # The four mandatory tags are popped into `tags`; anything left
            # in the dict is emitted as an <attribute>
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    # Restore already-popped keys so the error shows the
                    # complete vertex
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write('    <vertex>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'
                                      .format(k=k, v=xml_escape(v))
                                      for k, v in tags.items()))
            if vertex:
                out.write('\n      <attributes>\n')
                for k, v in vertex.items():
                    out.write('        <attribute>\n'
                              '          <name>{k}</name>\n'
                              '          <value>{v}</value>\n'
                              '        </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write('      </attributes>')
            out.write('\n    </vertex>\n')
        out.write('  </vertices>\n'
                  '  <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write('    <edge>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'
                                      .format(k=k, v=xml_escape(v))
                                      for k, v in edge.items()) +
                      '\n    </edge>\n')
        out.write('  </edges>\n'
                  '</provenancedata>\n')

    conn.close()
def run_from_vistrails():
    """Entry point used by the VisTrails module to drive reprounzip.

    Parses the command-line arguments the generated workflow passes in,
    then invokes ``reprounzip <unpacker> upload/run/download`` as
    subprocesses against the experiment directory.
    """
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args()

    runs, packages, other_files = load_config(
        Path(args.directory) / 'config.yml',
        canonical=True)
    run = runs[int(args.run)]

    # Base command: re-invoke reprounzip with the selected unpacker
    rpuz = [sys.executable, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def invoke(arguments, extra=None):
        """Run a reprounzip subcommand; `extra` is appended as raw text."""
        if not extra:
            logging.info("cmd: %s", ' '.join(arguments))
            subprocess.check_call(rpuz + arguments,
                                  cwd=args.directory)
            return
        # With raw text to append we must go through the shell, escaping
        # only our own arguments
        logging.info("cmd: %s %s", ' '.join(arguments), extra)
        command = ' '.join(shell_escape(a) for a in (rpuz + arguments))
        command += ' ' + extra
        subprocess.check_call(command, shell=True,
                              cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    given = [spec.split(':', 1) for spec in args.input_file]
    seen_input_names = set(name for name, _ in given)
    upload_command = ['%s:%s' % (filename, name) for name, filename in given]

    # Resets the input files that were not given
    upload_command.extend(':%s' % name
                          for name in run['input_files']
                          if name not in seen_input_names)

    # Runs the command
    invoke(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        invoke(['run', '.', '--cmdline'], extra=args.cmdline)
    else:
        invoke(['run', '.'])

    # Gets output files
    for spec in args.output_file:
        output_name, filename = spec.split(':', 1)
        invoke(['download', '.',
                '%s:%s' % (output_name, filename)])