def chroot_run(args):
    """Runs the command in the chroot.

    Builds one shell command line per selected run (cd + env -i + argv),
    wraps each in ``chroot --userspec=...``, joins them with ``&&`` and
    executes the whole thing through ``/bin/sh``.
    """
    target = Path(args.target[0])
    # Raises if the unpacker used for this target wasn't 'chroot'
    unpacked_info = metadata_read(target, 'chroot')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler: fixes DISPLAY/XAUTHORITY and sets up forwarding commands
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # env -i: start from an empty environment, then set each variable
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Drop privileges to the packed run's uid/gid (default 1000:1000)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    # X11 init commands (e.g. xauth setup) run inside the chroot first
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'chroot')
def chroot_run(args):
    """Runs the command in the chroot.

    Older variant: reads the unpacker metadata with read_dict() and does
    not rewrite the environment with fixup_environment().
    """
    target = Path(args.target[0])
    # Validates that the target was unpacked by the 'chroot' unpacker
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # env -i: empty environment, then set the recorded variables
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands run inside the chroot before the actual runs
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def finalize(self):
    """Commits uploaded files into a new Docker image.

    Writes a Dockerfile that COPYs every uploaded file into the current
    image, builds it, and records the new image name in the unpacked-info
    metadata. The temporary build directory is removed in all cases.
    """
    if not self.docker_copy:
        # Nothing was uploaded; nothing to build
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n %s \\\n %s\n' % (
                    shell_escape(unicode_(src)),
                    shell_escape(unicode_(target))))

        if self.docker_copy:
            # chown so the unprivileged experiment user can read the files
            dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                             ' %s\n' % ' \\\n '.join(
                                 shell_escape(unicode_(target))
                                 for src, target in self.docker_copy))

        # TODO : restore permissions?

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logging.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logging.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            # Old intermediate image is no longer needed; untag it
            logging.info("Untagging previous image %s",
                         from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd + ['rmi', from_image])
            if retcode != 0:
                # Non-fatal: the new image was created fine
                logging.warning(
                    "Can't remove previous image, docker "
                    "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def finalize(self):
    """Commits uploaded files into a new Docker image.

    Variant using the module-level ``logger`` object. Builds a Dockerfile
    COPYing each uploaded file into the current image, tags the result,
    untags the superseded intermediate image, and updates the metadata.
    """
    if not self.docker_copy:
        # No uploads pending; just discard the build directory
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n %s \\\n %s\n' % (
                    shell_escape(unicode_(src)),
                    shell_escape(unicode_(target))))

        if self.docker_copy:
            # Make the copied files owned by the experiment user
            dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                             ' %s\n' % ' \\\n '.join(
                                 shell_escape(unicode_(target))
                                 for src, target in self.docker_copy))

        # TODO : restore permissions?

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logger.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logger.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            # Only untag intermediates, never the original unpacked image
            logger.info("Untagging previous image %s",
                        from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd + ['rmi', from_image])
            if retcode != 0:
                # Best-effort cleanup; the build itself succeeded
                logger.warning("Can't remove previous image, docker "
                               "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def graph_json(target, runs, packages, other_files, package_map, edges, inputs_outputs, level_pkgs, level_processes, level_other_files): """Writes a JSON file suitable for further processing. """ # Packages json_packages = [pkg.json(level_pkgs) for pkg in packages] # Other files json_other_files = [unicode_(fi) for fi in sorted(other_files)] # Programs prog_map = {} json_runs = [run.json(prog_map, level_processes) for run in runs] # Connect edges for prog, f, mode, argv in edges: what = unicode_(f) if mode is None: prog_map[prog]['reads'].append(what) # TODO: argv? elif mode & FILE_WRITE: prog_map[prog]['writes'].append(what) elif mode & FILE_READ: prog_map[prog]['reads'].append(what) json_other_files.sort() if PY3: fp = target.open('w', encoding='utf-8', newline='\n') else: fp = target.open('wb') try: json.dump( { 'packages': sorted(json_packages, key=lambda p: p['name']), 'other_files': json_other_files, 'runs': json_runs }, fp, ensure_ascii=False, indent=2, sort_keys=True) finally: fp.close()
def json(self, level_pkgs):
    """Return a JSON-serializable dict describing this package.

    Only the file-level detail mode is supported; package-level output
    aborts the program.
    """
    if level_pkgs == LVL_PKG_PACKAGE:
        logging.critical("JSON output doesn't support --packages package")
        sys.exit(1)
    # Only the two levels above are expected here
    assert level_pkgs == LVL_PKG_FILE
    files = sorted(unicode_(f) for f in self.files)
    return {'name': self.name,
            'version': self.version or None,
            'files': files}
def json(self, level_pkgs):
    """Return a JSON-serializable dict for this package.

    Includes the package section; only file-level detail is supported,
    package-level output aborts the program.
    """
    if level_pkgs == LVL_PKG_PACKAGE:
        logger.critical("JSON output doesn't support --packages package")
        sys.exit(1)
    # The only other level reaching this method is LVL_PKG_FILE
    assert level_pkgs == LVL_PKG_FILE
    files = sorted(unicode_(f) for f in self.files)
    return {'name': self.name,
            'version': self.version or None,
            'section': self.section,
            'files': files}
def graph_json(target, runs, packages, other_files, package_map, edges, inputs_outputs, level_pkgs, level_processes, level_other_files): """Writes a JSON file suitable for further processing. """ # Packages json_packages = [pkg.json(level_pkgs) for pkg in packages] # Other files json_other_files = [unicode_(fi) for fi in sorted(other_files)] # Programs prog_map = {} json_runs = [run.json(prog_map, level_processes) for run in runs] # Connect edges for prog, f, mode, argv in edges: what = unicode_(f) if mode is None: prog_map[prog]['reads'].append(what) # TODO: argv? elif mode & FILE_WRITE: prog_map[prog]['writes'].append(what) elif mode & FILE_READ: prog_map[prog]['reads'].append(what) json_other_files.sort() if PY3: fp = target.open('w', encoding='utf-8', newline='\n') else: fp = target.open('wb') try: json.dump({'packages': sorted(json_packages, key=lambda p: p['name']), 'other_files': json_other_files, 'runs': json_runs}, fp, ensure_ascii=False, indent=2, sort_keys=True) finally: fp.close()
def dot(self, fp, level_processes, indent=1):
    """Write this program's DOT node, plus the edge from its parent."""
    pad = ' ' * indent
    thread_style = ',fillcolor="#666666"' if self.thread else ''
    label = escape(unicode_(self.binary) or "-")
    fp.write(pad + 'prog%d [label="%s (%d)"%s];\n' % (
        self.id, label, self.pid, thread_style))
    if self.parent is None:
        return
    # Label the parent edge by how this program came into being
    if self.created == C_FORK:
        reason = "thread" if self.thread else "fork"
    elif self.created == C_EXEC:
        reason = "exec"
    elif self.created == C_FORKEXEC:
        reason = "fork+exec"
    else:
        reason = ''
    fp.write(pad + 'prog%d -> prog%d [label="%s"];\n' % (
        self.parent.id, self.id, reason))
def dot(self, fp, level_processes, indent=1):
    """Emit the DOT node for this program and its creation edge."""
    prefix = ' ' * indent
    style = ',fillcolor="#666666"' if self.thread else ''
    fp.write(prefix + 'prog%d [label="%s (%d)"%s];\n' % (
        self.id,
        escape(unicode_(self.binary) or "-"),
        self.pid,
        style))
    if self.parent is not None:
        # Describe the relationship to the parent program
        reason = ''
        if self.created == C_FORK:
            reason = "thread" if self.thread else "fork"
        elif self.created == C_EXEC:
            reason = "exec"
        elif self.created == C_FORKEXEC:
            reason = "fork+exec"
        fp.write(prefix + 'prog%d -> prog%d [label="%s"];\n' % (
            self.parent.id, self.id, reason))
def filefilter(path):
    """Map *path* through the ignore, replace, and aggregation rules.

    Returns None when the path is ignored, the original path object when
    no rewriting rules exist, or a PosixPath of the rewritten name.
    """
    pathuni = unicode_(path)
    # Drop the path entirely if any ignore predicate matches
    if any(pred(pathuni) for pred in ignore):
        logging.debug("IGN %s", pathuni)
        return None
    if not (replace or aggregates):
        return path
    # Apply each substitution in order, logging actual changes
    for sub in replace:
        new_path = sub(pathuni)
        if new_path != pathuni:
            logging.debug("SUB %s -> %s", pathuni, new_path)
        pathuni = new_path
    # Collapse onto the first aggregation prefix that matches
    for prefix in aggregates or []:
        if pathuni.startswith(prefix):
            logging.debug("AGG %s -> %s", pathuni, prefix)
            pathuni = prefix
            break
    return PosixPath(pathuni)
def dot(self, fp, level_pkgs):
    """Write this package in DOT form: one box, or a cluster of files."""
    assert self.id is not None
    if not self.files:
        # An empty package contributes nothing to the graph
        return
    if level_pkgs == LVL_PKG_PACKAGE:
        # Single box node for the whole package
        fp.write(' "pkg %s" [shape=box,label=' % escape(self.name))
        if self.version:
            fp.write('"%s %s"];\n' % (escape(self.name),
                                      escape(self.version)))
        else:
            fp.write('"%s"];\n' % escape(self.name))
    elif level_pkgs == LVL_PKG_FILE:
        # Subgraph cluster containing one node per file
        fp.write(' subgraph cluster_pkg%d {\n label=' % self.id)
        if self.version:
            fp.write('"%s %s";\n' % (escape(self.name),
                                     escape(self.version)))
        else:
            fp.write('"%s";\n' % escape(self.name))
        for filename in sorted(unicode_(f) for f in self.files):
            fp.write(' "%s";\n' % escape(filename))
        fp.write(' }\n')
def dot(self, fp, level_pkgs):
    """Emit DOT for this package, as a box or a per-file cluster."""
    assert self.id is not None
    if not self.files:
        return
    if level_pkgs == LVL_PKG_PACKAGE:
        # Whole package drawn as one labeled box
        fp.write(' "pkg %s" [shape=box,label=' % escape(self.name))
        label = ('"%s %s"];\n' % (escape(self.name), escape(self.version))
                 if self.version
                 else '"%s"];\n' % escape(self.name))
        fp.write(label)
    elif level_pkgs == LVL_PKG_FILE:
        # Files grouped inside a named cluster
        fp.write(' subgraph cluster_pkg%d {\n label=' % self.id)
        label = ('"%s %s";\n' % (escape(self.name), escape(self.version))
                 if self.version
                 else '"%s";\n' % escape(self.name))
        fp.write(label)
        for entry in sorted(unicode_(f) for f in self.files):
            fp.write(' "%s";\n' % escape(entry))
        fp.write(' }\n')
def chroot_run(args):
    """Runs the command in the chroot.

    Oldest variant: no X11 handling, runs through plain subprocess.call.
    """
    target = Path(args.target[0])
    # Validates the target was set up by the 'chroot' unpacker
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # Empty environment, then the recorded variables
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def graph_json(target, runs, packages, other_files, package_map, edges, inputs_outputs, inputs_outputs_map, level_pkgs, level_processes, level_other_files): """Writes a JSON file suitable for further processing. """ # Packages if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP): json_packages = [] else: json_packages = [pkg.json(level_pkgs) for pkg in packages] # Other files json_other_files = [unicode_(fi) for fi in sorted(other_files)] # Programs prog_map = {} json_runs = [run.json(prog_map, level_processes) for run in runs] # Connect edges done_edges = set() for prog, fi, mode, argv in edges: endp_prog = prog_map[prog] if fi in package_map: if level_pkgs == LVL_PKG_DROP: continue endp_file = package_map[fi].json_endpoint(fi, level_pkgs) e = endp_prog['name'], endp_file, mode if e in done_edges: continue else: done_edges.add(e) else: endp_file = unicode_(fi) if mode is None: endp_prog['reads'].append(endp_file) elif mode & FILE_WRITE: endp_prog['writes'].append(endp_file) elif mode & FILE_READ: endp_prog['reads'].append(endp_file) json_other_files.sort() if PY3: fp = target.open('w', encoding='utf-8', newline='\n') else: fp = target.open('wb') try: json.dump({'packages': sorted(json_packages, key=lambda p: p['name']), 'other_files': json_other_files, 'runs': json_runs, 'inputs_outputs': [ {'name': k, 'path': unicode_(v.path), 'read_by_runs': v.read_runs, 'written_by_runs': v.write_runs} for k, v in sorted(iteritems(inputs_outputs))]}, fp, ensure_ascii=False, indent=2, sort_keys=True) finally: fp.close()
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUGFIX: previously fell through with 'installer' unbound and
            # crashed later with NameError when writing the setup script;
            # abort here like docker_setup_create() does
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write(' config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write(' config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None:
                fp.write(' config.vm.provider "virtualbox" do |v|\n'
                         ' v.memory = %d\n'
                         ' end\n' % memory)
            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
def directory_run(args):
    """Runs the command in the directory.

    Rewrites library paths, PATH, and absolute command-line arguments so
    the packed files under ``root/`` are used instead of the host's.
    """
    target = Path(args.target[0])
    # Raises if this target wasn't unpacked by the 'directory' unpacker
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Library directories are the unindented lines ending in ':'
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' % ':'.join(
            shell_escape(unicode_(join_root(root, d)))
            for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Pass the host's X11 settings through to the experiment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logger.warning("Rewrote command-line as: %s",
                               ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
def directory_run(args):
    """Runs the command in the directory.

    Variant using logging.warning and Popen.communicate(); rewrites
    library paths, PATH, and absolute arguments to point inside root/.
    """
    target = Path(args.target[0])
    # Raises if this target wasn't unpacked by the 'directory' unpacker
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Directories are the unindented lines ending with ':'
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.communicate()
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
                shell_escape(unicode_(join_root(root, d)))
                for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Forward the host's X11 settings
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's config, writes a Dockerfile that installs missing
    packages and untars the packed files, and records reprounzip metadata.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    # Rename so it extracts at the top of the target directory
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n'
                 ' chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]

        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]

        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write(' %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write(' %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write(' cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
def directory_run(args):
    """Runs the command in the directory.

    Oldest variant: no X11 handling and no argv rewriting; runs through
    plain subprocess.call.
    """
    target = Path(args.target[0])
    # Validates the target was set up by the 'directory' unpacker
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # Gets library paths
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Keep only the unindented directory lines ending with ':'
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' % ':'.join(
            shell_escape(unicode_(join_root(root, d)))
            for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ'])
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and configuration, merges process/open/exec
    events in timestamp order, groups files by package, and writes the
    resulting provenance graph to *target* in DOT format.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates
    # a new program (with the same binary) and exec'ing creates a new program
    # as well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased,
    # unless all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    # Map each packaged file path to its owning package
    packages = dict((f.path, pkg)
                    for pkg in packages
                    for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            # Ignore working-directory records; only real opens become edges
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the
            # previous one hasn't done anything since it was created via
            # fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,  # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                # Drop the trailing empty string left by a final NUL
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n /* programs */\n node [shape=box];\n')

        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write(' prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                # Edge label records how the program was created
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write(' prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n node [shape=ellipse];\n\n /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(
                iteritems(package_files)):
            fp.write(' subgraph cluster%d {\n label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write(' "%s";\n' % escape(unicode_(f)))
            fp.write(' }\n')

        fp.write('\n /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write(' "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                # Exec edge: binary -> program, labeled with the argv
                fp.write(' "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write(' prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write(' "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
def dot_endpoint(self, f, level_pkgs):
    """Returns the quoted DOT node identifier for file `f` of this package.

    At package-level grouping every file collapses onto one "pkg <name>"
    node; at finer levels each file path is its own node.
    """
    if level_pkgs == LVL_PKG_PACKAGE:
        label = 'pkg %s' % escape(self.name)
    else:
        label = escape(unicode_(f))
    return '"%s"' % label
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # FIX: without this exit, execution fell through and `installer`
            # was referenced below while unbound, raising a confusing
            # NameError instead of a clean failure
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8',
                                    newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
        # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')
            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def json_endpoint(self, f, level_pkgs):
    """Returns the JSON identifier for file `f` of this package.

    A whole package collapses onto its name at package-level grouping;
    otherwise the individual file path is used.
    """
    return self.name if level_pkgs == LVL_PKG_PACKAGE else unicode_(f)
def graph_json(target, runs, packages, other_files, package_map, edges,
               inputs_outputs, inputs_outputs_map,
               level_pkgs, level_processes, level_other_files):
    """Writes a JSON file suitable for further processing.
    """
    # Serialize the packages, unless the requested level hides them entirely
    if level_pkgs in (LVL_PKG_IGNORE, LVL_PKG_DROP):
        json_packages = []
    else:
        json_packages = [pkg.json(level_pkgs) for pkg in packages]

    # Files that don't belong to any package
    json_other_files = [unicode_(fi) for fi in sorted(other_files)]

    # Serialize the runs; prog_map gets filled with process -> JSON dict so
    # edges below can find their program entry
    prog_map = {}
    json_runs = [run.json(prog_map, level_processes) for run in runs]

    # Attach each edge to the 'reads'/'writes' list of its program
    seen = set()
    for prog, fi, mode, argv in edges:
        prog_json = prog_map[prog]
        if fi in package_map:
            if level_pkgs == LVL_PKG_DROP:
                continue
            file_endpoint = package_map[fi].json_endpoint(fi, level_pkgs)
            # Edges that collapse onto the same package endpoint would
            # otherwise be recorded multiple times
            key = prog_json['name'], file_endpoint, mode
            if key in seen:
                continue
            seen.add(key)
        else:
            file_endpoint = unicode_(fi)
        if mode is None:
            # An exec (mode None) counts as a read of the binary
            prog_json['reads'].append(file_endpoint)
            # TODO: argv?
        elif mode & FILE_WRITE:
            prog_json['writes'].append(file_endpoint)
        elif mode & FILE_READ:
            prog_json['reads'].append(file_endpoint)

    json_other_files.sort()

    # json.dump wants a text file on PY3 but a binary one on PY2
    fp = (target.open('w', encoding='utf-8', newline='\n') if PY3
          else target.open('wb'))
    try:
        document = {
            'packages': sorted(json_packages, key=lambda p: p['name']),
            'other_files': json_other_files,
            'runs': json_runs,
            'inputs_outputs': [
                {'name': k, 'path': unicode_(v.path),
                 'read_by_runs': v.read_runs,
                 'written_by_runs': v.write_runs}
                for k, v in sorted(iteritems(inputs_outputs))],
        }
        json.dump(document, fp,
                  ensure_ascii=False, indent=2, sort_keys=True)
    finally:
        fp.close()
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # FIX: previously fell through with `installer` unbound and
            # crashed later with NameError; abort cleanly instead
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8',
                                    newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
        # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')
            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def graph_dot(target, runs, packages, other_files, package_map, edges,
              inputs_outputs,
              level_pkgs, level_processes, level_other_files):
    """Writes a GraphViz DOT file from the collected information.
    """
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n'
                 '    node [shape=box fontcolor=white '
                 'fillcolor=black style=filled];\n')

        # One subgraph of process nodes per run
        logging.info("Writing programs...")
        for run in runs:
            run.dot(fp, level_processes)

        fp.write('\n'
                 '    node [shape=ellipse fontcolor="#131C39" '
                 'fillcolor="#C9D2ED"];\n')

        # Package nodes, unless the level hides them
        if level_pkgs not in (LVL_PKG_IGNORE, LVL_PKG_DROP):
            logging.info("Writing packages...")
            fp.write('\n    /* system packages */\n')
            for package in sorted(packages, key=lambda pkg: pkg.name):
                package.dot(fp, level_pkgs)

        fp.write('\n    /* other files */\n')

        # Non-package files; declared input/output files get a name label
        logging.info("Writing other files...")
        for fi in sorted(other_files):
            if fi in inputs_outputs:
                fp.write('    "%(path)s" [fillcolor="#A3B4E0", '
                         'label="%(name)s\\n%(path)s"];\n' %
                         {'path': escape(unicode_(fi)),
                          'name': inputs_outputs[fi]})
            else:
                fp.write('    "%s";\n' % escape(unicode_(fi)))

        fp.write('\n')

        logging.info("Connecting edges...")
        seen = set()
        for prog, fi, mode, argv in edges:
            prog_endpoint = prog.dot_endpoint(level_processes)
            if fi in package_map:
                if level_pkgs == LVL_PKG_DROP:
                    continue
                file_endpoint = package_map[fi].dot_endpoint(fi, level_pkgs)
                # Collapsed package nodes would otherwise yield duplicates
                edge_key = prog_endpoint, file_endpoint, mode
                if edge_key in seen:
                    continue
                seen.add(edge_key)
            else:
                file_endpoint = '"%s"' % escape(unicode_(fi))
            if mode is None:
                # Execution: binary -> program, bold, labeled with argv
                fp.write('    %s -> %s [style=bold, label="%s"];\n' % (
                         file_endpoint, prog_endpoint,
                         escape(format_argv(argv))))
            elif mode & FILE_WRITE:
                fp.write('    %s -> %s [color="#000088"];\n' % (
                         prog_endpoint, file_endpoint))
            elif mode & FILE_READ:
                fp.write('    %s -> %s [color="#8888CC"];\n' % (
                         file_endpoint, prog_endpoint))

        fp.write('}\n')
def graph_dot(target, runs, packages, other_files, package_map, edges,
              inputs_outputs,
              level_pkgs, level_processes, level_other_files):
    """Writes a GraphViz DOT file from the collected information.
    """
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        # Header: program nodes are black boxes with white text
        fp.write('digraph G {\n    /* programs */\n'
                 '    node [shape=box fontcolor=white '
                 'fillcolor=black style=filled];\n')

        logging.info("Writing programs...")
        for run in runs:
            run.dot(fp, level_processes)

        # Switch node style for the file nodes that follow
        fp.write('\n'
                 '    node [shape=ellipse fontcolor="#131C39" '
                 'fillcolor="#C9D2ED"];\n')

        if level_pkgs not in (LVL_PKG_IGNORE, LVL_PKG_DROP):
            logging.info("Writing packages...")
            fp.write('\n    /* system packages */\n')
            for package in sorted(packages, key=lambda pkg: pkg.name):
                package.dot(fp, level_pkgs)

        fp.write('\n    /* other files */\n')

        logging.info("Writing other files...")
        for fi in sorted(other_files):
            # Declared input/output files stand out with a name label
            if fi in inputs_outputs:
                fp.write('    "%(path)s" [fillcolor="#A3B4E0", '
                         'label="%(name)s\\n%(path)s"];\n' %
                         {'path': escape(unicode_(fi)),
                          'name': inputs_outputs[fi]})
            else:
                fp.write('    "%s";\n' % escape(unicode_(fi)))

        fp.write('\n')

        logging.info("Connecting edges...")
        emitted = set()
        for prog, fi, mode, argv in edges:
            src_prog = prog.dot_endpoint(level_processes)
            pkg = package_map.get(fi)
            if pkg is not None:
                if level_pkgs == LVL_PKG_DROP:
                    continue
                dst_file = pkg.dot_endpoint(fi, level_pkgs)
                # De-duplicate edges collapsed onto the same package node
                key = src_prog, dst_file, mode
                if key in emitted:
                    continue
                emitted.add(key)
            else:
                dst_file = '"%s"' % escape(unicode_(fi))
            if mode is None:
                # Execution edge, labeled with the command line
                fp.write('    %s -> %s [style=bold, label="%s"];\n' % (
                         dst_file, src_prog, escape(format_argv(argv))))
            elif mode & FILE_WRITE:
                fp.write('    %s -> %s [color="#000088"];\n' % (
                         src_prog, dst_file))
            elif mode & FILE_READ:
                fp.write('    %s -> %s [color="#8888CC"];\n' % (
                         dst_file, src_prog))

        fp.write('}\n')
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    # Optional VM memory size, taken from the last occurrence of the option
    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # FIX: previously fell through with `installer` unbound and
            # crashed later with NameError; abort cleanly instead
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')
                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    # resolv.conf is managed by the guest OS; don't clobber it
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')
            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory behind
        target.rmtree(ignore_errors=True)
        raise
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.

    Reads the trace database and the configuration, merges all recorded
    events in timestamp order, and writes a GraphViz DOT file to `target`
    showing programs, the files they touched, and package groupings.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    # Invert the package list into a path -> package lookup table
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}          # pid -> current Process (program) for that pid
    all_programs = []       # every program ever created, in creation order

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()        # paths that were exec'd at least once
    files = OrderedSet()    # every file that was opened
    edges = OrderedSet()    # (program, path, mode, argv-or-None) tuples

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    # Each cursor yields rows already sorted by id; decorating with the
    # timestamp lets heapq.merge interleave the three streams in time order
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            # A fork: the child starts out running its parent's binary
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id, parent, r_timestamp, False, binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            # Directory changes (FILE_WDIR) are not drawn on the graph
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                # Collapse fork+exec into the existing program
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            # argv is stored NUL-separated; drop a trailing empty component
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            # mode None marks an exec edge (as opposed to a read/write)
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}      # (pkg name, pkg version) -> list of paths
    other_files = []        # paths owned by no package
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        # Node names use id() of the program object, which is unique enough
        # for the lifetime of this function
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')
        # Files from packages, one DOT cluster per package
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')
        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')
        # Edges: blue = exec, red = write, green = read
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')