def metadata_initial_iofiles(config, dct=None):
    """Record which input/output files are absent right after unpacking.

    :param config: The configuration as returned by `load_config()`; its
        `inputs_outputs`, `other_files` and `packages` are used to list the
        input and output files and to determine which ones have been packed
        (and therefore exist initially).
    :param dct: Optional metadata dictionary to update in place; a new one is
        created when it is None.
    :returns: The metadata dictionary, with its `input_files` key set.

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet

    Only the ``False`` entries are produced by this function.
    """
    metadata = {} if dct is None else dct

    # Start from every declared input/output file, keyed by its path
    names_by_path = {}
    for name, iofile in iteritems(config.inputs_outputs):
        names_by_path[iofile.path] = name

    # Files listed outside of packages, then the files of every packed package
    packed_groups = [config.other_files]
    packed_groups.extend(pkg.files for pkg in config.packages
                         if pkg.packfiles)

    # Whatever was packed exists initially, so it is not reported
    for group in packed_groups:
        for packed in group:
            names_by_path.pop(packed.path, None)

    metadata['input_files'] = {name: False
                               for name in itervalues(names_by_path)}
    return metadata
def metadata_initial_iofiles(config, dct=None):
    """Record which input files are absent right after unpacking.

    :param config: The configuration as returned by `load_config()`; its
        `inputs_outputs`, `other_files` and `packages` are used to list the
        input files and to determine which ones have been packed (and
        therefore exist initially).
    :param dct: Optional metadata dictionary to update in place; a new one is
        created when it is None.
    :returns: The metadata dictionary, with its `input_files` key set.

    The `input_files` key contains a dict mapping the name to either:
      * None (or inexistent): original file and exists
      * False: doesn't exist (wasn't packed)
      * True: has been generated by one of the run since the experiment was
        unpacked
      * basestring: the user uploaded a file with this path, and no run has
        overwritten it yet

    Only the ``False`` entries are produced by this function.
    """
    metadata = {} if dct is None else dct

    # Only files with a non-empty `read_runs` count as inputs
    names_by_path = {}
    for name, iofile in iteritems(config.inputs_outputs):
        if iofile.read_runs:
            names_by_path[iofile.path] = name

    # Files listed outside of packages, then the files of every packed package
    packed_groups = [config.other_files]
    packed_groups.extend(pkg.files for pkg in config.packages
                         if pkg.packfiles)

    # Whatever was packed exists initially, so it is not reported
    for group in packed_groups:
        for packed in group:
            names_by_path.pop(packed.path, None)

    metadata['input_files'] = {name: False
                               for name in itervalues(names_by_path)}
    return metadata
def install(cls, packages, assume_yes=False):
    """Install packages through ``yum`` and report what ended up installed.

    :param packages: Iterable of package objects; their `name` attribute is
        what gets passed to ``yum install``.
    :param assume_yes: When true, ``-y`` is added to the command line.
    :returns: A tuple ``(r, pkgs_status)`` where `r` is the exit code of the
        ``yum`` call and `pkgs_status` is whatever
        ``cls.get_packages_info(packages)`` returned.

    After the installation attempt, every requested name whose status is
    still None is reported through a single error log message.
    """
    wanted = set(pkg.name for pkg in packages)

    command = ['yum', 'install']
    if assume_yes:
        command.append('-y')
    command.extend(wanted)
    r = subprocess.call(command)

    # Checks on packages
    # NOTE(review): the values of the status mapping are unpacked as
    # (package, status) pairs -- confirm against get_packages_info()
    pkgs_status = cls.get_packages_info(packages)
    installed = set(pkg.name
                    for pkg, status in itervalues(pkgs_status)
                    if status is not None)
    wanted.difference_update(installed)
    if wanted:
        logging.error("Error: some packages could not be installed:%s",
                      ''.join("\n %s" % name for name in wanted))

    return r, pkgs_status
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.

    Reads the configuration and the trace database, filters and groups the
    files that were accessed, then hands everything to `graph_dot()` or
    `graph_json()`.

    :param target: Passed through to the graph writer.
    :param configfile: Path object of the configuration file; the process
        exits with status 1 if it is not a file.
    :param database: Passed to `read_events()`.
    :param all_forks: Passed to `read_events()`.
    :param graph_format: One of 'dot', 'DOT', 'json', 'JSON'; anything else
        logs a critical error and exits with status 1.
    :param level_pkgs: Package level, interpreted by `parse_levels()`.
    :param level_processes: Process level, interpreted by `parse_levels()`.
    :param level_other_files: Other-files level, interpreted by
        `parse_levels()`.
    :param regex_filters: Optional iterable of regex patterns; a path matched
        (``search``) by any of them is dropped.
    :param regex_replaces: Optional iterable of ``(pattern, replacement)``
        pairs, applied to each path in order with ``re.sub``.
    :param aggregates: Optional iterable of prefixes; a path starting with
        one of them is replaced by the first such prefix.
    """
    try:
        graph_format = {
            'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
            'json': FORMAT_JSON, 'JSON': FORMAT_JSON
        }[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict(
        (f.path, n) for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [
        lambda path, r=re.compile(p): r.search(path) is not None
        for p in regex_filters or []
    ]
    # Both the compiled pattern and the replacement are bound as default
    # arguments: a free variable would be looked up when the lambda is
    # called, making every substitution use the last replacement of the list
    replace = [
        lambda path, r=re.compile(p), repl=repl: r.sub(repl, path)
        for p, repl in regex_replaces or []
    ]

    def filefilter(path):
        """Return the rewritten path, or None if the path is filtered out."""
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        # Nothing can rewrite the path: hand back the original object
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict(
            (f.path, pkg) for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        # Stable numbering: ids follow the alphabetical order of names
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        # Truncate non-package paths to the requested depth
        other_files = set(
            PosixPath(*f.components[:file_depth + 1]) for f in other_files)
        edges = OrderedSet(
            (prog,
             f if f in package_map
             else PosixPath(*f.components[:file_depth + 1]),
             mode,
             argv)
            for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            # Only keep the files declared as inputs/outputs
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename and `args.target[0]` the directory to create.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path flavor is not POSIX. If anything fails once the
    `root` directory has been created, that directory is removed and the
    exception is re-raised.
    """
    if not args.pack:
        logger.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages: files of packages that were not packed are expected to
    # be present on the host
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logger.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logger.error("Some packages are missing, you should probably install "
                     "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Absolute symlink targets are re-rooted with join_root() so they
            # point inside the unpacked tree
            if m.issym():
                linkname = PosixPath(m.linkname)
                # NOTE(review): is_absolute is read as an attribute, not
                # called -- confirm it is a property of the path class
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [
            f.path
            for f in itervalues(config.inputs_outputs)
            if f.read_runs
        ]
        if input_files:
            logger.info("Packing up original input files...")
            # The context manager closes the archive even if add() raises
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    # Inputs that were not packed are simply skipped
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename, `args.target[0]` the directory to create and
        `args.restore_owner` is passed to `should_restore_owner()`.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path flavor is not POSIX. If anything fails once the
    `root` directory has been created, that directory is removed and the
    exception is re-raised.
    """
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning(
                "According to configuration, some files were left "
                "out because they belong to the following "
                "packages:%s\nWill copy files from HOST SYSTEM",
                ''.join('\n %s' % pkg for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # Copy the host file (or recreate the link) under root
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # NOTE(review): the keyword is spelled 'mising'; it is kept
                # as-is because it is the name reported to usage statistics
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Copy the host's resolv.conf into the tree; best effort, an IOError
        # is deliberately ignored
        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [
            f.path
            for f in itervalues(config.inputs_outputs)
            if f.read_runs
        ]
        if input_files:
            logger.info("Packing up original input files...")
            # The context manager closes the archive even if add() raises
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    # Inputs that were not packed are simply skipped
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename and `args.target[0]` the directory to create.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path flavor is not POSIX. If anything fails once the
    `root` directory has been created, that directory is removed and the
    exception is re-raised.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages: files of packages that were not packed are expected to
    # be present on the host
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Absolute symlink targets are re-rooted with join_root() so they
            # point inside the unpacked tree
            if m.issym():
                linkname = PosixPath(m.linkname)
                # NOTE(review): is_absolute is read as an attribute, not
                # called -- confirm it is a property of the path class
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            # The context manager closes the archive even if add() raises
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    # Inputs that were not packed are simply skipped
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename, `args.target[0]` the directory to create and
        `args.restore_owner` is passed to `should_restore_owner()`.

    Exits with status 1 if no pack was given, if the target already exists or
    if the default path flavor is not `PosixPath`. If anything fails once the
    `root` directory has been created, that directory is removed and the
    exception is re-raised.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # Copy the host file (or recreate the link) under root
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # NOTE(review): the keyword is spelled 'mising'; it is kept
                # as-is because it is the name reported to usage statistics
                record_usage(chroot_mising_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            # The context manager closes the archive even if add() raises
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for ifile in input_files:
                    filename = join_root(root, ifile)
                    # Inputs that were not packed are simply skipped
                    if filename.exists():
                        inputtar.add(str(filename), str(ifile))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        rmtree_fixed(root)
        raise
def showfiles(args):
    """Writes out the input and output files.

    Works both for a pack file and for an extracted directory.

    :param args: Parsed command-line arguments. `args.pack[0]` is the pack
        file or unpacked directory, `args.input` / `args.output` select which
        section to print (both are printed if neither or both are set),
        `args.run` optionally restricts the listing to one run (id or index)
        and `args.verbosity >= 2` adds each file's path.

    For an unpacked directory, each input file is followed by its current
    assignment as recorded in the unpacker metadata. The section selection
    and the "none" fallback apply identically to pack files and directories.
    """
    def parse_run(runs, s):
        """Turn a run id or a run number (as a string) into a run index."""
        for i, run in enumerate(runs):
            if run['id'] == s:
                return i
        try:
            r = int(s)
        except ValueError:
            logging.critical("Error: Unknown run %s", s)
            raise UsageError
        if r < 0 or r >= len(runs):
            logging.critical("Error: Expected 0 <= run <= %d, got %d",
                             len(runs) - 1, r)
            sys.exit(1)
        return r

    show_inputs = args.input or not args.output
    show_outputs = args.output or not args.input

    def file_filter(fio):
        """Tell whether a file belongs to the requested sections and run."""
        if file_filter.run is None:
            return ((show_inputs and fio.read_runs) or
                    (show_outputs and fio.write_runs))
        else:
            return ((show_inputs and file_filter.run in fio.read_runs) or
                    (show_outputs and file_filter.run in fio.write_runs))

    # The selected run is stored on the function so it can be set once the
    # configuration has been loaded
    file_filter.run = None

    pack = Path(args.pack[0])

    if not pack.exists():
        logging.critical("Pack or directory %s does not exist", pack)
        sys.exit(1)

    unpacked = pack.is_dir()
    if unpacked:
        # Reads info from an unpacked directory
        config = load_config_file(pack / 'config.yml', canonical=True)
    else:  # pack.is_file()
        # Reads info from a pack file
        config = load_config(pack)

    # Filter files by run
    if args.run is not None:
        file_filter.run = parse_run(config.runs, args.run)

    if unpacked:
        # The unpacker metadata contains the name of the files that replaced
        # each input file (if upload was used)
        unpacked_info = metadata_read(pack, None)
        assigned_input_files = unpacked_info.get('input_files', {})
    else:
        # A pack file has no assignment information
        assigned_input_files = None

    if show_inputs:
        shown = False
        for input_name, f in sorted(iteritems(config.inputs_outputs)):
            if f.read_runs and file_filter(f):
                # The header is only printed once something is listed
                if not shown:
                    print("Input files:")
                    shown = True
                if args.verbosity >= 2:
                    print(" %s (%s)" % (input_name, f.path))
                else:
                    print(" %s" % input_name)

                if assigned_input_files is not None:
                    assigned = assigned_input_files.get(input_name)
                    if assigned is None:
                        assigned = "(original)"
                    elif assigned is False:
                        assigned = "(not created)"
                    elif assigned is True:
                        assigned = "(generated)"
                    else:
                        assert isinstance(assigned, (bytes, unicode_))
                    print(" %s" % assigned)
        if not shown:
            print("Input files: none")

    if show_outputs:
        shown = False
        for output_name, f in sorted(iteritems(config.inputs_outputs)):
            if f.write_runs and file_filter(f):
                if not shown:
                    print("Output files:")
                    shown = True
                if args.verbosity >= 2:
                    print(" %s (%s)" % (output_name, f.path))
                else:
                    print(" %s" % output_name)
        if not shown:
            print("Output files: none")
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.

    Reads the configuration and the trace database, filters and groups the
    files that were accessed, then hands everything to `graph_dot()` or
    `graph_json()`.

    :param target: Passed through to the graph writer.
    :param configfile: Path object of the configuration file; the process
        exits with status 1 if it is not a file.
    :param database: Passed to `read_events()`.
    :param all_forks: Passed to `read_events()`.
    :param graph_format: One of 'dot', 'DOT', 'json', 'JSON'; anything else
        logs a critical error and exits with status 1.
    :param level_pkgs: Package level, interpreted by `parse_levels()`.
    :param level_processes: Process level, interpreted by `parse_levels()`.
    :param level_other_files: Other-files level, interpreted by
        `parse_levels()`.
    :param regex_filters: Optional iterable of regex patterns; a path matched
        (``search``) by any of them is dropped.
    :param regex_replaces: Optional iterable of ``(pattern, replacement)``
        pairs, applied to each path in order with ``re.sub``.
    :param aggregates: Optional iterable of prefixes; a path starting with
        one of them is replaced by the first such prefix.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    # Both the compiled pattern and the replacement are bound as default
    # arguments: a free variable would be looked up when the lambda is
    # called, making every substitution use the last replacement of the list
    replace = [lambda path, r=re.compile(p), repl=repl: r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        """Return the rewritten path, or None if the path is filtered out."""
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        # Nothing can rewrite the path: hand back the original object
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        # Stable numbering: ids follow the alphabetical order of names
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        # Truncate non-package paths to the requested depth
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            # Only keep the files declared as inputs/outputs
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename, `args.target[0]` the directory to create and
        `args.restore_owner` is passed to `should_restore_owner()`.

    Exits with status 1 if no pack was given, if the target already exists,
    if the default path flavor is not `PosixPath` or if the archive contains
    member names with '..' or a leading '/'.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # The archive is closed in the 'finally' below, whether extraction
    # succeeds, raises, or exits early
    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    f = Path(f.path)
                    if not f.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            f, pkg.name)
                        missing_files = True
                        continue
                    # Copy the host file (or recreate the link) under root
                    dest = join_root(root, f)
                    dest.parent.mkdir(parents=True)
                    if f.is_link():
                        dest.symlink(f.read_link())
                    else:
                        f.copy(dest)
                    if restore_owner:
                        stat = f.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # NOTE(review): the keyword is spelled 'mising'; it is kept
                # as-is because it is the name reported to usage statistics
                record_usage(chroot_mising_files=True)

        # Unpacks files
        # Refuse archives whose member names could escape the target
        if any('..' in m.name or m.name.startswith('/')
               for m in tar.getmembers()):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
        for m in members:
            # Strip the 'DATA/' prefix (5 characters)
            m.name = m.name[5:]
        if not restore_owner:
            # Extract everything as the current user
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        # The context manager closes the archive even if add() raises
        with tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz') as inputtar:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments; `args.pack[0]` is the pack
        filename and `args.target[0]` the directory to create.

    Exits with status 1 if no pack was given, if the target already exists,
    if the default path flavor is not POSIX or if the archive contains member
    names with '..' or a leading '/'.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # The archive is closed in the 'finally' below, whether extraction
    # succeeds, raises, or exits early
    tar = tarfile.open(str(pack), 'r:*')
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(target / 'config.yml',
                                                       True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks packages: files of packages that were not packed are
        # expected to be present on the host
        missing_files = False
        for pkg in packages:
            if pkg.packfiles:
                continue
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s that wasn't packed) "
                        "on host, experiment will probably miss it.",
                        f, pkg.name)
                    missing_files = True
        if missing_files:
            record_usage(directory_missing_pkgs=True)
            logging.error(
                "Some packages are missing, you should probably install "
                "them.\nUse 'reprounzip installpkgs -h' for help")

        # Unpacks files
        # Refuse archives whose member names could escape the target
        if any('..' in m.name or m.name.startswith('/')
               for m in tar.getmembers()):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
        for m in members:
            # Strip the 'DATA/' prefix (5 characters)
            m.name = m.name[5:]
        # Absolute symlink targets are re-rooted with join_root() so they
        # point inside the unpacked tree
        for m in members:
            if not m.issym():
                continue
            linkname = PosixPath(m.linkname)
            # NOTE(review): is_absolute is read as an attribute, not called
            # -- confirm it is a property of the path class
            if linkname.is_absolute:
                m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        # The context manager closes the archive even if add() raises
        with tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz') as inputtar:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    inputtar.add(str(join_root(root, PosixPath(ifile))),
                                 str(PosixPath(ifile)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they are
    missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments. ``args.pack[0]`` is the pack
        filename and ``args.target[0]`` is the directory to create, which must
        not exist yet.

    Logs a critical message and exits with status 1 if no pack was given, if
    the target already exists, if the default path class is not `PosixPath`,
    or if the archive contains member names with '..' or a leading '/'.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    tar = tarfile.open(str(pack), 'r:*')
    # The pack must be closed whichever way we leave this section: normal
    # completion, sys.exit() on a bad archive, or an extraction error
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(
            target / 'config.yml', True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks packages: files of packages that were not packed are
        # expected to be provided by the host system
        missing_files = False
        for pkg in packages:
            if pkg.packfiles:
                continue
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s that wasn't packed) "
                        "on host, experiment will probably miss it.",
                        f, pkg.name)
                    missing_files = True
        if missing_files:
            logging.error("Some packages are missing, you should probably "
                          "install "
                          "them.\nUse 'reprounzip installpkgs -h' for help")

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives whose member names could escape the target
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the 'DATA/' prefix
            m.name = m.name[5:]
        # Rewrites absolute symlink targets through join_root() so they are
        # resolved against the unpacked root instead of the host's '/'
        for m in members:
            if not m.issym():
                continue
            linkname = PosixPath(m.linkname)
            # NOTE(review): is_absolute is read as an attribute, not called;
            # this assumes the path library exposes it as a property
            if linkname.is_absolute:
                m.linkname = join_root(root, linkname).path
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    ifile = PosixPath(ifile)
                    inputtar.add(str(join_root(root, ifile)), str(ifile))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'directory')

    signals.post_setup(target=target)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh or /usr/bin/env
    wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back after
    an upload) and the configuration file is extracted.

    :param args: Parsed command-line arguments. ``args.pack[0]`` is the pack
        filename, ``args.target[0]`` is the directory to create (it must not
        exist yet) and ``args.restore_owner`` is handed to
        `should_restore_owner()` to decide whether file ownership is restored.

    Logs a critical message and exits with status 1 if no pack was given, if
    the target already exists, if the default path class is not `PosixPath`,
    or if the archive contains member names with '..' or a leading '/'.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    tar = tarfile.open(str(pack), 'r:*')
    # try/finally so the pack is closed on every exit path, including the
    # sys.exit() below and any copy or extraction error
    try:
        # Unpacks configuration file
        member = tar.getmember('METADATA/config.yml')
        member.name = 'config.yml'
        tar.extract(member, str(target))

        # Loads config
        runs, packages, other_files = load_config_file(
            target / 'config.yml', True)

        target.mkdir()
        root = (target / 'root').absolute()
        root.mkdir()

        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            logging.warning(
                "According to configuration, some files were left out "
                "because they belong to the following packages:%s"
                "\nWill copy files from HOST SYSTEM",
                ''.join('\n %s' % pkg for pkg in packages_not_packed))
            for pkg in packages_not_packed:
                for f in pkg.files:
                    f = Path(f.path)
                    if not f.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            f, pkg.name)
                        # Nothing to copy or stat: skip it instead of
                        # crashing the whole setup on the copy below
                        continue
                    dest = join_root(root, f)
                    dest.parent.mkdir(parents=True)
                    if f.is_link():
                        dest.symlink(f.read_link())
                    else:
                        f.copy(dest)
                    if restore_owner:
                        stat = f.stat()
                        dest.chown(stat.st_uid, stat.st_gid)

        # Unpacks files
        all_members = tar.getmembers()
        # Refuse archives whose member names could escape the target
        if any('..' in m.name or m.name.startswith('/')
               for m in all_members):
            logging.critical("Tar archive contains invalid pathnames")
            sys.exit(1)
        members = [m for m in all_members if m.name.startswith('DATA/')]
        for m in members:
            # Strips the 'DATA/' prefix
            m.name = m.name[5:]
        if not restore_owner:
            # Extract everything as the current user rather than with the
            # owner/group recorded in the archive
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        tar.extractall(str(root), members)
    finally:
        tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        try:
            for run in runs:
                for ifile in itervalues(run['input_files']):
                    ifile = PosixPath(ifile)
                    inputtar.add(str(join_root(root, ifile)), str(ifile))
        finally:
            inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)