def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: parsed command-line arguments; reads ``pack``, ``target``
        and ``restore_owner``.
    """
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with POSIX paths; refuse to run elsewhere
    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning("According to configuration, some files were left "
                           "out because they belong to the following "
                           "packages:%s\nWill copy files from HOST SYSTEM",
                           ''.join('\n    %s' % pkg
                                   for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # BUG FIX: the keyword was misspelled 'chroot_mising_files',
                # which recorded usage stats under the wrong key
                record_usage(chroot_missing_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user rather than the
            # recorded owner/group (we are probably not root)
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Copies the host's resolv.conf so DNS works inside the chroot;
        # best-effort only
        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in config.inputs_outputs.values()
                       if f.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-unpacked root behind on failure
        rmtree_fixed(root)
        raise
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    if not args.pack:
        logger.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)
    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Pull the configuration file out of the pack, then parse it
    rpz = RPZPack(pack)
    rpz.extract_config(target / 'config.yml')
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Report files from unpacked packages that are absent on this host
    have_missing = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for pkg_file in pkg.files:
            if not Path(pkg_file.path).exists():
                logger.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    pkg_file, pkg.name)
                have_missing = True
    if have_missing:
        record_usage(directory_missing_pkgs=True)
        logger.error("Some packages are missing, you should probably install "
                     "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Extract the data files
        members = rpz.list_data()
        for member in members:
            # Remove 'DATA/' prefix
            member.name = str(rpz.remove_data_prefix(member.name))
            # Makes symlink targets relative
            if member.issym():
                link_target = PosixPath(member.linkname)
                if link_target.is_absolute:
                    member.linkname = join_root(
                        root, PosixPath(member.linkname)).path
        logger.info("Extracting files...")
        rpz.extract_data(root, members)
        rpz.close()

        # Original input files, so upload can restore them
        input_files = [iofile.path
                       for iofile in config.inputs_outputs.values()
                       if iofile.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            with tarfile.open(str(target / 'inputs.tar.gz'),
                              'w:gz') as inputtar:
                for input_path in input_files:
                    on_disk = join_root(root, input_path)
                    if on_disk.exists():
                        inputtar.add(str(on_disk), str(input_path))

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Remove the partially-unpacked root before propagating
        rmtree_fixed(root)
        raise
def get_package_info(pack, read_data=False):
    """Get information about a package.

    Builds a dictionary with 'meta', 'runs', 'inputs_outputs' and
    'unpacker_status' keys; when `read_data` is true, a 'pack' key with
    tar-member statistics is included as well.
    """
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        # Walk the data section of the pack, tallying members by type
        size_total = 0
        path_count = 0
        counts = {'files': 0, 'dirs': 0, 'symlinks': 0,
                  'hardlinks': 0, 'others': 0}

        rpz_pack = RPZPack(pack)
        for member in rpz_pack.list_data():
            size_total += member.size
            path_count += 1
            if member.isfile():
                counts['files'] += 1
            elif member.isdir():
                counts['dirs'] += 1
            elif member.issym():
                counts['symlinks'] += 1
            elif hasattr(member, 'islnk') and member.islnk():
                counts['hardlinks'] += 1
            else:
                counts['others'] += 1
        rpz_pack.close()

        pack_info = {'total_size': size_total, 'total_paths': path_count}
        pack_info.update(counts)
        information['pack'] = pack_info

    # Tally paths listed in the metadata, split by packed/unpacked packages
    path_total = 0
    packed_pkg_files = 0
    unpacked_pkg_files = 0
    packed_pkg_count = 0
    for package in packages:
        count = len(package.files)
        path_total += count
        if package.packfiles:
            packed_pkg_files += count
            packed_pkg_count += 1
        else:
            unpacked_pkg_files += count
    other_count = len(other_files)
    path_total += other_count

    information['meta'] = {
        'total_paths': path_total,
        'packed_packages_files': packed_pkg_files,
        'unpacked_packages_files': unpacked_pkg_files,
        'packages': len(packages),
        'packed_packages': packed_pkg_count,
        'packed_paths': packed_pkg_files + other_count,
    }

    if runs:
        # Take the first run as representative; warn if runs disagree
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture for r in runs):
            logger.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution for r in runs):
            logger.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

    # Only expose the whitelisted keys of each run
    run_keys = ('id', 'binary', 'argv', 'environ',
                'workingdir', 'signal', 'exitcode')
    information['runs'] = [
        {key: run[key] for key in run_keys if key in run}
        for run in runs
    ]

    information['inputs_outputs'] = {
        name: {
            'path': str(iofile.path),
            'read_runs': iofile.read_runs,
            'write_runs': iofile.write_runs,
        }
        for name, iofile in inputs_outputs.items()
    }

    # Unpacker compatibility
    unpacker_status = {}
    for name, unpacker in unpackers.items():
        if 'test_compatibility' not in unpacker:
            # No test provided: compatibility unknown
            unpacker_status.setdefault(None, []).append((name, None))
            continue
        compat = unpacker['test_compatibility']
        if callable(compat):
            compat = compat(pack, config=config)
        # The test may return a bare status or a (status, message) pair
        if isinstance(compat, (tuple, list)):
            compat, msg = compat
        else:
            msg = None
        unpacker_status.setdefault(compat, []).append((name, msg))
    information['unpacker_status'] = unpacker_status

    return information
def directory_create(args):
    """Unpacks the experiment in a folder.

    Only the files that are not part of a package are copied (unless they
    are missing from the system and were packed).

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: parsed command-line arguments; reads ``pack`` and
        ``target``.
    """
    if not args.pack:
        logging.critical("setup needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    # Refuse to clobber an existing unpack target
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    # This unpacker relies on POSIX paths and symlinks
    if not issubclass(DefaultAbstractPath, PosixPath):
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()

    # Checks packages: warn about files from packages that were not packed
    # and are also absent from this host
    missing_files = False
    for pkg in packages:
        if pkg.packfiles:
            continue
        for f in pkg.files:
            if not Path(f.path).exists():
                # NOTE(review): logs the File object `f`, not `f.path` —
                # presumably its repr is the path; confirm
                logging.error(
                    "Missing file %s (from package %s that wasn't packed) "
                    "on host, experiment will probably miss it.",
                    f, pkg.name)
                missing_files = True
    if missing_files:
        record_usage(directory_missing_pkgs=True)
        logging.error("Some packages are missing, you should probably install "
                      "them.\nUse 'reprounzip installpkgs -h' for help")

    root.mkdir()
    try:
        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
            # Makes symlink targets relative: absolute link targets are
            # re-rooted under the unpacked root so they resolve without chroot
            if m.issym():
                linkname = PosixPath(m.linkname)
                if linkname.is_absolute:
                    m.linkname = join_root(root, PosixPath(m.linkname)).path
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'directory')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Clean up the partially-unpacked root before propagating the error
        rmtree_fixed(root)
        raise
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.

    :param args: parsed command-line arguments; reads ``pack``, ``target``
        and ``restore_owner``.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with POSIX paths; refuse to run elsewhere
    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()
    try:
        # Checks that everything was packed
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were "
                            "left out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n    %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # BUG FIX: the keyword was misspelled 'chroot_mising_files',
                # which recorded usage stats under the wrong key
                record_usage(chroot_missing_files=True)

        # Unpacks files
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user rather than the
            # recorded owner/group (we are probably not root)
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-unpacked root behind on failure
        rmtree_fixed(root)
        raise
def print_info(args):
    """Writes out some information about a pack file.

    Prints pack statistics, metadata, run descriptions, input/output files
    and unpacker compatibility to stdout; detail level follows
    ``args.verbosity``.
    """
    pack = Path(args.pack[0])

    # Loads config
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    # Tally the tar members in the pack's data section by type
    pack_total_size = 0
    pack_total_paths = 0
    pack_files = 0
    pack_dirs = 0
    pack_symlinks = 0
    pack_others = 0
    rpz_pack = RPZPack(pack)
    for m in rpz_pack.list_data():
        pack_total_size += m.size
        pack_total_paths += 1
        if m.isfile():
            pack_files += 1
        elif m.isdir():
            pack_dirs += 1
        elif m.issym():
            pack_symlinks += 1
        else:
            pack_others += 1
    rpz_pack.close()

    # Count paths listed in the metadata, split by packed/unpacked packages
    meta_total_paths = 0
    meta_packed_packages_files = 0
    meta_unpacked_packages_files = 0
    meta_packages = len(packages)
    meta_packed_packages = 0
    for package in packages:
        nb = len(package.files)
        meta_total_paths += nb
        if package.packfiles:
            meta_packed_packages_files += nb
            meta_packed_packages += 1
        else:
            meta_unpacked_packages_files += nb
    nb = len(other_files)
    meta_total_paths += nb
    meta_packed_paths = meta_packed_packages_files + nb

    if runs:
        # Take the first run as representative; warn if runs disagree
        meta_architecture = runs[0]['architecture']
        if any(r['architecture'] != meta_architecture for r in runs):
            logging.warning("Runs have different architectures")
        meta_distribution = runs[0]['distribution']
        if any(r['distribution'] != meta_distribution for r in runs):
            logging.warning("Runs have different distributions")
        # Collapse the (name, version) pair into a display string
        meta_distribution = ' '.join(t for t in meta_distribution if t)
    current_architecture = platform.machine().lower()
    # NOTE(review): platform.linux_distribution() was removed in Python 3.8;
    # this code presumably targets older interpreters — confirm
    current_distribution = platform.linux_distribution()[0:2]
    current_distribution = ' '.join(t for t in current_distribution if t)

    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))
    print("Unpacked size: %s" % hsize(pack_total_size))
    print("Total packed paths: %d" % pack_total_paths)
    if args.verbosity >= 3:
        print("    Files: %d" % pack_files)
        print("    Directories: %d" % pack_dirs)
        print("    Symbolic links: %d" % pack_symlinks)
        if pack_others:
            print("    Unknown (what!?): %d" % pack_others)
    print("\n----- Metadata -----")
    if args.verbosity >= 3:
        print("Total paths: %d" % meta_total_paths)
        print("Listed packed paths: %d" % meta_packed_paths)
    if packages:
        print("Total software packages: %d" % meta_packages)
        print("Packed software packages: %d" % meta_packed_packages)
        if args.verbosity >= 3:
            print("Files from packed software packages: %d" %
                  meta_packed_packages_files)
            print("Files from unpacked software packages: %d" %
                  meta_unpacked_packages_files)
    if runs:
        print("Architecture: %s (current: %s)" % (meta_architecture,
                                                  current_architecture))
        print("Distribution: %s (current: %s)" % (
              meta_distribution, current_distribution or "(not Linux)"))
        print("Executions (%d):" % len(runs))
        for i, run in enumerate(runs):
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            # Only prefix with the run index when there are several runs
            if len(runs) > 1:
                print("    %d: %s" % (i, cmdline))
            else:
                print("    %s" % cmdline)
            if args.verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                # A run records either the signal that killed it or its
                # exit code, never both
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])

    if inputs_outputs:
        if args.verbosity < 2:
            print("Inputs/outputs files (%d) :%s" % (
                  len(inputs_outputs), ", ".join(inputs_outputs)))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in iteritems(inputs_outputs):
                # Tag each file as input ("in"), output ("out") or both
                t = []
                if f.read_runs:
                    t.append("in")
                if f.write_runs:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f.path))

    # Unpacker compatibility
    print("\n----- Unpackers -----")
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            # The test may return a bare status or a (status, message) pair
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            # No test provided: compatibility unknown
            unpacker_status.setdefault(None, []).append((name, None))
    for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                 (COMPAT_NO, "Incompatible")]:
        # Only the compatible unpackers are shown at low verbosity
        if s != COMPAT_OK and args.verbosity < 2:
            continue
        if s not in unpacker_status:
            continue
        upks = unpacker_status[s]
        print("%s (%d):" % (n, len(upks)))
        for upk_name, msg in upks:
            if msg is not None:
                print("    %s (%s)" % (upk_name, msg))
            else:
                print("    %s" % upk_name)
def get_package_info(pack, read_data=False):
    """Get information about a package.

    Returns a dict with 'meta', 'runs', 'inputs_outputs' and
    'unpacker_status' keys; when *read_data* is true, a 'pack' key with
    tar-member statistics is included as well.
    """
    # load_config returns a config object that also unpacks as
    # (runs, packages, other_files)
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    information = {}

    if read_data:
        # Walk the data section of the pack, tallying members by type
        total_size = 0
        total_paths = 0
        files = 0
        dirs = 0
        symlinks = 0
        hardlinks = 0
        others = 0

        rpz_pack = RPZPack(pack)
        for m in rpz_pack.list_data():
            total_size += m.size
            total_paths += 1
            if m.isfile():
                files += 1
            elif m.isdir():
                dirs += 1
            elif m.issym():
                symlinks += 1
            elif hasattr(m, 'islnk') and m.islnk():
                # islnk may not exist on every member implementation
                hardlinks += 1
            else:
                others += 1
        rpz_pack.close()

        information['pack'] = {
            'total_size': total_size,
            'total_paths': total_paths,
            'files': files,
            'dirs': dirs,
            'symlinks': symlinks,
            'hardlinks': hardlinks,
            'others': others,
        }

    # Count paths listed in the metadata, split by packed/unpacked packages
    total_paths = 0
    packed_packages_files = 0
    unpacked_packages_files = 0
    packed_packages = 0
    for package in packages:
        nb = len(package.files)
        total_paths += nb
        if package.packfiles:
            packed_packages_files += nb
            packed_packages += 1
        else:
            unpacked_packages_files += nb
    nb = len(other_files)
    total_paths += nb

    information['meta'] = {
        'total_paths': total_paths,
        'packed_packages_files': packed_packages_files,
        'unpacked_packages_files': unpacked_packages_files,
        'packages': len(packages),
        'packed_packages': packed_packages,
        'packed_paths': packed_packages_files + nb,
    }

    if runs:
        # Take the first run as representative; warn if runs disagree
        architecture = runs[0]['architecture']
        if any(r['architecture'] != architecture for r in runs):
            logger.warning("Runs have different architectures")
        information['meta']['architecture'] = architecture
        distribution = runs[0]['distribution']
        if any(r['distribution'] != distribution for r in runs):
            logger.warning("Runs have different distributions")
        information['meta']['distribution'] = distribution

    # Only expose the whitelisted keys of each run
    information['runs'] = [
        dict((k, run[k])
             for k in ['id', 'binary', 'argv', 'environ',
                       'workingdir', 'signal', 'exitcode']
             if k in run)
        for run in runs]

    information['inputs_outputs'] = {
        name: {'path': str(iofile.path),
               'read_runs': iofile.read_runs,
               'write_runs': iofile.write_runs}
        for name, iofile in iteritems(inputs_outputs)}

    # Unpacker compatibility
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            # The test may return a bare status or a (status, message) pair
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            # No test provided: compatibility unknown
            unpacker_status.setdefault(None, []).append((name, None))
    information['unpacker_status'] = unpacker_status

    return information