def download_file(url, dest, cachename=None):
    """Downloads a file using a local cache.

    If the file cannot be downloaded or if it wasn't modified, the cached
    version will be used instead.

    The cache lives in ``~/.cache/reprozip/``.

    :param url: URL to download from.
    :param dest: Path the downloaded (or cached) file is copied to.
    :param cachename: Name of the cache entry; defaults to ``dest``'s
        filename.
    """
    if cachename is None:
        cachename = dest.name
    request = Request(url)

    if 'XDG_CACHE_HOME' in os.environ:
        cache = Path(os.environ['XDG_CACHE_HOME'])
    else:
        cache = Path('~/.cache').expand_user()
    cache = cache / 'reprozip' / cachename
    if cache.exists():
        # Ask the server to send nothing (304) if our copy is current
        mtime = email.utils.formatdate(cache.mtime(), usegmt=True)
        request.add_header('If-Modified-Since', mtime)
    cache.parent.mkdir(parents=True)

    try:
        response = urlopen(request)
    except URLError as e:
        if cache.exists():
            # Fall back on the cached copy: either the server said it is
            # up to date (304), or the download failed outright
            if isinstance(e, HTTPError) and e.code == 304:
                logging.info("Cached file %s is up to date", cachename)
            else:
                logging.warning("Couldn't download %s: %s", url, e)
            cache.copy(dest)
            return
        else:
            raise

    if response is None:
        logging.info("Cached file %s is up to date", cachename)
        cache.copy(dest)
        return

    logging.info("Downloading %s", url)
    try:
        CHUNK_SIZE = 4096
        with cache.open('wb') as f:
            while True:
                chunk = response.read(CHUNK_SIZE)
                if not chunk:
                    break
                f.write(chunk)
    except Exception:  # pragma: no cover
        # Don't leave a truncated file in the cache
        try:
            cache.remove()
        except OSError:
            pass
        # Bare raise preserves the original traceback (unlike `raise e`)
        raise
    finally:
        # Close the HTTP response on success AND error paths (previously
        # it leaked when the write loop raised)
        response.close()
    cache.copy(dest)
def download_file(url, dest, cachename=None):
    """Downloads a file using a local cache.

    If the file cannot be downloaded or if it wasn't modified, the cached
    version will be used instead.

    The cache lives in ``~/.cache/reprozip/``.

    :param url: URL to download from.
    :param dest: Path the downloaded (or cached) file is copied to.
    :param cachename: Name of the cache entry; defaults to ``dest``'s
        last path component.
    """
    if cachename is None:
        cachename = dest.components[-1]
    request = Request(url)

    if 'XDG_CACHE_HOME' in os.environ:
        cache = Path(os.environ['XDG_CACHE_HOME'])
    else:
        cache = Path('~/.cache').expand_user()
    cache = cache / 'reprozip' / cachename
    if cache.exists():
        # Ask the server to send nothing (304) if our copy is current
        mtime = email.utils.formatdate(cache.mtime(), usegmt=True)
        request.add_header('If-Modified-Since', mtime)
    cache.parent.mkdir(parents=True)

    try:
        # Short timeout when a cached fallback exists, longer otherwise
        response = urlopen(request, timeout=2 if cache.exists() else 10)
    except URLError as e:
        if cache.exists():
            # Fall back on the cached copy: either the server said it is
            # up to date (304), or the download failed outright
            if isinstance(e, HTTPError) and e.code == 304:
                logging.info("Download %s: cache is up to date", cachename)
            else:
                logging.warning("Download %s: error downloading %s: %s",
                                cachename, url, e)
            cache.copy(dest)
            return
        else:
            raise

    if response is None:
        logging.info("Download %s: cache is up to date", cachename)
        cache.copy(dest)
        return

    logging.info("Download %s: downloading %s", cachename, url)
    try:
        with cache.open('wb') as f:
            copyfile(response, f)
    except Exception:  # pragma: no cover
        # Don't leave a truncated file in the cache
        try:
            cache.remove()
        except OSError:
            pass
        # Bare raise preserves the original traceback (unlike `raise e`)
        raise
    finally:
        # Close the HTTP response on success AND error paths (previously
        # it leaked when copyfile() raised)
        response.close()
    logging.info("Downloaded %s successfully", cachename)
    cache.copy(dest)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    # Validate arguments before creating anything on disk
    if not args.pack:
        logger.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with a POSIX filesystem layout
    if not issubclass(DefaultAbstractPath, PosixPath):
        logger.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()
    try:
        # Checks that everything was packed; files from packages that were
        # not packed must be copied from the host system instead
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logger.warning("According to configuration, some files were left "
                           "out because they belong to the following "
                           "packages:%s\nWill copy files from HOST SYSTEM",
                           ''.join('\n %s' % pkg
                                   for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        # Not on the host either: log and skip, don't abort
                        logger.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # join_root() maps the absolute host path to the
                    # corresponding location under the new root directory
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        # Recreate the symlink with its original target
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # NOTE(review): 'mising' typo kept as-is -- it is a key in
                # the usage report and may be matched downstream
                record_usage(chroot_mising_files=True)

        # Unpacks files from the pack's DATA/ section
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user rather than the owners
            # recorded in the archive
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logger.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Copy the host's resolv.conf so name resolution can work inside
        # the chroot; best-effort only
        resolvconf_src = Path('/etc/resolv.conf')
        if resolvconf_src.exists():
            try:
                resolvconf_src.copy(root / 'etc/resolv.conf')
            except IOError:
                pass

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logger.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in config.inputs_outputs.values()
                       if f.read_runs]
        if input_files:
            logger.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Clean up the partially-built root directory on any failure
        rmtree_fixed(root)
        raise
def download_file(url, dest, cachename=None, ssl_verify=None):
    """Downloads a file using a local cache.

    If the file cannot be downloaded or if it wasn't modified, the cached
    version will be used instead.

    The cache lives in ``~/.cache/reprozip/``.

    :param dest: Path to copy the file to, or None to only populate the
        cache; the path of the file (dest or the cache entry) is returned.
    :param cachename: Name of the cache entry; defaults to ``dest``'s
        last path component.
    :param ssl_verify: Passed through to ``requests.get(verify=...)``.
    """
    if cachename is None:
        if dest is None:
            raise ValueError("One of 'dest' or 'cachename' must be specified")
        cachename = dest.components[-1]

    headers = {}

    if 'XDG_CACHE_HOME' in os.environ:
        cache = Path(os.environ['XDG_CACHE_HOME'])
    else:
        cache = Path('~/.cache').expand_user()
    cache = cache / 'reprozip' / cachename
    if cache.exists():
        # Ask the server to send nothing (304) if our copy is current
        mtime = email.utils.formatdate(cache.mtime(), usegmt=True)
        headers['If-Modified-Since'] = mtime
    cache.parent.mkdir(parents=True)

    try:
        # Short timeout when a cached fallback exists, longer otherwise
        response = requests.get(url, headers=headers,
                                timeout=2 if cache.exists() else 10,
                                stream=True, verify=ssl_verify)
        response.raise_for_status()
        if response.status_code == 304:
            # raise_for_status() only raises for 4xx/5xx; funnel the
            # not-modified case through the same handler below
            raise requests.HTTPError(
                '304 File is up to date, no data returned',
                response=response)
    except requests.RequestException as e:
        if cache.exists():
            # Compare against None explicitly: requests.Response defines
            # truthiness from the HTTP status (4xx/5xx are falsy), so
            # `if e.response` would misclassify error responses
            if e.response is not None and e.response.status_code == 304:
                logging.info("Download %s: cache is up to date", cachename)
            else:
                logging.warning("Download %s: error downloading %s: %s",
                                cachename, url, e)
            if dest is not None:
                cache.copy(dest)
                return dest
            else:
                return cache
        else:
            raise

    logging.info("Download %s: downloading %s", cachename, url)
    try:
        with cache.open('wb') as f:
            for chunk in response.iter_content(4096):
                f.write(chunk)
    except Exception:  # pragma: no cover
        # Don't leave a truncated file in the cache
        try:
            cache.remove()
        except OSError:
            pass
        # Bare raise preserves the original traceback (unlike `raise e`)
        raise
    finally:
        # Release the streamed connection on success AND error paths
        response.close()
    logging.info("Downloaded %s successfully", cachename)

    if dest is not None:
        cache.copy(dest)
        return dest
    else:
        return cache
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    # Validate arguments before creating anything on disk
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with a POSIX filesystem layout
    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    packages = config.packages

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()
    try:
        # Checks that everything was packed; files from packages that were
        # not packed must be copied from the host system instead
        packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
        if packages_not_packed:
            record_usage(chroot_missing_pkgs=True)
            logging.warning("According to configuration, some files were left "
                            "out because they belong to the following "
                            "packages:%s\nWill copy files from HOST SYSTEM",
                            ''.join('\n %s' % pkg
                                    for pkg in packages_not_packed))
            missing_files = False
            for pkg in packages_not_packed:
                for f in pkg.files:
                    path = Path(f.path)
                    if not path.exists():
                        # Not on the host either: log and skip, don't abort
                        logging.error(
                            "Missing file %s (from package %s) on host, "
                            "experiment will probably miss it",
                            path, pkg.name)
                        missing_files = True
                        continue
                    # join_root() maps the absolute host path to the
                    # corresponding location under the new root directory
                    dest = join_root(root, path)
                    dest.parent.mkdir(parents=True)
                    if path.is_link():
                        # Recreate the symlink with its original target
                        dest.symlink(path.read_link())
                    else:
                        path.copy(dest)
                    if restore_owner:
                        stat = path.stat()
                        dest.chown(stat.st_uid, stat.st_gid)
            if missing_files:
                # NOTE(review): 'mising' typo kept as-is -- it is a key in
                # the usage report and may be matched downstream
                record_usage(chroot_mising_files=True)

        # Unpacks files from the pack's DATA/ section
        members = rpz_pack.list_data()
        for m in members:
            # Remove 'DATA/' prefix
            m.name = str(rpz_pack.remove_data_prefix(m.name))
        if not restore_owner:
            # Extract everything as the current user rather than the owners
            # recorded in the archive
            uid = os.getuid()
            gid = os.getgid()
            for m in members:
                m.uid = uid
                m.gid = gid
        logging.info("Extracting files...")
        rpz_pack.extract_data(root, members)
        rpz_pack.close()

        # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
        sh_path = join_root(root, Path('/bin/sh'))
        env_path = join_root(root, Path('/usr/bin/env'))
        if not sh_path.lexists() or not env_path.lexists():
            logging.info("Setting up busybox...")
            busybox_path = join_root(root, Path('/bin/busybox'))
            busybox_path.parent.mkdir(parents=True)
            with make_dir_writable(join_root(root, Path('/bin'))):
                download_file(busybox_url(config.runs[0]['architecture']),
                              busybox_path,
                              'busybox-%s' % config.runs[0]['architecture'])
                busybox_path.chmod(0o755)
                if not sh_path.lexists():
                    sh_path.parent.mkdir(parents=True)
                    sh_path.symlink('/bin/busybox')
                if not env_path.lexists():
                    env_path.parent.mkdir(parents=True)
                    env_path.symlink('/bin/busybox')

        # Original input files, so upload can restore them
        input_files = [f.path for f in itervalues(config.inputs_outputs)
                       if f.read_runs]
        if input_files:
            logging.info("Packing up original input files...")
            inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
            for ifile in input_files:
                filename = join_root(root, ifile)
                if filename.exists():
                    inputtar.add(str(filename), str(ifile))
            inputtar.close()

        # Meta-data for reprounzip
        metadata_write(target, metadata_initial_iofiles(config), 'chroot')

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Clean up the partially-built root directory on any failure
        rmtree_fixed(root)
        raise
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    # Validate arguments before creating anything on disk
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with a POSIX filesystem layout
    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed; files from packages that were not
    # packed must be copied from the host system instead
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        record_usage(chroot_missing_pkgs=True)
        logging.warning("According to configuration, some files were left out "
                        "because they belong to the following packages:%s"
                        "\nWill copy files from HOST SYSTEM",
                        ''.join('\n %s' % pkg
                                for pkg in packages_not_packed))
        missing_files = False
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    # Not on the host either: log and skip, don't abort
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        f, pkg.name)
                    missing_files = True
                    continue
                # join_root() maps the absolute host path to the
                # corresponding location under the new root directory
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    # Recreate the symlink with its original target
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)
        if missing_files:
            # NOTE(review): 'mising' typo kept as-is -- it is a key in the
            # usage report and may be matched downstream
            record_usage(chroot_mising_files=True)

    # Unpacks files; reject path traversal ('..') and absolute member
    # names before handing the archive to extractall()
    if any('..' in m.name or m.name.startswith('/')
           for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        # Strip the 'DATA/' prefix (5 characters)
        m.name = m.name[5:]
    if not restore_owner:
        # Extract everything as the current user rather than the owners
        # recorded in the archive
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)
def chroot_create(args):
    """Unpacks the experiment in a folder so it can be run with chroot.

    All the files in the pack are unpacked; system files are copied only if
    they were not packed, and busybox is installed if /bin/sh wasn't packed.

    In addition, input files are put in a tar.gz (so they can be put back
    after an upload) and the configuration file is extracted.
    """
    # Validate arguments before creating anything on disk
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    # chroot only makes sense with a POSIX filesystem layout
    if DefaultAbstractPath is not PosixPath:
        logging.critical("Not unpacking on POSIX system")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # We can only restore owner/group of files if running as root
    restore_owner = should_restore_owner(args.restore_owner)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    target.mkdir()
    root = (target / 'root').absolute()
    root.mkdir()

    # Checks that everything was packed; files from packages that were not
    # packed must be copied from the host system instead
    packages_not_packed = [pkg for pkg in packages if not pkg.packfiles]
    if packages_not_packed:
        logging.warning(
            "According to configuration, some files were left out "
            "because they belong to the following packages:%s"
            "\nWill copy files from HOST SYSTEM",
            ''.join('\n %s' % pkg
                    for pkg in packages_not_packed))
        for pkg in packages_not_packed:
            for f in pkg.files:
                f = Path(f.path)
                if not f.exists():
                    logging.error(
                        "Missing file %s (from package %s) on host, "
                        "experiment will probably miss it",
                        f, pkg.name)
                    # BUGFIX: skip the missing file instead of falling
                    # through to f.copy(dest), which would raise on the
                    # nonexistent path and abort the whole setup
                    continue
                # join_root() maps the absolute host path to the
                # corresponding location under the new root directory
                dest = join_root(root, f)
                dest.parent.mkdir(parents=True)
                if f.is_link():
                    # Recreate the symlink with its original target
                    dest.symlink(f.read_link())
                else:
                    f.copy(dest)
                if restore_owner:
                    stat = f.stat()
                    dest.chown(stat.st_uid, stat.st_gid)

    # Unpacks files; reject path traversal ('..') and absolute member
    # names before handing the archive to extractall()
    if any('..' in m.name or m.name.startswith('/')
           for m in tar.getmembers()):
        logging.critical("Tar archive contains invalid pathnames")
        sys.exit(1)
    members = [m for m in tar.getmembers() if m.name.startswith('DATA/')]
    for m in members:
        # Strip the 'DATA/' prefix (5 characters)
        m.name = m.name[5:]
    if not restore_owner:
        # Extract everything as the current user rather than the owners
        # recorded in the archive
        uid = os.getuid()
        gid = os.getgid()
        for m in members:
            m.uid = uid
            m.gid = gid
    logging.info("Extracting files...")
    tar.extractall(str(root), members)
    tar.close()

    # Sets up /bin/sh and /usr/bin/env, downloading busybox if necessary
    sh_path = join_root(root, Path('/bin/sh'))
    env_path = join_root(root, Path('/usr/bin/env'))
    if not sh_path.lexists() or not env_path.lexists():
        logging.info("Setting up busybox...")
        busybox_path = join_root(root, Path('/bin/busybox'))
        busybox_path.parent.mkdir(parents=True)
        with make_dir_writable(join_root(root, Path('/bin'))):
            download_file(busybox_url(runs[0]['architecture']),
                          busybox_path)
            busybox_path.chmod(0o755)
            if not sh_path.lexists():
                sh_path.parent.mkdir(parents=True)
                sh_path.symlink('/bin/busybox')
            if not env_path.lexists():
                env_path.parent.mkdir(parents=True)
                env_path.symlink('/bin/busybox')

    # Original input files, so upload can restore them
    if any(run['input_files'] for run in runs):
        logging.info("Packing up original input files...")
        inputtar = tarfile.open(str(target / 'inputs.tar.gz'), 'w:gz')
        for run in runs:
            for ifile in itervalues(run['input_files']):
                inputtar.add(str(join_root(root, PosixPath(ifile))),
                             str(PosixPath(ifile)))
        inputtar.close()

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {}, 'chroot')

    signals.post_setup(target=target)