def create(prefix, file_list, out_fn, out_folder=None, **kw):
    """Build a .conda package from *file_list* rooted at *prefix*.

    The package is a ZIP_STORED zipfile containing two compressed tarballs
    (info-* metadata and pkg-* payload) plus a metadata.json version spec.

    :param prefix: root directory the file paths are relative to
    :param file_list: iterable of relative paths to package
    :param out_fn: output filename; a trailing '.conda' is stripped and re-added
    :param out_folder: destination directory; defaults to the CURRENT working
        directory at call time (a ``os.getcwd()`` default would be frozen to
        whatever directory was current when the module was imported)
    :param kw: may contain 'compression_tuple' = (ext, filter_name, filter_opts)
    :return: absolute-ish path of the created package in *out_folder*
    """
    if out_folder is None:
        out_folder = os.getcwd()
    with TemporaryDirectory() as tmp:
        out_fn = out_fn.replace('.conda', '')
        conda_pkg_fn = os.path.join(tmp, out_fn) + '.conda'
        # info/* files go into their own (small) tarball so metadata can be
        # read without decompressing the full payload.
        pkg_files = utils.filter_info_files(file_list, prefix)
        info_files = set(file_list) - set(pkg_files)
        ext, comp_filter, filter_opts = kw.get(
            'compression_tuple') or DEFAULT_COMPRESSION_TUPLE
        info_tarball = create_compressed_tarball(prefix, info_files, tmp,
                                                 'info-' + out_fn,
                                                 ext, comp_filter, filter_opts)
        pkg_tarball = create_compressed_tarball(prefix, pkg_files, tmp,
                                                'pkg-' + out_fn,
                                                ext, comp_filter, filter_opts)
        _write_conda_pkg_version_spec(tmp)
        with utils.tmp_chdir(tmp):
            # ZIP_STORED: the inner tarballs are already compressed.
            with zipfile.ZipFile(conda_pkg_fn, 'w',
                                 compression=zipfile.ZIP_STORED) as zf:
                for pkg in (info_tarball, pkg_tarball):
                    zf.write(os.path.basename(pkg))
                zf.write('metadata.json')
        final_path = os.path.join(out_folder, os.path.basename(conda_pkg_fn))
        # Move out of tmp before TemporaryDirectory cleans it up.
        shutil.move(conda_pkg_fn, final_path)
        return final_path
def _tar_xf(tarball, dir_path):
    """Extract *tarball* into *dir_path* via libarchive with hardened flags."""
    # Preserve timestamps/permissions; refuse dot-dot traversal, symlink
    # escapes and absolute paths while extracting.
    extract_flags = (
        libarchive.extract.EXTRACT_TIME
        | libarchive.extract.EXTRACT_PERM
        | libarchive.extract.EXTRACT_SECURE_NODOTDOT
        | libarchive.extract.EXTRACT_SECURE_SYMLINKS
        | libarchive.extract.EXTRACT_SECURE_NOABSOLUTEPATHS
    )
    # Anchor a relative tarball path now, before we chdir into the destination.
    if not os.path.isabs(tarball):
        tarball = os.path.join(os.getcwd(), tarball)
    with utils.tmp_chdir(dir_path):
        libarchive.extract_file(tarball, extract_flags)
def _extract_component(fn, file_id, component_name, dest_dir=None):
    """Extract one named component tarball from the .conda zipfile *fn*.

    The component tarball is pulled out of the zip into a scratch directory,
    then untarred into *dest_dir*.

    :param fn: path to the .conda (zip) archive
    :param file_id: package identifier used to locate the component entry
    :param component_name: e.g. 'info' or 'pkg'
    :param dest_dir: directory to untar the component into
    :raises RuntimeError: when the component is not present in the archive
    """
    with TemporaryDirectory() as tmp:
        with utils.tmp_chdir(tmp):
            with zipfile.ZipFile(fn, compression=zipfile.ZIP_STORED) as zf:
                component_filename = _lookup_component_filename(
                    zf, file_id, component_name)
                if not component_filename:
                    # BUGFIX: formerly referenced the undefined name
                    # 'component_filename_without_ext', turning this error
                    # path into a NameError.
                    raise RuntimeError("didn't find {} in {}".format(
                        component_name, fn))
                component_filename = component_filename[0]
                zf.extract(component_filename)
                _tar_xf(component_filename, dest_dir)
def _extract_component(fn, file_id, component_name, dest_dir=None):
    """Extract one named component tarball from the .conda zipfile *fn*.

    :param fn: path to the .conda (zip) archive
    :param file_id: package identifier used to locate the component entry
    :param component_name: e.g. 'info' or 'pkg'
    :param dest_dir: directory to untar into; defaults to the CURRENT working
        directory at call time (the previous ``os.getcwd()`` default was
        evaluated once at import time and frozen)
    :raises RuntimeError: when the component is not present in the archive
    :raises InvalidArchiveError: when *fn* is not a valid zipfile
    """
    if dest_dir is None:
        dest_dir = os.getcwd()
    try:
        with ZipFile(fn, compression=ZIP_STORED) as zf:
            with utils.TemporaryDirectory() as tmpdir:
                with utils.tmp_chdir(tmpdir):
                    component_filename = _lookup_component_filename(
                        zf, file_id, component_name)
                    if not component_filename:
                        raise RuntimeError(
                            "didn't find {} component in {}".format(
                                component_name, fn))
                    component_filename = component_filename[0]
                    zf.extract(component_filename)
                    _tar_xf(component_filename, dest_dir)
    except BadZipFile as e:
        # Normalize corrupt-archive errors to the package's own exception.
        raise InvalidArchiveError(fn, str(e))
def create_compressed_tarball(prefix, files, tmpdir, basename, ext,
                              compression_filter, filter_opts=''):
    """Write *files* (relative to *prefix*) into a compressed gnutar archive.

    The archive is created at ``tmpdir/basename + ext`` using the given
    libarchive compression filter; the full path is returned.
    """
    result_path = os.path.join(tmpdir, basename) + ext
    # add files in order of a) in info directory, b) increasing size so
    # we can access small manifest or json files without decompressing
    # possible large binary or data files
    ordered_files = _sort_file_order(prefix, files)
    with utils.tmp_chdir(prefix):
        writer = libarchive.file_writer(result_path, 'gnutar',
                                        filter_name=compression_filter,
                                        options=filter_opts)
        with writer as archive:
            archive.add_files(*ordered_files)
    return result_path
def _sort_file_order(prefix, files): """Sort by filesize or by binsort, to optimize compression""" def order(f): # we don't care about empty files so send them back via 100000 fsize = os.lstat(os.path.join(prefix, f)).st_size or 100000 # info/* records will be False == 0, others will be 1. info_order = int(os.path.dirname(f) != 'info') if info_order: _, ext = os.path.splitext(f) # Strip any .dylib.* and .so.* and rename .dylib to .so ext = re.sub(r'(\.dylib|\.so).*$', r'.so', ext) if not ext: # Files without extensions should be sorted by dirname info_order = 1 + hash(os.path.dirname(f)) % (10**8) else: info_order = 1 + abs(hash(ext)) % (10**8) return info_order, fsize binsort = os.path.join(sys.prefix, 'bin', 'binsort') if os.path.exists(binsort): with NamedTemporaryFile(mode='w', suffix='.filelist', delete=False) as fl: with utils.tmp_chdir(prefix): fl.writelines(map(lambda x: '.' + os.sep + x + '\n', files)) fl.close() cmd = binsort + ' -t 1 -q -d -o 1000 {}'.format(fl.name) out, _ = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE).communicate() files_list = out.decode('utf-8').strip().split('\n') # binsort returns the absolute paths. files_list = [ f.split(prefix + os.sep, 1)[-1] for f in files_list ] os.unlink(fl.name) else: files_list = list(f for f in sorted(files, key=order)) return files_list