def write_fileobj(self, source_ext: str, source_fileobj: IO[bytes], bundle_path: str, unpack_archive: bool):
    """
    Store the contents of |source_fileobj| at |bundle_path|.

    |source_ext|: archive extension handed to zip_util.unpack when unpacking.
    |source_fileobj|: binary file object to read from.
    |bundle_path|: destination path on the local filesystem.
    |unpack_archive|: if true, unpack the stream into |bundle_path| via
    zip_util.unpack; otherwise copy the raw bytes into a file at |bundle_path|.
    """
    if not unpack_archive:
        # Plain copy: stream the bytes straight into the destination file.
        with open(bundle_path, 'wb') as destination:
            shutil.copyfileobj(cast(IO, source_fileobj), destination)
        return

    zip_util.unpack(source_ext, source_fileobj, bundle_path)
def _unpack_file(self, source_path, dest_path, remove_source, simplify_archive):
    """
    Unpack the archive at |source_path| into |dest_path|.

    The archive extension is derived from |source_path| with
    zip_util.get_archive_ext. Afterwards, |source_path| is removed when
    |remove_source| is true, and self._simplify_archive is run on |dest_path|
    when |simplify_archive| is true.
    """
    archive_ext = zip_util.get_archive_ext(source_path)
    zip_util.unpack(archive_ext, source_path, dest_path)

    if remove_source:
        path_util.remove(source_path)

    if simplify_archive:
        self._simplify_archive(dest_path)
def write_fileobj(
    self,
    source_ext: str,
    source_fileobj: IO[bytes],
    bundle_path: str,
    unpack_archive: bool,
    bundle_conn_str=None,
    index_conn_str=None,
    progress_callback=None,
):
    """
    Write |source_fileobj| to |bundle_path|, unpacking it first if requested.

    |source_ext|: archive extension passed to zip_util.unpack.
    |source_fileobj|: binary file object holding the data to store.
    |bundle_path|: local destination path.
    |unpack_archive|: unpack with zip_util.unpack when true; otherwise copy
    the bytes verbatim into a file at |bundle_path|.
    |bundle_conn_str|, |index_conn_str|, |progress_callback|: accepted but not
    used by this implementation.
    """
    if unpack_archive:
        zip_util.unpack(source_ext, source_fileobj, bundle_path)
        return

    raw_stream = cast(IO, source_fileobj)
    with open(bundle_path, 'wb') as sink:
        shutil.copyfileobj(raw_stream, sink)
def test_unpack_to_archive_single_compressed_file(self):
    """Unpack a single compressed file to a .gz file."""
    for (compress_fn, extension) in [
        (bz2.compress, ".bz2"),
        (gzip.compress, ".gz"),
    ]:
        with self.subTest(
            extension=extension
        ), tempfile.TemporaryDirectory() as tmpdir, open(
            os.path.join(tmpdir, "file.txt"), "wb"
        ) as f, tempfile.TemporaryDirectory() as dest_path:
            # Write the compressed sample and flush so the reader below sees it.
            f.write(compress_fn(SAMPLE_CONTENTS))
            f.flush()

            out_path = os.path.join(dest_path, "out")

            # Keep the source open until unpack() finishes: the object returned
            # by unpack_to_archive may still be reading from it (TODO confirm).
            # The context manager closes the handle, which was previously leaked.
            with open(os.path.join(tmpdir, "file.txt"), "rb") as source_fileobj:
                archive_fileobj = unpack_to_archive(extension, source_fileobj)
                unpack(".gz", archive_fileobj, out_path)

            # Read the result through a context manager so the handle is closed.
            with open(out_path, "rb") as unpacked:
                self.assertEqual(SAMPLE_CONTENTS, unpacked.read())
def test_unpack_single_archive(self):
    """Unpack a single archive."""
    for (compress_fn, extension) in [
        (tar_gzip_directory, ".tar.gz"),
        (tar_bz2_directory, ".tar.bz2"),
        (zip_directory, ".zip"),
    ]:
        with self.subTest(
            extension=extension
        ), tempfile.TemporaryDirectory() as tmpdir, open(
            os.path.join(tmpdir, "file.txt"), "wb"
        ) as f, tempfile.TemporaryDirectory() as dest_path:
            # Flush so the archiver sees the sample contents on disk.
            f.write(SAMPLE_CONTENTS)
            f.flush()

            out_dir = os.path.join(dest_path, "out")
            unpack(extension, compress_fn(tmpdir), out_dir)

            # The source directory is untouched and the unpacked directory
            # contains exactly the one file.
            self.assertEqual(os.listdir(tmpdir), ["file.txt"])
            self.assertEqual(os.listdir(out_dir), ["file.txt"])

            # Read through a context manager so the handle is closed
            # (it was previously leaked).
            with open(os.path.join(out_dir, "file.txt"), "rb") as unpacked:
                self.assertEqual(unpacked.read(), SAMPLE_CONTENTS)
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
    """
    |sources|: specifies the locations of the contents to upload.  Each element is
               either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running
      'git clone |source|'
    - If |unpack| is True and a source is an archive (zip, tar.gz, etc.), then
      unpack the source.

    The contents are staged in a temporary directory, hashed, and then moved
    into the data directory (self.data) under a name given by the hash of the
    contents.  If data with that hash already exists, the staged copy is
    discarded instead.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    to_delete = []

    # Create temporary directory as a staging area and put everything there.
    temp_path = tempfile.mkdtemp('-bundle_store_upload')
    temp_subpaths = []
    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        temp_subpath = os.path.join(temp_path, os.path.basename(source))
        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)

        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line(
                'BundleStore.upload: downloading %s to %s' % (source, temp_path)
            )
            if git:
                file_util.git_clone(source, temp_subpath)
            else:
                file_util.download_url(source, temp_subpath, print_status=True)
                if source_unpack:
                    # Unpack next to the download, then drop the archive itself.
                    zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                    path_util.remove(temp_subpath)
                    temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')

            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            else:
                if remove_sources:
                    # The source is going away anyway, so move instead of copying.
                    path_util.rename(source_path, temp_subpath)
                else:
                    path_util.copy(
                        source_path,
                        temp_subpath,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
            print_util.clear_line()

        temp_subpaths.append(temp_subpath)

    # If exactly one source, then upload that directly.
    if len(temp_subpaths) == 1:
        # The staging directory itself is no longer the payload; clean it up later.
        to_delete.append(temp_path)
        temp_path = temp_subpaths[0]

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()

    final_path = os.path.join(self.data, data_hash)
    if os.path.exists(final_path):
        # Already exists, just delete it
        path_util.remove(temp_path)
    else:
        # Written with sys.stderr.write rather than the Python-2-only
        # `print >>sys.stderr` statement so this works on Python 2 and 3.
        sys.stderr.write(
            'BundleStore.upload: moving %s to %s\n' % (temp_path, final_path)
        )
        path_util.rename(temp_path, final_path)

    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)

    # After this operation there should always be a directory at the final path.
    assert(os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def _unpack_fileobj(self, source_filename, source_fileobj, dest_path, simplify_archive):
    """
    Unpack the archive read from |source_fileobj| into |dest_path|.

    The archive extension is derived from |source_filename| with
    zip_util.get_archive_ext. When |simplify_archive| is true,
    self._simplify_archive is run on |dest_path| afterwards.
    """
    archive_ext = zip_util.get_archive_ext(source_filename)
    zip_util.unpack(archive_ext, source_fileobj, dest_path)

    if not simplify_archive:
        return
    self._simplify_archive(dest_path)
def download_target(self, target, final_path):
    """
    Unpack the archive for |target| into |final_path|.

    The identifier returned by self.open_target_archive is wrapped in an
    RPCFileHandle (bound to self.proxy), handed to zip_util.unpack, and then
    passed to self.finalize_file.  finalize_file is called even if unpacking
    raises, so the opened archive is not left unfinalized on failure.
    """
    source_uuid = self.open_target_archive(target)
    try:
        source = RPCFileHandle(source_uuid, self.proxy)
        zip_util.unpack(source, final_path)
    finally:
        # NOTE(review): presumably finalize_file releases the handle opened by
        # open_target_archive -- confirm it is safe to call after a failed unpack.
        self.finalize_file(source_uuid)
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
    """
    |sources|: specifies the locations of the contents to upload.  Each element is
               either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.
    |uuid|: used to pick the partition (via self.ring.get_node) and as the name
            of the final path inside that partition's DATA_SUBDIRECTORY.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running
      'git clone |source|'
    - If |unpack| is True and a source is an archive (zip, tar.gz, etc.), then
      unpack the source.

    The contents are written directly to the final path; the content hash is
    computed afterwards and returned, but is not used to name the path.

    Raises UsageError if the final path already exists.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    to_delete = []

    # If just a single file, set the final path to be equal to that file
    single_path = len(sources) == 1

    # Determine which disk this will go on
    disk_choice = self.ring.get_node(uuid)

    final_path = os.path.join(self.partitions, disk_choice, self.DATA_SUBDIRECTORY, uuid)
    if os.path.exists(final_path):
        raise UsageError('Path %s already present in bundle store' % final_path)
    # Only make if not there
    elif not single_path:
        path_util.make_directory(final_path)

    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        if not single_path:
            subpath = os.path.join(final_path, os.path.basename(source))
        else:
            subpath = final_path

        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)

        if source_unpack and single_path:
            # Load the file into the bundle store under the given path.
            # Stripping the extension again below yields |final_path| itself.
            subpath += zip_util.get_archive_ext(source)

        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, subpath))
            if git:
                file_util.git_clone(source, subpath)
            else:
                file_util.download_url(source, subpath, print_status=True)
                if source_unpack:
                    # Unpack next to the download, then drop the archive itself.
                    zip_util.unpack(subpath, zip_util.strip_archive_ext(subpath))
                    path_util.remove(subpath)
                    subpath = zip_util.strip_archive_ext(subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')

            # Recursively copy the directory into the BundleStore
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(subpath))
                subpath = zip_util.strip_archive_ext(subpath)
            else:
                if remove_sources:
                    # The source is going away anyway, so move instead of copying.
                    path_util.rename(source_path, subpath)
                else:
                    path_util.copy(
                        source_path,
                        subpath,
                        follow_symlinks=follow_symlinks,
                        exclude_patterns=exclude_patterns,
                    )
            print_util.clear_line()

    # Multiplex between a directory and a single file for the path_util calls.
    if os.path.isdir(final_path):
        dirs_and_files = path_util.recursive_ls(final_path)
    else:
        dirs_and_files = [], [final_path]

    # Hash the contents of the bundle directory. Update the data_hash attribute
    # for the bundle
    print_util.open_line('BundleStore.upload: hashing %s' % final_path)
    data_hash = '0x%s' % (path_util.hash_directory(final_path, dirs_and_files),)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
    data_size = path_util.get_size(final_path, dirs_and_files)
    print_util.clear_line()

    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)

    # After this operation there should always be a directory at the final path.
    assert (os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})