def test_gzip_stream(self):
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        self.addCleanup(lambda: os.remove(temp_file.name))
        temp_file.write('contents')
        name = temp_file.name
    self.assertEquals(un_gzip_stream(gzip_file(name)).read(), 'contents')
def test_gzip_stream(self):
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        self.addCleanup(lambda: os.remove(temp_file.name))
        temp_file.write(b'contents')
        name = temp_file.name
    self.assertEqual(un_gzip_stream(gzip_file(name)).read(), b'contents')
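# The gzip_file and un_gzip_stream helpers exercised by the test above live
# elsewhere (file_util). A minimal in-memory sketch of the contract the test
# assumes -- gzip_file(path) returns a file-like object of gzipped bytes, and
# un_gzip_stream(fileobj) wraps it in a decompressing reader. The buffering
# approach here is an assumption; the real implementations may stream instead.
import gzip
import io


def gzip_file(path):
    # Read the file and return a BytesIO holding its gzipped contents.
    buf = io.BytesIO()
    with open(path, 'rb') as f:
        with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
            gz.write(f.read())
    buf.seek(0)
    return buf


def un_gzip_stream(fileobj):
    # Wrap a gzipped stream in a reader that decompresses on .read().
    return gzip.GzipFile(fileobj=fileobj, mode='rb')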
def _stream_file(self, uuid, path, gzipped):
    """
    Returns a file-like object reading the given file. This file is
    gzipped if gzipped is True.
    """
    if self._is_available_locally(uuid):
        file_path = self._get_target_path(uuid, path)
        if gzipped:
            return file_util.gzip_file(file_path)
        else:
            return open(file_path)
    else:
        worker = self.get_bundle_worker(uuid)
        response_socket_id = self._worker_model.allocate_socket(
            worker['user_id'], worker['worker_id'])
        try:
            read_args = {
                'type': 'stream_file',
            }
            self._send_read_message(worker, response_socket_id, uuid, path, read_args)
            fileobj = self._get_read_response_stream(response_socket_id)
            if not gzipped:
                fileobj = file_util.un_gzip_stream(fileobj)
            return Deallocating(fileobj, self._worker_model, response_socket_id)
        except:
            self._worker_model.deallocate_socket(response_socket_id)
            raise
def stream_file(self, uuid, path, gzipped):
    """
    Returns a file-like object reading the given file. This file is
    gzipped if gzipped is True.
    """
    if self._is_available_locally(uuid):
        file_path = self._get_target_path(uuid, path)
        if gzipped:
            return file_util.gzip_file(file_path)
        else:
            return open(file_path)
    else:
        worker = self._worker_model.get_bundle_worker(uuid)
        response_socket_id = self._worker_model.allocate_socket(
            worker['user_id'], worker['worker_id']
        )
        try:
            read_args = {'type': 'stream_file'}
            self._send_read_message(worker, response_socket_id, uuid, path, read_args)
            fileobj = self._get_read_response_stream(response_socket_id)
            if not gzipped:
                fileobj = file_util.un_gzip_stream(fileobj)
            return Deallocating(fileobj, self._worker_model, response_socket_id)
        except Exception:
            self._worker_model.deallocate_socket(response_socket_id)
            raise
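# Deallocating, returned above, is assumed to be a small file-like proxy that
# frees the worker socket once the caller finishes reading the remote stream.
# A minimal sketch under that assumption, using only the _worker_model methods
# already shown in stream_file:
class Deallocating(object):
    """File-like wrapper that deallocates the socket when closed."""

    def __init__(self, fileobj, worker_model, socket_id):
        self._fileobj = fileobj
        self._worker_model = worker_model
        self._socket_id = socket_id

    def __getattr__(self, name):
        # Delegate read(), readline(), iteration, etc. to the wrapped stream.
        return getattr(self._fileobj, name)

    def close(self):
        try:
            self._fileobj.close()
        finally:
            self._worker_model.deallocate_socket(self._socket_id)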
def stream_file(final_path):
    # Worker-side handler: gzip the file at final_path and stream it back
    # through reply_fn. (`closing` comes from contextlib.)
    with closing(gzip_file(final_path)) as fileobj:
        reply_fn(None, {}, fileobj)
def pack_files_for_upload(sources, should_unpack, follow_symlinks,
                          exclude_patterns=None, force_compression=False):
    """
    Create a single flat tarfile containing all the sources.
    Caller is responsible for closing the returned fileobj.

    Note: It may be possible to achieve additional speed gains on certain
    cases if we disable compression when tar-ing directories. But for now,
    force_compression only affects the case of single, uncompressed files.

    :param sources: list of paths to files to pack
    :param should_unpack: will unpack archives iff True
    :param follow_symlinks: will follow symlinks if True else behavior undefined
    :param exclude_patterns: list of glob patterns for files to ignore, or
                             None to include all files
    :param force_compression: True to always use compression
    :return: dict with {
        'fileobj': <file object of archive>,
        'filename': <name of archive file>,
        'filesize': <size of archive in bytes, or None if unknown>,
        'should_unpack': <True iff archive should be unpacked at server>,
        'should_simplify': <True iff directory should be 'simplified' at server>
        }
    """
    exclude_patterns = exclude_patterns or []

    def resolve_source(source):
        # Resolve symlink if desired
        resolved_source = source
        if follow_symlinks:
            resolved_source = os.path.realpath(source)
            if not os.path.exists(resolved_source):
                raise UsageError('Broken symlink')
        elif os.path.islink(source):
            raise UsageError('Not following symlinks.')
        return resolved_source

    sources = map(resolve_source, sources)

    # For efficiency, return single files and directories directly
    if len(sources) == 1:
        source = sources[0]
        filename = os.path.basename(source)
        if os.path.isdir(sources[0]):
            archived = tar_gzip_directory(
                source, follow_symlinks=follow_symlinks,
                exclude_patterns=exclude_patterns)
            return {
                'fileobj': archived,
                'filename': filename + '.tar.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        elif path_is_archive(source):
            return {
                'fileobj': open(source),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': should_unpack,
                'should_simplify': True,
            }
        elif force_compression:
            return {
                'fileobj': gzip_file(source),
                'filename': filename + '.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        else:
            return {
                'fileobj': open(source),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': False,
                'should_simplify': False,
            }

    # Build archive file incrementally from all sources
    # TODO: For further optimization, could either use a temporary named pipe
    # or a wrapper around a TemporaryFile to concurrently write to the tarfile
    # while the REST client reads and sends it to the server. At the moment,
    # we wait for the tarfile to be created until we rewind and pass the file
    # to the client to be sent to the server.
    scratch_dir = tempfile.mkdtemp()
    archive_fileobj = tempfile.SpooledTemporaryFile()
    archive = tarfile.open(name='we', mode='w:gz', fileobj=archive_fileobj)

    def should_exclude(fn):
        basefn = os.path.basename(fn)
        return any(fnmatch(basefn, p) for p in exclude_patterns)

    for source in sources:
        if should_unpack and path_is_archive(source):
            # Unpack archive into scratch space
            dest_basename = strip_archive_ext(os.path.basename(source))
            dest_path = os.path.join(scratch_dir, dest_basename)
            unpack(get_archive_ext(source), source, dest_path)

            # Add file or directory to archive
            archive.add(dest_path, arcname=dest_basename, recursive=True)
        else:
            # Add file to archive, or add files recursively if directory
            archive.add(source, arcname=os.path.basename(source),
                        recursive=True, exclude=should_exclude)

    # Clean up, rewind archive file, and return it
    archive.close()
    shutil.rmtree(scratch_dir)
    filesize = archive_fileobj.tell()
    archive_fileobj.seek(0)
    return {
        'fileobj': archive_fileobj,
        'filename': 'contents.tar.gz',
        'filesize': filesize,
        'should_unpack': True,
        'should_simplify': False,
    }
def pack_files_for_upload(
    sources, should_unpack, follow_symlinks, exclude_patterns=None, force_compression=False
):
    """
    Create a single flat tarfile containing all the sources.
    Caller is responsible for closing the returned fileobj.

    Note: It may be possible to achieve additional speed gains on certain
    cases if we disable compression when tar-ing directories. But for now,
    force_compression only affects the case of single, uncompressed files.

    :param sources: list of paths to files to pack
    :param should_unpack: will unpack archives iff True
    :param follow_symlinks: will follow symlinks if True else behavior undefined
    :param exclude_patterns: list of glob patterns for files to ignore, or
                             None to include all files
    :param force_compression: True to always use compression
    :return: dict with {
        'fileobj': <file object of archive>,
        'filename': <name of archive file>,
        'filesize': <size of archive in bytes, or None if unknown>,
        'should_unpack': <True iff archive should be unpacked at server>,
        'should_simplify': <True iff directory should be 'simplified' at server>
        }
    """
    exclude_patterns = exclude_patterns or []

    def resolve_source(source):
        # Resolve symlink if desired
        resolved_source = source
        if follow_symlinks:
            resolved_source = os.path.realpath(source)
            if not os.path.exists(resolved_source):
                raise UsageError('Broken symlink')
        elif os.path.islink(source):
            raise UsageError('Not following symlinks.')
        return resolved_source

    # list() so the result can be indexed and len()-ed (map returns an
    # iterator on Python 3).
    sources = list(map(resolve_source, sources))

    # For efficiency, return single files and directories directly
    if len(sources) == 1:
        source = sources[0]
        filename = os.path.basename(source)
        if os.path.isdir(sources[0]):
            archived = tar_gzip_directory(
                source, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns
            )
            return {
                'fileobj': archived,
                'filename': filename + '.tar.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        elif path_is_archive(source):
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': should_unpack,
                'should_simplify': True,
            }
        elif force_compression:
            return {
                'fileobj': gzip_file(source),
                'filename': filename + '.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        else:
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': False,
                'should_simplify': False,
            }

    # Build archive file incrementally from all sources
    # TODO: For further optimization, could either use a temporary named pipe
    # or a wrapper around a TemporaryFile to concurrently write to the tarfile
    # while the REST client reads and sends it to the server. At the moment,
    # we wait for the tarfile to be created until we rewind and pass the file
    # to the client to be sent to the server.
    scratch_dir = tempfile.mkdtemp()
    archive_fileobj = tempfile.SpooledTemporaryFile()
    archive = tarfile.open(name='we', mode='w:gz', fileobj=archive_fileobj)

    def should_exclude(fn):
        basefn = os.path.basename(fn)
        return any(fnmatch(basefn, p) for p in exclude_patterns)

    def exclude_filter(tarinfo):
        # TarFile.add on Python 3 takes `filter` instead of the removed
        # `exclude` argument; returning None skips the member.
        return None if should_exclude(tarinfo.name) else tarinfo

    for source in sources:
        if should_unpack and path_is_archive(source):
            # Unpack archive into scratch space
            dest_basename = strip_archive_ext(os.path.basename(source))
            dest_path = os.path.join(scratch_dir, dest_basename)
            unpack(get_archive_ext(source), source, dest_path)

            # Add file or directory to archive
            archive.add(dest_path, arcname=dest_basename, recursive=True)
        else:
            # Add file to archive, or add files recursively if directory
            archive.add(
                source, arcname=os.path.basename(source), recursive=True, filter=exclude_filter
            )

    # Clean up, rewind archive file, and return it
    archive.close()
    shutil.rmtree(scratch_dir)
    filesize = archive_fileobj.tell()
    archive_fileobj.seek(0)
    return {
        'fileobj': archive_fileobj,
        'filename': 'contents.tar.gz',
        'filesize': filesize,
        'should_unpack': True,
        'should_simplify': False,
    }
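# A hypothetical usage sketch for pack_files_for_upload; the source paths and
# the send_to_server call are placeholders, but the try/finally reflects the
# documented contract that the caller must close the returned fileobj:
packed = pack_files_for_upload(
    ['data/train.csv', 'data/labels.tar.gz'],  # hypothetical sources
    should_unpack=True,
    follow_symlinks=False,
    exclude_patterns=['*.tmp'],
)
try:
    send_to_server(packed['fileobj'], packed['filename'])  # placeholder uploader
finally:
    packed['fileobj'].close()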