def test_gzip_stream(self):
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        self.addCleanup(lambda: os.remove(temp_file.name))
        temp_file.write('contents')
        name = temp_file.name
    self.assertEquals(un_gzip_stream(gzip_file(name)).read(), 'contents')
def test_gzip_stream(self):
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        self.addCleanup(lambda: os.remove(temp_file.name))
        temp_file.write(b'contents')
        name = temp_file.name
    self.assertEqual(un_gzip_stream(gzip_file(name)).read(), b'contents')
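# The gzip_file and un_gzip_stream helpers exercised by the test above live
# elsewhere (file_util). A minimal in-memory sketch of the contract the test
# assumes -- gzip_file(path) returns a file-like object of gzipped bytes, and
# un_gzip_stream(fileobj) wraps it in a decompressing reader. The buffering
# approach here is an assumption; the real implementations may stream instead.
import gzip
import io


def gzip_file(path):
    # Read the file and return a BytesIO holding its gzipped contents.
    buf = io.BytesIO()
    with open(path, 'rb') as f:
        with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
            gz.write(f.read())
    buf.seek(0)
    return buf


def un_gzip_stream(fileobj):
    # Wrap a gzipped stream in a reader that decompresses on .read().
    return gzip.GzipFile(fileobj=fileobj, mode='rb')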
def _stream_file(self, uuid, path, gzipped):
    """
    Returns a file-like object reading the given file. This file is
    gzipped if gzipped is True.
    """
    if self._is_available_locally(uuid):
        file_path = self._get_target_path(uuid, path)
        if gzipped:
            return file_util.gzip_file(file_path)
        else:
            return open(file_path)
    else:
        worker = self.get_bundle_worker(uuid)
        response_socket_id = self._worker_model.allocate_socket(
            worker['user_id'], worker['worker_id'])
        try:
            read_args = {
                'type': 'stream_file',
            }
            self._send_read_message(worker, response_socket_id, uuid, path, read_args)
            fileobj = self._get_read_response_stream(response_socket_id)
            if not gzipped:
                fileobj = file_util.un_gzip_stream(fileobj)
            return Deallocating(fileobj, self._worker_model, response_socket_id)
        except:
            self._worker_model.deallocate_socket(response_socket_id)
            raise
def stream_file(self, uuid, path, gzipped):
    """
    Returns a file-like object reading the given file. This file is
    gzipped if gzipped is True.
    """
    if self._is_available_locally(uuid):
        file_path = self._get_target_path(uuid, path)
        if gzipped:
            return file_util.gzip_file(file_path)
        else:
            return open(file_path)
    else:
        worker = self._worker_model.get_bundle_worker(uuid)
        response_socket_id = self._worker_model.allocate_socket(
            worker['user_id'], worker['worker_id']
        )
        try:
            read_args = {'type': 'stream_file'}
            self._send_read_message(worker, response_socket_id, uuid, path, read_args)
            fileobj = self._get_read_response_stream(response_socket_id)
            if not gzipped:
                fileobj = file_util.un_gzip_stream(fileobj)
            return Deallocating(fileobj, self._worker_model, response_socket_id)
        except Exception:
            self._worker_model.deallocate_socket(response_socket_id)
            raise
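# Deallocating, returned above, is assumed to be a small file-like proxy that
# frees the worker socket once the caller finishes reading the remote stream.
# A minimal sketch under that assumption, using only the _worker_model methods
# already shown in stream_file:
class Deallocating(object):
    """File-like wrapper that deallocates the socket when closed."""

    def __init__(self, fileobj, worker_model, socket_id):
        self._fileobj = fileobj
        self._worker_model = worker_model
        self._socket_id = socket_id

    def __getattr__(self, name):
        # Delegate read(), readline(), iteration, etc. to the wrapped stream.
        return getattr(self._fileobj, name)

    def close(self):
        try:
            self._fileobj.close()
        finally:
            self._worker_model.deallocate_socket(self._socket_id)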
def stream_file(final_path):
    # Worker-side handler: gzip the file at final_path and stream it back
    # through reply_fn. (`closing` comes from contextlib.)
    with closing(gzip_file(final_path)) as fileobj:
        reply_fn(None, {}, fileobj)
def pack_files_for_upload(sources, should_unpack, follow_symlinks,
                          exclude_patterns=None, force_compression=False):
    """
    Create a single flat tarfile containing all the sources.
    Caller is responsible for closing the returned fileobj.

    Note: It may be possible to achieve additional speed gains on certain
    cases if we disable compression when tar-ing directories. But for now,
    force_compression only affects the case of single, uncompressed files.

    :param sources: list of paths to files to pack
    :param should_unpack: will unpack archives iff True
    :param follow_symlinks: will follow symlinks if True else behavior undefined
    :param exclude_patterns: list of glob patterns for files to ignore, or
                             None to include all files
    :param force_compression: True to always use compression
    :return: dict with {
        'fileobj': <file object of archive>,
        'filename': <name of archive file>,
        'filesize': <size of archive in bytes, or None if unknown>,
        'should_unpack': <True iff archive should be unpacked at server>,
        'should_simplify': <True iff directory should be 'simplified' at server>
        }
    """
    exclude_patterns = exclude_patterns or []

    def resolve_source(source):
        # Resolve symlink if desired
        resolved_source = source
        if follow_symlinks:
            resolved_source = os.path.realpath(source)
            if not os.path.exists(resolved_source):
                raise UsageError('Broken symlink')
        elif os.path.islink(source):
            raise UsageError('Not following symlinks.')
        return resolved_source

    sources = map(resolve_source, sources)

    # For efficiency, return single files and directories directly
    if len(sources) == 1:
        source = sources[0]
        filename = os.path.basename(source)
        if os.path.isdir(sources[0]):
            archived = tar_gzip_directory(
                source, follow_symlinks=follow_symlinks,
                exclude_patterns=exclude_patterns)
            return {
                'fileobj': archived,
                'filename': filename + '.tar.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        elif path_is_archive(source):
            return {
                'fileobj': open(source),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': should_unpack,
                'should_simplify': True,
            }
        elif force_compression:
            return {
                'fileobj': gzip_file(source),
                'filename': filename + '.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        else:
            return {
                'fileobj': open(source),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': False,
                'should_simplify': False,
            }

    # Build archive file incrementally from all sources
    # TODO: For further optimization, could either use a temporary named pipe
    # or a wrapper around a TemporaryFile to concurrently write to the tarfile
    # while the REST client reads and sends it to the server. At the moment,
    # we wait for the tarfile to be created until we rewind and pass the file
    # to the client to be sent to the server.
    scratch_dir = tempfile.mkdtemp()
    archive_fileobj = tempfile.SpooledTemporaryFile()
    archive = tarfile.open(name='we', mode='w:gz', fileobj=archive_fileobj)

    def should_exclude(fn):
        basefn = os.path.basename(fn)
        return any(fnmatch(basefn, p) for p in exclude_patterns)

    for source in sources:
        if should_unpack and path_is_archive(source):
            # Unpack archive into scratch space
            dest_basename = strip_archive_ext(os.path.basename(source))
            dest_path = os.path.join(scratch_dir, dest_basename)
            unpack(get_archive_ext(source), source, dest_path)

            # Add file or directory to archive
            archive.add(dest_path, arcname=dest_basename, recursive=True)
        else:
            # Add file to archive, or add files recursively if directory
            archive.add(source, arcname=os.path.basename(source),
                        recursive=True, exclude=should_exclude)

    # Clean up, rewind archive file, and return it
    archive.close()
    shutil.rmtree(scratch_dir)
    filesize = archive_fileobj.tell()
    archive_fileobj.seek(0)
    return {
        'fileobj': archive_fileobj,
        'filename': 'contents.tar.gz',
        'filesize': filesize,
        'should_unpack': True,
        'should_simplify': False,
    }
def pack_files_for_upload(
    sources, should_unpack, follow_symlinks, exclude_patterns=None, force_compression=False
):
    """
    Create a single flat tarfile containing all the sources.
    Caller is responsible for closing the returned fileobj.

    Note: It may be possible to achieve additional speed gains on certain
    cases if we disable compression when tar-ing directories. But for now,
    force_compression only affects the case of single, uncompressed files.

    :param sources: list of paths to files to pack
    :param should_unpack: will unpack archives iff True
    :param follow_symlinks: will follow symlinks if True else behavior undefined
    :param exclude_patterns: list of glob patterns for files to ignore, or
                             None to include all files
    :param force_compression: True to always use compression
    :return: dict with {
        'fileobj': <file object of archive>,
        'filename': <name of archive file>,
        'filesize': <size of archive in bytes, or None if unknown>,
        'should_unpack': <True iff archive should be unpacked at server>,
        'should_simplify': <True iff directory should be 'simplified' at server>
        }
    """
    exclude_patterns = exclude_patterns or []

    def resolve_source(source):
        # Resolve symlink if desired
        resolved_source = source
        if follow_symlinks:
            resolved_source = os.path.realpath(source)
            if not os.path.exists(resolved_source):
                raise UsageError('Broken symlink')
        elif os.path.islink(source):
            raise UsageError('Not following symlinks.')
        return resolved_source

    # list() so the result can be indexed and len()-ed (map returns an
    # iterator on Python 3).
    sources = list(map(resolve_source, sources))

    # For efficiency, return single files and directories directly
    if len(sources) == 1:
        source = sources[0]
        filename = os.path.basename(source)
        if os.path.isdir(sources[0]):
            archived = tar_gzip_directory(
                source, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns
            )
            return {
                'fileobj': archived,
                'filename': filename + '.tar.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        elif path_is_archive(source):
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': should_unpack,
                'should_simplify': True,
            }
        elif force_compression:
            return {
                'fileobj': gzip_file(source),
                'filename': filename + '.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        else:
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': False,
                'should_simplify': False,
            }

    # Build archive file incrementally from all sources
    # TODO: For further optimization, could either use a temporary named pipe
    # or a wrapper around a TemporaryFile to concurrently write to the tarfile
    # while the REST client reads and sends it to the server. At the moment,
    # we wait for the tarfile to be created until we rewind and pass the file
    # to the client to be sent to the server.
    scratch_dir = tempfile.mkdtemp()
    archive_fileobj = tempfile.SpooledTemporaryFile()
    archive = tarfile.open(name='we', mode='w:gz', fileobj=archive_fileobj)

    def should_exclude(fn):
        basefn = os.path.basename(fn)
        return any(fnmatch(basefn, p) for p in exclude_patterns)

    def exclude_filter(tarinfo):
        # TarFile.add on Python 3 takes `filter` instead of the removed
        # `exclude` argument; returning None skips the member.
        return None if should_exclude(tarinfo.name) else tarinfo

    for source in sources:
        if should_unpack and path_is_archive(source):
            # Unpack archive into scratch space
            dest_basename = strip_archive_ext(os.path.basename(source))
            dest_path = os.path.join(scratch_dir, dest_basename)
            unpack(get_archive_ext(source), source, dest_path)

            # Add file or directory to archive
            archive.add(dest_path, arcname=dest_basename, recursive=True)
        else:
            # Add file to archive, or add files recursively if directory
            archive.add(
                source, arcname=os.path.basename(source), recursive=True, filter=exclude_filter
            )

    # Clean up, rewind archive file, and return it
    archive.close()
    shutil.rmtree(scratch_dir)
    filesize = archive_fileobj.tell()
    archive_fileobj.seek(0)
    return {
        'fileobj': archive_fileobj,
        'filename': 'contents.tar.gz',
        'filesize': filesize,
        'should_unpack': True,
        'should_simplify': False,
    }
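# A hypothetical usage sketch for pack_files_for_upload; the source paths and
# the send_to_server call are placeholders, but the try/finally reflects the
# documented contract that the caller must close the returned fileobj:
packed = pack_files_for_upload(
    ['data/train.csv', 'data/labels.tar.gz'],  # hypothetical sources
    should_unpack=True,
    follow_symlinks=False,
    exclude_patterns=['*.tmp'],
)
try:
    send_to_server(packed['fileobj'], packed['filename'])  # placeholder uploader
finally:
    packed['fileobj'].close()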