Beispiel #1
0
    def test_seek_outside(self):
        """Check tell() after seeking well outside the content, per whence mode.

        For every compression type and every ``whence`` value, the test seeks
        by ``-len(self.content) - 10`` and asserts ``tell()`` is 0, then seeks
        by ``len(self.content) + 20`` and asserts ``tell()`` equals
        ``len(self.content)``.
        """
        content_length = len(self.content)
        # Pairs of (offset passed to seek, position tell() must then report).
        out_of_range_cases = (
            (-1 * content_length - 10, 0),
            (content_length + 20, content_length),
        )
        codecs = (CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                  CompressionTypes.GZIP)
        for codec in codecs:
            path = self._create_compressed_file(codec, self.content)
            with open(path, 'rb') as raw:
                compressed_fd = CompressedFile(
                    raw, codec, read_size=self.read_block_size)

                # The same CompressedFile is reused across whence modes, so
                # each seek starts from wherever the previous one ended.
                for whence in (os.SEEK_CUR, os.SEEK_SET, os.SEEK_END):
                    for offset, expected_position in out_of_range_cases:
                        compressed_fd.seek(offset, whence)
                        self.assertEqual(compressed_fd.tell(),
                                         expected_position)
Beispiel #2
0
  def test_seek_cur(self):
    """Compare relative (SEEK_CUR) seeks against an in-memory BytesIO.

    For every compression type, the same sequence of relative offsets is
    applied to a CompressedFile and to a BytesIO holding the raw content.
    After each seek the next line read and the reported position must match.
    """
    content_length = len(self.content)
    half_length = content_length // 2
    # Test out of bound, inbound seeking in both directions. Offsets are
    # cumulative: each is relative to where the previous readline() stopped.
    relative_offsets = (-1, 0, 1, half_length, half_length,
                        -1 * content_length // 2)
    for codec in (CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                  CompressionTypes.GZIP):
      path = self._create_compressed_file(codec, self.content)
      with open(path, 'rb') as raw:
        compressed_fd = CompressedFile(raw, codec,
                                       read_size=self.read_block_size)
        reference_fd = BytesIO(self.content)

        for offset in relative_offsets:
          compressed_fd.seek(offset, os.SEEK_CUR)
          reference_fd.seek(offset, os.SEEK_CUR)

          self.assertEqual(compressed_fd.readline(), reference_fd.readline())

          # BytesIO may report a position beyond the end of its data, so the
          # reference position is capped at the content length and the
          # reference cursor is moved back to that capped position.
          capped_reference = min(reference_fd.tell(), content_length)
          actual_position = compressed_fd.tell()
          reference_fd.seek(capped_reference, os.SEEK_SET)
          self.assertEqual(actual_position, capped_reference)
Beispiel #3
0
  def test_seek_set(self):
    """Compare absolute (SEEK_SET) seeks against an in-memory BytesIO.

    For every compression type, seeks to offsets at and around both ends of
    the content and checks that the next line read and the reported position
    agree with a BytesIO holding the raw content.
    """
    content_length = len(self.content)
    # Negative seek position argument is not supported for BytesIO with
    # whence set to SEEK_SET, so only non-negative offsets are exercised.
    absolute_offsets = (0, 1, content_length - 1, content_length,
                        content_length + 1)
    for codec in (CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                  CompressionTypes.GZIP):
      path = self._create_compressed_file(codec, self.content)
      with open(path, 'rb') as raw:
        compressed_fd = CompressedFile(raw, codec,
                                       read_size=self.read_block_size)
        reference_fd = BytesIO(self.content)

        for offset in absolute_offsets:
          compressed_fd.seek(offset, os.SEEK_SET)
          reference_fd.seek(offset, os.SEEK_SET)

          self.assertEqual(compressed_fd.readline(), reference_fd.readline())

          # BytesIO's tell() reports out of bound positions (if we seek
          # beyond the file), so the reference value is capped at the content
          # length before it is compared with the CompressedFile position.
          actual_position = compressed_fd.tell()
          capped_reference = min(reference_fd.tell(), content_length)
          self.assertEqual(actual_position, capped_reference)
Beispiel #4
0
    def test_seek_cur(self):
        """Compare relative (SEEK_CUR) seeks against an in-memory BytesIO.

        The same sequence of relative offsets is applied to a CompressedFile
        and to a BytesIO holding the raw content, for every compression type.
        After each seek the next line read and the reported position must
        match.
        """
        total = len(self.content)
        midpoint = total // 2
        # Test out of bound, inbound seeking in both directions. Every offset
        # is relative to the cursor left behind by the previous readline().
        steps = (-1, 0, 1, midpoint, midpoint, -1 * total // 2)
        codecs = (CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                  CompressionTypes.GZIP)
        for codec in codecs:
            path = self._create_compressed_file(codec, self.content)
            with open(path, 'rb') as raw:
                compressed_fd = CompressedFile(
                    raw, codec, read_size=self.read_block_size)
                reference_fd = BytesIO(self.content)

                for step in steps:
                    compressed_fd.seek(step, os.SEEK_CUR)
                    reference_fd.seek(step, os.SEEK_CUR)

                    self.assertEqual(compressed_fd.readline(),
                                     reference_fd.readline())

                    # BytesIO may report a position past the end of its data;
                    # cap it at the content length and pull the reference
                    # cursor back to the capped position before comparing.
                    capped_reference = min(reference_fd.tell(), total)
                    actual_position = compressed_fd.tell()
                    reference_fd.seek(capped_reference, os.SEEK_SET)
                    self.assertEqual(actual_position, capped_reference)
Beispiel #5
0
    def test_seek_set(self):
        """Compare absolute (SEEK_SET) seeks against an in-memory StringIO.

        For BZIP2 and GZIP, seeks to offsets at and around both ends of the
        content (including -1) and checks that the next line read and the
        reported position agree with a StringIO holding the raw content.
        """
        total = len(self.content)
        targets = (-1, 0, 1, total - 1, total, total + 1)
        for codec in (CompressionTypes.BZIP2, CompressionTypes.GZIP):
            path = self._create_compressed_file(codec, self.content)
            with open(path, 'rb') as raw:
                compressed_fd = CompressedFile(
                    raw, codec, read_size=self.read_block_size)
                reference_fd = StringIO(self.content)

                for target in targets:
                    compressed_fd.seek(target, os.SEEK_SET)
                    reference_fd.seek(target, os.SEEK_SET)

                    # Note: the content (readline) check must come before the
                    # position (tell) check because cStringIO's tell() reports
                    # out of bound positions (if we seek beyond the file) up
                    # until a real read occurs.
                    self.assertEqual(compressed_fd.readline(),
                                     reference_fd.readline())
                    self.assertEqual(compressed_fd.tell(),
                                     reference_fd.tell())
Beispiel #6
0
    def test_seek_set(self):
        """Compare absolute (SEEK_SET) seeks against an in-memory BytesIO.

        For every compression type, seeks to offsets at and around both ends
        of the content and checks that the next line read and the reported
        position agree with a BytesIO holding the raw content.
        """
        total = len(self.content)
        # Negative seek position argument is not supported for BytesIO with
        # whence set to SEEK_SET, so only non-negative targets are used.
        targets = (0, 1, total - 1, total, total + 1)
        codecs = (CompressionTypes.BZIP2, CompressionTypes.DEFLATE,
                  CompressionTypes.GZIP)
        for codec in codecs:
            path = self._create_compressed_file(codec, self.content)
            with open(path, 'rb') as raw:
                compressed_fd = CompressedFile(
                    raw, codec, read_size=self.read_block_size)
                reference_fd = BytesIO(self.content)

                for target in targets:
                    compressed_fd.seek(target, os.SEEK_SET)
                    reference_fd.seek(target, os.SEEK_SET)

                    self.assertEqual(compressed_fd.readline(),
                                     reference_fd.readline())

                    # BytesIO's tell() reports out of bound positions (if we
                    # seek beyond the file), so cap the reference value at the
                    # content length before comparing.
                    actual_position = compressed_fd.tell()
                    capped_reference = min(reference_fd.tell(), total)
                    self.assertEqual(actual_position, capped_reference)
Beispiel #7
0
    def test_seek_cur(self):
        """Compare relative (SEEK_CUR) seeks against an in-memory StringIO.

        For BZIP2 and GZIP, the same sequence of relative offsets is applied
        to a CompressedFile and to a StringIO holding the raw content. After
        each seek the next line read and the reported position must match.
        """
        for compression_type in [
                CompressionTypes.BZIP2, CompressionTypes.GZIP
        ]:
            file_name = self._create_compressed_file(compression_type,
                                                     self.content)
            with open(file_name, 'rb') as f:
                compressed_fd = CompressedFile(f,
                                               compression_type,
                                               read_size=self.read_block_size)
                reference_fd = StringIO(self.content)

                # Test out of bound, inbound seeking in both directions.
                # Floor division keeps every offset an int: with true division
                # ("/" on Python 3, or under `from __future__ import division`)
                # the half-length offsets would become floats. For ints on
                # Python 2 "//" gives the same values "/" did.
                for seek_position in (-1, 0, 1, len(self.content) // 2,
                                      len(self.content) // 2,
                                      -1 * len(self.content) // 2):
                    compressed_fd.seek(seek_position, os.SEEK_CUR)
                    reference_fd.seek(seek_position, os.SEEK_CUR)

                    uncompressed_line = compressed_fd.readline()
                    expected_line = reference_fd.readline()
                    self.assertEqual(uncompressed_line, expected_line)

                    reference_position = reference_fd.tell()
                    uncompressed_position = compressed_fd.tell()
                    self.assertEqual(uncompressed_position, reference_position)
Beispiel #8
0
  def test_seek_set(self):
    """Compare absolute (SEEK_SET) seeks against an in-memory StringIO.

    For BZIP2 and GZIP, seeks to offsets at and around both ends of the
    content (including -1) and checks that the next line read and the
    reported position agree with a StringIO holding the raw content.
    """
    content_length = len(self.content)
    absolute_offsets = (-1, 0, 1, content_length - 1, content_length,
                        content_length + 1)
    for codec in (CompressionTypes.BZIP2, CompressionTypes.GZIP):
      path = self._create_compressed_file(codec, self.content)
      with open(path, 'rb') as raw:
        compressed_fd = CompressedFile(raw, codec,
                                       read_size=self.read_block_size)
        reference_fd = StringIO(self.content)

        for offset in absolute_offsets:
          compressed_fd.seek(offset, os.SEEK_SET)
          reference_fd.seek(offset, os.SEEK_SET)

          # Note: the content (readline) check must come before the position
          # (tell) check because cStringIO's tell() reports out of bound
          # positions (if we seek beyond the file) up until a real read
          # occurs.
          self.assertEqual(compressed_fd.readline(), reference_fd.readline())
          self.assertEqual(compressed_fd.tell(), reference_fd.tell())
Beispiel #9
0
    def test_tell(self):
        """Check that tell() follows the running length of data written/read.

        Writes ten lines through a CompressedFile, asserting after each write
        that ``tell()`` equals the total length written so far. Then reads the
        file back line by line, asserting after each ``readline()`` (including
        the final empty one) that ``tell()`` equals the total length read.
        """
        lines = ['line%d\n' % i for i in range(10)]
        tmpfile = self._create_temp_file()

        # The raw handles are owned by ``with`` blocks so they are released
        # even when an assertion fails part-way through the test.
        with open(tmpfile, 'w') as raw_out:
            writeable = CompressedFile(raw_out)
            current_offset = 0
            for line in lines:
                writeable.write(line)
                current_offset += len(line)
                self.assertEqual(current_offset, writeable.tell())

            # Close the wrapper before the file is reopened for reading.
            writeable.close()

        with open(tmpfile) as raw_in:
            readable = CompressedFile(raw_in)
            current_offset = 0
            while True:
                line = readable.readline()
                current_offset += len(line)
                self.assertEqual(current_offset, readable.tell())
                # An empty line signals end of data; the position check above
                # has already run for it.
                if not line:
                    break
Beispiel #10
0
  def test_tell(self):
    """Check that tell() follows the running length of data written/read.

    Writes ten byte lines through a CompressedFile, asserting after each
    write that ``tell()`` equals the total length written so far. Then reads
    the file back line by line, asserting after each ``readline()``
    (including the final empty one) that ``tell()`` equals the total read.
    """
    payloads = [b'line%d\n' % i for i in range(10)]
    tmpfile = self._create_temp_file()

    with open(tmpfile, 'wb') as sink:
      writeable = CompressedFile(sink)
      bytes_written = 0
      for payload in payloads:
        writeable.write(payload)
        bytes_written += len(payload)
        self.assertEqual(bytes_written, writeable.tell())

    with open(tmpfile, 'rb') as source:
      readable = CompressedFile(source)
      bytes_read = 0
      more_data = True
      while more_data:
        chunk = readable.readline()
        bytes_read += len(chunk)
        self.assertEqual(bytes_read, readable.tell())
        # An empty result ends the loop, after its position has been checked.
        more_data = bool(chunk)
Beispiel #11
0
  def test_seek_outside(self):
    """Check tell() after seeking well outside the content, per whence mode.

    For BZIP2 and GZIP and every ``whence`` value, the test seeks by
    ``-len(self.content) - 10`` and asserts ``tell()`` is 0, then seeks by
    ``len(self.content) + 20`` and asserts ``tell()`` equals
    ``len(self.content)``.
    """
    content_length = len(self.content)
    # Pairs of (offset passed to seek, position tell() must then report).
    out_of_range_cases = (
        (-1 * content_length - 10, 0),
        (content_length + 20, content_length),
    )
    for codec in (CompressionTypes.BZIP2, CompressionTypes.GZIP):
      path = self._create_compressed_file(codec, self.content)
      with open(path, 'rb') as raw:
        compressed_fd = CompressedFile(raw, codec,
                                       read_size=self.read_block_size)

        # The same CompressedFile is reused across whence modes, so each
        # seek starts from wherever the previous one ended.
        for whence in (os.SEEK_CUR, os.SEEK_SET, os.SEEK_END):
          for offset, expected_position in out_of_range_cases:
            compressed_fd.seek(offset, whence)
            self.assertEqual(compressed_fd.tell(), expected_position)
Beispiel #12
0
  def test_seek_cur(self):
    """Compare relative (SEEK_CUR) seeks against an in-memory StringIO.

    For BZIP2 and GZIP, the same sequence of relative offsets is applied to
    a CompressedFile and to a StringIO holding the raw content. After each
    seek the next line read and the reported position must match.
    """
    for compression_type in [CompressionTypes.BZIP2, CompressionTypes.GZIP]:
      file_name = self._create_compressed_file(compression_type, self.content)
      with open(file_name, 'rb') as f:
        compressed_fd = CompressedFile(f, compression_type,
                                       read_size=self.read_block_size)
        reference_fd = StringIO(self.content)

        # Test out of bound, inbound seeking in both directions.
        # Floor division keeps every offset an int: with true division ("/"
        # on Python 3, or under `from __future__ import division`) the
        # half-length offsets would become floats. For ints on Python 2 "//"
        # gives the same values "/" did.
        for seek_position in (-1, 0, 1,
                              len(self.content) // 2,
                              len(self.content) // 2,
                              -1 * len(self.content) // 2):
          compressed_fd.seek(seek_position, os.SEEK_CUR)
          reference_fd.seek(seek_position, os.SEEK_CUR)

          uncompressed_line = compressed_fd.readline()
          expected_line = reference_fd.readline()
          self.assertEqual(uncompressed_line, expected_line)

          reference_position = reference_fd.tell()
          uncompressed_position = compressed_fd.tell()
          self.assertEqual(uncompressed_position, reference_position)