예제 #1
0
 def test_read_delimited_chunks_empty_input(self):
     """Check that read_delimited_chunks yields nothing for empty input."""
     # An empty stream must produce no chunks at all (rather than a
     # single empty chunk), much like bytes.splitlines() returns an
     # empty list when called on empty input.
     empty_stream = io.BytesIO(b"")
     chunks = list(read_delimited_chunks(empty_stream, 1000))
     self.assertEqual(chunks, [])
예제 #2
0
    def test_read_delimited_chunks_long_lines(self):
        """Test lines longer than chunk_size, data longer than chunk_size.

        When a line does not fit into chunk_size, the chunk carrying it
        has to exceed chunk_size as well. Such an oversized chunk may
        contain a newline only as its very last byte.
        """
        chunk_size = 100

        # Make sure the test data fits the testcase: at least one line
        # must be longer than chunk_size, so that at least one chunk has
        # to be longer as well.
        longest_line = max(map(len, csv_test_data.splitlines()))
        self.assertGreater(longest_line, chunk_size)

        # The actual test
        chunked = list(
            read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))

        # Appending all chunks yields the original unchunked data
        self.assertEqual(csv_test_data, b"".join(chunked))

        # Chunks longer than chunk_size have newlines only at the end.
        long_chunks = [chunk for chunk in chunked if len(chunk) > chunk_size]
        self.assertTrue(long_chunks,
                        "expected at least one chunk longer than chunk_size")
        for chunk in long_chunks:
            # Ignore a single trailing newline; anything before it must
            # be free of line separators.
            interior = chunk[:-1] if chunk.endswith(b"\n") else chunk
            self.assertNotIn(b"\n", interior)
예제 #3
0
    def test_read_delimited_chunks_short_lines(self):
        """Test lines shorter than chunk_size, data longer than chunk_size.

        With every line fitting into chunk_size, no chunk may exceed
        chunk_size and chunks must only be split at line separators.
        """
        chunk_size = 500
        lines = csv_test_data.splitlines()

        # Make sure the test data fits the testcase:
        #
        # 1. The lines must be shorter than the chunk_size, so that all
        # chunks can stay within chunk_size
        self.assertLess(max(map(len, lines)), chunk_size)
        # 2. The data must be longer than chunk_size so that chunking is
        # useful
        self.assertGreater(len(csv_test_data), chunk_size)

        # The actual test
        chunked = list(
            read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))

        # Appending all chunks yields the original unchunked data
        self.assertEqual(csv_test_data, b"".join(chunked))

        # No chunk is longer than chunk_size
        self.assertLessEqual(max(map(len, chunked)), chunk_size)

        # All chunks were split at line separators: splitting each chunk
        # into lines gives the same lines as splitting the whole data.
        lines_from_chunks = list(
            itertools.chain.from_iterable(chunk.splitlines()
                                          for chunk in chunked))
        self.assertEqual(lines, lines_from_chunks)
예제 #4
0
    def test_read_delimited_chunks_long_lines(self):
        """Test lines longer than chunk_size, data longer than chunk_size.

        A line that does not fit into chunk_size forces the chunk that
        carries it to exceed chunk_size too. Any such oversized chunk
        may contain a newline only as its final byte.
        """
        chunk_size = 100

        # Make sure the test data fits the testcase: at least one line
        # must be longer than chunk_size, so that at least one chunk has
        # to be longer as well.
        longest_line = max(map(len, csv_test_data.splitlines()))
        self.assertGreater(longest_line, chunk_size)

        # The actual test
        chunked = list(read_delimited_chunks(io.BytesIO(csv_test_data),
                                             chunk_size))

        # Appending all chunks yields the original unchunked data
        self.assertEqual(csv_test_data, b"".join(chunked))

        # Chunks longer than chunk_size have newlines only at the end.
        long_chunks = [chunk for chunk in chunked if len(chunk) > chunk_size]
        self.assertTrue(long_chunks,
                        "expected at least one chunk longer than chunk_size")
        for chunk in long_chunks:
            # Ignore a single trailing newline; anything before it must
            # be free of line separators.
            interior = chunk[:-1] if chunk.endswith(b"\n") else chunk
            self.assertNotIn(b"\n", interior)
예제 #5
0
    def test_read_delimited_chunks_short_lines(self):
        """Test lines shorter than chunk_size, data longer than chunk_size.

        Since every line fits into chunk_size, no chunk may exceed
        chunk_size and chunks must only be split at line separators.
        """
        chunk_size = 500
        lines = csv_test_data.splitlines()

        # Make sure the test data fits the testcase:
        #
        # 1. The lines must be shorter than the chunk_size, so that all
        # chunks can stay within chunk_size
        self.assertLess(max(map(len, lines)), chunk_size)
        # 2. The data must be longer than chunk_size so that chunking is
        # useful
        self.assertGreater(len(csv_test_data), chunk_size)

        # The actual test
        chunked = list(read_delimited_chunks(io.BytesIO(csv_test_data),
                                             chunk_size))

        # Appending all chunks yields the original unchunked data
        self.assertEqual(csv_test_data, b"".join(chunked))

        # No chunk is longer than chunk_size
        self.assertLessEqual(max(map(len, chunked)), chunk_size)

        # All chunks were split at line separators: splitting each chunk
        # into lines gives the same lines as splitting the whole data.
        lines_from_chunks = list(itertools.chain.from_iterable(
            chunk.splitlines() for chunk in chunked))
        self.assertEqual(lines, lines_from_chunks)
예제 #6
0
 def test_read_delimited_chunks_empty_input(self):
     """Check that read_delimited_chunks yields nothing for empty input."""
     # Chunking an empty stream must give no chunks at all (not one
     # empty chunk), much like bytes.splitlines() returns an empty
     # list when called on empty input.
     empty_stream = io.BytesIO(b"")
     chunks = list(read_delimited_chunks(empty_stream, 1000))
     self.assertEqual(chunks, [])