def test_read_single_file_without_striping_eol_crlf(self):
    """Read a CRLF-terminated file and expect every record to keep its CRLF.

    The file is written with ``EOL.CRLF`` and then read back through a
    ``TextSource`` whose fourth positional argument is ``False``
    (presumably the strip-trailing-newlines flag -- confirm against the
    ``TextSource`` signature).
    """
    expected_count = TextSourceTest.DEFAULT_NUM_RECORDS
    file_name, written_data = write_data(expected_count, eol=EOL.CRLF)
    assert len(written_data) == expected_count

    text_source = TextSource(
        file_name,
        0,
        CompressionTypes.UNCOMPRESSED,
        False,
        coders.StrUtf8Coder())
    tracker = text_source.get_range_tracker(None, None)
    records_read = list(text_source.read(tracker))

    # Each record that was written should come back with the CRLF terminator
    # still attached; ordering is not significant for this comparison.
    records_expected = [record + '\r\n' for record in written_data]
    self.assertCountEqual(records_expected, records_read)
def _run_read_test(
        self,
        file_or_pattern,
        expected_data,
        buffer_size=DEFAULT_NUM_RECORDS,
        compression=CompressionTypes.UNCOMPRESSED):
    """Read ``file_or_pattern`` via ``TextSource`` and check the records.

    Because each record usually occupies more than one byte, the default
    ``buffer_size`` is smaller than the total size of the file. That is
    deliberate: it increases test coverage for reads that hit the buffer
    boundary.

    Args:
        file_or_pattern: file path or pattern handed to ``TextSource``.
        expected_data: records the read is expected to produce; compared
            with ``assertCountEqual``, so order is ignored.
        buffer_size: value passed as the sixth positional argument of
            ``TextSource``.
        compression: compression type passed to ``TextSource``.
    """
    text_source = TextSource(
        file_or_pattern,
        0,
        compression,
        True,
        coders.StrUtf8Coder(),
        buffer_size)
    tracker = text_source.get_range_tracker(None, None)
    actual_data = list(text_source.read(tracker))
    self.assertCountEqual(expected_data, actual_data)
def _read_skip_header_lines(self, file_or_pattern, skip_header_lines):
    """Build a ``TextSource`` with ``skip_header_lines`` and return its records.

    Args:
        file_or_pattern: file path or pattern handed to ``TextSource``.
        skip_header_lines: forwarded to ``TextSource`` as the keyword
            argument of the same name.

    Returns:
        A list of everything the source yields over its full range.
    """
    text_source = TextSource(
        file_or_pattern,
        0,
        CompressionTypes.UNCOMPRESSED,
        True,
        coders.StrUtf8Coder(),
        skip_header_lines=skip_header_lines)
    # (None, None) requests a tracker with no explicit start/stop bounds.
    return list(text_source.read(text_source.get_range_tracker(None, None)))
def _read_skip_header_lines(self, file_or_pattern, skip_header_lines):
    """Read ``file_or_pattern`` through a header-skipping ``TextSource``.

    NOTE(review): an identically named helper with the same body appears
    elsewhere in this file -- confirm whether one of the two is redundant.

    Args:
        file_or_pattern: file path or pattern handed to ``TextSource``.
        skip_header_lines: forwarded to ``TextSource`` as the keyword
            argument of the same name.

    Returns:
        A list of the records produced by reading the source.
    """
    reader = TextSource(
        file_or_pattern,
        0,
        CompressionTypes.UNCOMPRESSED,
        True,
        coders.StrUtf8Coder(),
        skip_header_lines=skip_header_lines)
    full_range = reader.get_range_tracker(None, None)
    records = list(reader.read(full_range))
    return records