def test_to_csv_compression(self, df, encoding, compression):
    """Check that ``df`` survives a compressed ``to_csv`` -> ``read_csv`` trip.

    The frame is written once by file name and once through a handle
    obtained from ``_get_handle``; each result is read back and compared
    with ``df``. The file is then opened through ``tm.decompress_file`` to
    check that the column labels and the whole frame can be recovered.
    """
    io_kwargs = {"compression": compression, "encoding": encoding}

    with ensure_clean() as filename:
        # round trip by file name: to_csv -> read_csv
        df.to_csv(filename, **io_kwargs)
        from_path = read_csv(filename, index_col=0, **io_kwargs)
        assert_frame_equal(df, from_path)

        # round trip through an explicit file handle: to_csv -> read_csv
        handle, _handles = _get_handle(filename, "w", **io_kwargs)
        with handle:
            df.to_csv(handle, encoding=encoding)
        from_handle = pd.read_csv(
            filename, index_col=0, squeeze=True, **io_kwargs
        )
        assert_frame_equal(df, from_handle)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression) as fh:
            text = fh.read().decode(encoding or "utf8")
            for label in df.columns:
                assert label in text

        with tm.decompress_file(filename, compression) as fh:
            recovered = read_csv(fh, index_col=0, encoding=encoding)
            assert_frame_equal(df, recovered)
def test_to_csv_compression(self, df, encoding, compression):
    """Write ``df`` to a compressed CSV and verify it can be read back.

    Two write paths are covered: passing the file name to ``to_csv`` and
    passing a handle created by ``_get_handle``. Afterwards the file is
    opened with ``tm.decompress_file`` and both its text and the parsed
    frame are checked against ``df``.
    """
    with ensure_clean() as filename:
        # test the round trip - to_csv -> read_csv
        df.to_csv(filename, compression=compression, encoding=encoding)
        read_back = read_csv(
            filename,
            compression=compression,
            index_col=0,
            encoding=encoding,
        )
        assert_frame_equal(df, read_back)

        # test the round trip using file handle - to_csv -> read_csv
        opened = _get_handle(
            filename, "w", compression=compression, encoding=encoding
        )
        f, _handles = opened
        with f:
            df.to_csv(f, encoding=encoding)
        read_back = pd.read_csv(
            filename,
            compression=compression,
            encoding=encoding,
            index_col=0,
            squeeze=True,
        )
        assert_frame_equal(df, read_back)

        # explicitly make sure file is compressed
        codec = encoding or "utf8"
        with tm.decompress_file(filename, compression) as fh:
            raw = fh.read()
            text = raw.decode(codec)
            for col in df.columns:
                assert col in text

        with tm.decompress_file(filename, compression) as fh:
            parsed = read_csv(fh, index_col=0, encoding=encoding)
            assert_frame_equal(df, parsed)
def test_to_csv_compression(self, compression):
    """Round-trip a small named Series through a compressed CSV file.

    The Series is written with ``to_csv`` (header included), read back
    with ``pd.read_csv`` and compared. The file is then opened through
    ``tm.decompress_file`` to check that the Series name appears in the
    decompressed text and that the data parses back to the same Series.

    The decompressed handles are managed with ``with`` blocks so they are
    closed even when one of the assertions fails.
    """
    s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
               name='X')

    with ensure_clean() as filename:
        s.to_csv(filename, compression=compression, header=True)

        # test the round trip - to_csv -> read_csv
        rs = pd.read_csv(filename, compression=compression,
                         index_col=0, squeeze=True)
        assert_series_equal(s, rs)

        # explicitly ensure file was compressed
        with tm.decompress_file(filename, compression=compression) as f:
            text = f.read().decode('utf8')
            assert s.name in text

        with tm.decompress_file(filename, compression=compression) as f:
            assert_series_equal(s, pd.read_csv(f, index_col=0,
                                               squeeze=True))
def test_to_csv_compression(self, s, encoding, compression):
    """Check that Series ``s`` survives a compressed CSV round trip.

    ``s`` is written (with header) by file name and then through a handle
    from ``_get_handle``; each time it is read back with ``pd.read_csv``
    and compared. The file is finally opened via ``tm.decompress_file`` to
    check for the Series name in the text and to re-parse the data.
    """
    io_kwargs = {"compression": compression, "encoding": encoding}
    parse_kwargs = {"index_col": 0, "squeeze": True}

    with ensure_clean() as filename:
        # round trip by file name: to_csv -> read_csv
        s.to_csv(filename, header=True, **io_kwargs)
        from_path = pd.read_csv(filename, **io_kwargs, **parse_kwargs)
        assert_series_equal(s, from_path)

        # round trip through an explicit file handle: to_csv -> read_csv
        handle, _handles = _get_handle(filename, "w", **io_kwargs)
        with handle:
            s.to_csv(handle, encoding=encoding, header=True)
        from_handle = pd.read_csv(filename, **io_kwargs, **parse_kwargs)
        assert_series_equal(s, from_handle)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression) as fh:
            text = fh.read().decode(encoding or "utf8")
            assert s.name in text

        with tm.decompress_file(filename, compression) as fh:
            recovered = pd.read_csv(fh, encoding=encoding, **parse_kwargs)
            assert_series_equal(s, recovered)
def test_to_csv_compression(self, df, encoding, compression):
    """Round-trip ``df`` through ``to_csv``/``read_csv`` with compression.

    The frame is written by file name and read back, then written again
    through a handle opened with ``open(filename, 'w')`` and read back a
    second time. Both results are compared with ``df`` before the file is
    inspected through ``tm.decompress_file``.
    """
    read_kwargs = {
        "compression": compression,
        "index_col": 0,
        "encoding": encoding,
    }

    with ensure_clean() as filename:
        # round trip by file name: to_csv -> read_csv
        df.to_csv(filename, compression=compression, encoding=encoding)
        from_path = read_csv(filename, **read_kwargs)

        # same round trip, but writing through an open file handle
        with open(filename, "w") as out:
            df.to_csv(out, compression=compression, encoding=encoding)
        from_handle = read_csv(filename, **read_kwargs)

        assert_frame_equal(df, from_path)
        assert_frame_equal(df, from_handle)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression) as fh:
            text = fh.read().decode(encoding or "utf8")
            for label in df.columns:
                assert label in text

        with tm.decompress_file(filename, compression) as fh:
            recovered = read_csv(fh, index_col=0, encoding=encoding)
            assert_frame_equal(df, recovered)
def test_to_csv_compression(self, df, encoding, compression):
    """Verify compressed CSV output of ``df`` for path and handle targets.

    ``to_csv`` is called first with the file name and then with a handle
    from ``open(filename, 'w')``; ``read_csv`` is run after each write and
    both frames must equal ``df``. The file is then decompressed with
    ``tm.decompress_file`` and checked again.
    """
    with ensure_clean() as filename:
        # test the round trip - to_csv -> read_csv
        df.to_csv(filename, compression=compression, encoding=encoding)
        result = read_csv(
            filename,
            compression=compression,
            index_col=0,
            encoding=encoding,
        )

        # repeat the write through an already opened text handle
        with open(filename, "w") as handle:
            df.to_csv(handle, compression=compression, encoding=encoding)
        result_fh = read_csv(
            filename,
            compression=compression,
            index_col=0,
            encoding=encoding,
        )

        for frame in (result, result_fh):
            assert_frame_equal(df, frame)

        # explicitly make sure file is compressed
        codec = encoding or "utf8"
        with tm.decompress_file(filename, compression) as fh:
            text = fh.read().decode(codec)
            for col in df.columns:
                assert col in text

        with tm.decompress_file(filename, compression) as fh:
            parsed = read_csv(fh, index_col=0, encoding=encoding)
            assert_frame_equal(df, parsed)
def test_to_csv_compression(self, compression_no_zip):
    """Round-trip a small float DataFrame through a compressed CSV file.

    The frame is written with ``to_csv`` using ``compression_no_zip``,
    read back with ``read_csv`` and compared. The file is then opened via
    ``tm.decompress_file`` to check that every column label appears in
    the text and that the decompressed data parses back to the frame.
    """
    values = [
        [0.123456, 0.234567, 0.567567],
        [12.32112, 123123.2, 321321.2],
    ]
    df = DataFrame(values, index=["A", "B"], columns=["X", "Y", "Z"])

    with ensure_clean() as filename:
        # round trip: to_csv -> read_csv
        df.to_csv(filename, compression=compression_no_zip)
        round_tripped = read_csv(
            filename, compression=compression_no_zip, index_col=0
        )
        assert_frame_equal(df, round_tripped)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression_no_zip) as fh:
            text = fh.read().decode("utf8")
            for label in df.columns:
                assert label in text

        with tm.decompress_file(filename, compression_no_zip) as fh:
            recovered = read_csv(fh, index_col=0)
            assert_frame_equal(df, recovered)
def test_write_infer(self, ext, get_random_path):
    """Check that ``to_pickle`` picks the compression from the extension.

    A frame is pickled to ``<base><ext>`` without an explicit compression
    argument. The file is then decompressed with the method that
    ``self._compression_to_extension`` associates with ``ext`` (``None``
    when no entry matches), copied to a ``.raw`` file and unpickled from
    there with ``compression=None``.
    """
    compressed_path = get_random_path + ext
    raw_path = get_random_path + ".raw"

    # first compression method whose registered extension equals ``ext``
    ext_map = self._compression_to_extension
    compression = next((c for c in ext_map if ext_map[c] == ext), None)

    with tm.ensure_clean(compressed_path) as p1, \
            tm.ensure_clean(raw_path) as p2:
        df = tm.makeDataFrame()

        # write to compressed file by inferred compression method
        df.to_pickle(p1)

        # decompress into the raw file
        with tm.decompress_file(p1, compression=compression) as src, \
                open(p2, "wb") as dst:
            dst.write(src.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
def test_gz_lineend(self):
    """Check the exact text written by ``to_csv`` to a ``.csv.gz`` path.

    The expected content is built from the rows with
    ``tm.convert_rows_list_to_csv_str`` and compared with the gzip
    decompressed file content.
    """
    # GH 25311
    frame = pd.DataFrame({'a': [1, 2]})
    expected = tm.convert_rows_list_to_csv_str(['a', '1', '2'])

    with ensure_clean('__test_gz_lineend.csv.gz') as path:
        frame.to_csv(path, index=False)
        with tm.decompress_file(path, compression='gzip') as fh:
            raw = fh.read()
            result = raw.decode('utf-8')

    assert result == expected
def test_gz_lineend(self):
    """Verify the decompressed content of a frame saved as ``.csv.gz``.

    A one-column frame is written without its index; the gzip file is
    decompressed through ``tm.decompress_file`` and its text must equal
    the string produced by ``tm.convert_rows_list_to_csv_str``.
    """
    # GH 25311
    rows = ["a", "1", "2"]
    expected = tm.convert_rows_list_to_csv_str(rows)
    df = pd.DataFrame({"a": [1, 2]})

    with ensure_clean("__test_gz_lineend.csv.gz") as path:
        df.to_csv(path, index=False)
        with tm.decompress_file(path, compression="gzip") as f:
            content = f.read().decode("utf-8")

    assert content == expected
def test_to_csv_compression(self, compression):
    """Round-trip a small named Series through a compressed CSV file.

    The Series is written with ``to_csv`` (header included), read back
    with ``pd.read_csv`` and compared. The file is then opened through
    ``tm.decompress_file`` to check that the Series name appears in the
    decompressed text and that the data parses back to the same Series.
    """
    series = Series(
        [0.123456, 0.234567, 0.567567],
        index=["A", "B", "C"],
        name="X",
    )
    parse_kwargs = {"index_col": 0, "squeeze": True}

    with ensure_clean() as filename:
        # round trip: to_csv -> read_csv
        series.to_csv(filename, compression=compression, header=True)
        round_tripped = pd.read_csv(
            filename, compression=compression, **parse_kwargs
        )
        assert_series_equal(series, round_tripped)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression=compression) as fh:
            text = fh.read().decode("utf8")
            assert series.name in text

        with tm.decompress_file(filename, compression=compression) as fh:
            recovered = pd.read_csv(fh, **parse_kwargs)
            assert_series_equal(series, recovered)
def test_compression_roundtrip(compression):
    """Round-trip a small DataFrame through compressed JSON.

    The frame is written with ``to_json`` and read back with
    ``read_json`` using the same ``compression``. The file is then
    decompressed via ``tm.decompress_file`` and the resulting text is
    parsed by ``read_json`` to confirm it holds the same frame.
    """
    values = [
        [0.123456, 0.234567, 0.567567],
        [12.32112, 123123.2, 321321.2],
    ]
    df = pd.DataFrame(values, index=["A", "B"], columns=["X", "Y", "Z"])

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        round_tripped = pd.read_json(path, compression=compression)
        assert_frame_equal(df, round_tripped)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(path, compression) as fh:
            json_text = fh.read().decode("utf8")
        assert_frame_equal(df, pd.read_json(json_text))
def test_to_csv_compression(self, compression):
    """Round-trip a small float DataFrame through a compressed CSV file.

    The frame is written with ``to_csv`` using ``compression``, read back
    with ``read_csv`` and compared. The file is then opened through
    ``tm.decompress_file`` to check that every column label appears in
    the text and that the decompressed data parses back to the frame.
    """
    columns = ["X", "Y", "Z"]
    df = DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=columns,
    )

    with ensure_clean() as filename:
        # round trip: to_csv -> read_csv
        df.to_csv(filename, compression=compression)
        round_tripped = read_csv(
            filename, compression=compression, index_col=0
        )
        assert_frame_equal(df, round_tripped)

        # decompress by hand to make sure the file really is compressed
        with tm.decompress_file(filename, compression) as fh:
            decoded = fh.read().decode("utf8")
            for name in df.columns:
                assert name in decoded

        with tm.decompress_file(filename, compression) as fh:
            recovered = read_csv(fh, index_col=0)
            assert_frame_equal(df, recovered)
def test_compression_roundtrip(compression):
    """Check ``to_json``/``read_json`` agree when ``compression`` is used.

    After the direct round trip, the file is opened through
    ``tm.decompress_file``; the decoded text is handed to ``read_json``
    and must yield the original frame.
    """
    index = ["A", "B"]
    columns = ["X", "Y", "Z"]
    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=index,
        columns=columns,
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        assert_frame_equal(
            df, pd.read_json(path, compression=compression)
        )

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            payload = fh.read()
            result = payload.decode("utf8")
        reparsed = pd.read_json(result)
        assert_frame_equal(df, reparsed)
def test_write_explicit(self, compression, get_random_path):
    """Check ``to_pickle`` with an explicitly given ``compression``.

    A frame is pickled to ``<base>.compressed`` using ``compression``.
    The file is decompressed with ``tm.decompress_file``, copied to
    ``<base>.raw`` and unpickled from there with ``compression=None``;
    the result must equal the original frame.
    """
    compressed_path = get_random_path + ".compressed"
    raw_path = get_random_path + ".raw"

    with tm.ensure_clean(compressed_path) as p1, \
            tm.ensure_clean(raw_path) as p2:
        df = tm.makeDataFrame()

        # write to compressed file
        df.to_pickle(p1, compression=compression)

        # decompress into the raw file
        with tm.decompress_file(p1, compression=compression) as src, \
                open(p2, "wb") as dst:
            dst.write(src.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)