Esempio n. 1
0
    def test_to_csv_compression(self, df, encoding, compression):
        """Round-trip ``df`` through ``to_csv``/``read_csv`` with compression.

        The frame is written twice -- once by path and once through a handle
        obtained from ``get_handle`` -- and read back after each write.  The
        file is then decompressed directly to check its contents.

        Parameters
        ----------
        df : DataFrame
            Frame to serialise (presumably supplied by test parametrization).
        encoding : str or None
            Encoding passed to the writer and the reader.  The raw-text check
            falls back to "utf8" when it is falsy.
        compression : str or None
            Compression method passed to the writer, the reader and
            ``tm.decompress_file``.
        """
        with tm.ensure_clean() as filename:

            df.to_csv(filename, compression=compression, encoding=encoding)
            # test the round trip - to_csv -> read_csv
            result = read_csv(
                filename, compression=compression, index_col=0, encoding=encoding
            )
            tm.assert_frame_equal(df, result)

            # test the round trip using file handle - to_csv -> read_csv
            handles = get_handle(
                filename, "w", compression=compression, encoding=encoding
            )
            try:
                df.to_csv(handles.handle, encoding=encoding)
                # to_csv must leave a caller-supplied handle open
                assert not handles.handle.closed
            finally:
                # release the handle even when the write or the assertion
                # above fails, so it is never leaked
                handles.close()
            # no ``squeeze`` here: the result is compared as a DataFrame
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
            )
            tm.assert_frame_equal(df, result)

            # explicitly make sure file is compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or "utf8")
                for col in df.columns:
                    assert col in text

            with tm.decompress_file(filename, compression) as fh:
                tm.assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding))
Esempio n. 2
0
    def test_write_infer(self, ext, get_random_path):
        """Pickle to a path ending in ``ext`` and verify the written file.

        The frame is written with ``to_pickle`` without an explicit
        compression argument, decompressed with the method that
        ``self._compression_to_extension`` maps to ``ext``, and the raw bytes
        are read back with ``read_pickle(compression=None)``.

        Parameters
        ----------
        ext : str
            File extension appended to the base path.
        get_random_path : str
            Base path for the temporary files (presumably a fixture value).
        """
        compressed_path = get_random_path + ext
        raw_path = get_random_path + ".raw"

        # first compression method whose extension equals ``ext``;
        # stays None when nothing matches
        ext_by_method = self._compression_to_extension
        compression = next(
            (method for method in ext_by_method if ext_by_method[method] == ext),
            None,
        )

        with tm.ensure_clean(compressed_path) as p1, tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # write to compressed file by inferred compression method
            expected.to_pickle(p1)

            # decompress into the raw file
            with tm.decompress_file(p1, compression=compression) as src, open(
                p2, "wb"
            ) as dst:
                dst.write(src.read())

            # read decompressed file and compare with what was written
            tm.assert_frame_equal(expected, pd.read_pickle(p2, compression=None))
    def test_to_csv_compression(self, s, encoding, compression):
        """Round-trip the Series ``s`` through ``to_csv``/``read_csv``.

        The Series is written by path and then through a handle obtained from
        ``get_handle``.  After each write it is read back with ``squeeze=True``
        and compared.  Finally the file is decompressed directly to check
        that the Series name appears in it and that it parses back to ``s``.

        Parameters
        ----------
        s : Series
            Series to serialise (presumably supplied by test parametrization).
        encoding : str or None
            Encoding passed to the writer and the reader.  The raw-text check
            falls back to "utf8" when it is falsy.
        compression : str or None
            Compression method passed to the writer, the reader and
            ``tm.decompress_file``.
        """
        # keyword arguments shared by both reads of the compressed file
        read_kwargs = {
            "compression": compression,
            "encoding": encoding,
            "index_col": 0,
            "squeeze": True,
        }

        with tm.ensure_clean() as filename:
            # test the round trip - to_csv -> read_csv
            s.to_csv(
                filename, compression=compression, encoding=encoding, header=True
            )
            roundtripped = pd.read_csv(filename, **read_kwargs)
            tm.assert_series_equal(s, roundtripped)

            # test the round trip using file handle - to_csv -> read_csv
            handle, _ = get_handle(
                filename, "w", compression=compression, encoding=encoding
            )
            with handle:
                s.to_csv(handle, encoding=encoding, header=True)
            roundtripped = pd.read_csv(filename, **read_kwargs)
            tm.assert_series_equal(s, roundtripped)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                decoded = fh.read().decode(encoding or "utf8")
                assert s.name in decoded

            with tm.decompress_file(filename, compression) as fh:
                from_raw = pd.read_csv(
                    fh, index_col=0, squeeze=True, encoding=encoding
                )
                tm.assert_series_equal(s, from_raw)
Esempio n. 4
0
    def test_gz_lineend(self):
        """Check the text of a ``.csv.gz`` file written by ``to_csv``.

        A two-row frame is written without its index, the file is decompressed
        as gzip, and the decoded text is compared with the CSV string built by
        ``tm.convert_rows_list_to_csv_str``.
        """
        # GH 25311
        expected = tm.convert_rows_list_to_csv_str(["a", "1", "2"])
        frame = DataFrame({"a": [1, 2]})

        with tm.ensure_clean("__test_gz_lineend.csv.gz") as path:
            frame.to_csv(path, index=False)
            with tm.decompress_file(path, compression="gzip") as f:
                raw = f.read()
                result = raw.decode("utf-8")

        assert result == expected
Esempio n. 5
0
def test_compression_roundtrip(compression):
    """Round-trip a small frame through ``to_json``/``read_json``.

    The frame is written with the given compression and read back by path.
    The file is then decompressed directly and the decoded JSON text is
    parsed again, to confirm the on-disk payload matches.

    Parameters
    ----------
    compression : str or None
        Compression method passed to ``to_json``, ``read_json`` and
        ``tm.decompress_file`` (presumably a pytest fixture).
    """
    # local import keeps this block self-contained
    from io import StringIO

    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode("utf8")
        # Wrap the text in a file-like object: passing a literal JSON string
        # to read_json is deprecated in recent pandas versions.
        tm.assert_frame_equal(df, pd.read_json(StringIO(result)))
Esempio n. 6
0
    def test_write_explicit(self, compression, get_random_path):
        """Pickle with an explicit ``compression`` and verify the written file.

        The frame is written with ``to_pickle(compression=compression)``,
        decompressed with the same method into a second file, and that raw
        file is read back with ``read_pickle(compression=None)``.

        Parameters
        ----------
        compression : str or None
            Compression method passed to ``to_pickle`` and
            ``tm.decompress_file``.
        get_random_path : str
            Base path for the temporary files (presumably a fixture value).
        """
        compressed_path = get_random_path + ".compressed"
        raw_path = get_random_path + ".raw"

        with tm.ensure_clean(compressed_path) as p1, tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # write to compressed file
            expected.to_pickle(p1, compression=compression)

            # decompress into the raw file
            with tm.decompress_file(p1, compression=compression) as src, open(
                p2, "wb"
            ) as dst:
                dst.write(src.read())

            # read decompressed file and compare with what was written
            tm.assert_frame_equal(expected, pd.read_pickle(p2, compression=None))