Esempio n. 1
0
    def test_to_csv_na_rep(self):
        # see gh-11553
        #
        # Testing if NaN values are correctly represented in the index.
        df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected

        # now with an index containing only NaNs
        df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected

        # check if na_rep parameter does not break anything when no NaN
        df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected

        csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
        expected = tm.convert_rows_list_to_csv_str(
            [",0", "0,a", "1,ZZZZZ", "2,c"])
        assert expected == csv
Esempio n. 2
0
    def test_to_csv_decimal(self):
        # see gh-781
        df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})

        expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
        expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv() == expected_default

        expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
        expected_european_excel = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(decimal=",", sep=";") == expected_european_excel

        expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
        expected_float_format_default = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(float_format="%.2f") == expected_float_format_default

        expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
        expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
        assert (df.to_csv(decimal=",", sep=";",
                          float_format="%.2f") == expected_float_format)

        # see gh-11553: testing if decimal is taken into account for '0.0'
        df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})

        expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(index=False, decimal="^") == expected

        # same but for an index
        assert df.set_index("a").to_csv(decimal="^") == expected

        # same for a multi-index
        assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
Esempio n. 3
0
    def test_period_index_date_overflow(self):
        # see gh-15982

        dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        date_format = "%m-%d-%Y"
        result = df.to_csv(date_format=date_format)

        expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        # Overflow with pd.NaT
        dates = ["1990-01-01", pd.NaT, "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Esempio n. 4
0
    def test_na_rep_truncated(self):
        # https://github.com/pandas-dev/pandas/issues/31447
        result = pd.Series(range(8, 12)).to_csv(na_rep="-")
        expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
        assert result == expected

        result = pd.Series([True, False]).to_csv(na_rep="nan")
        expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
        assert result == expected

        result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
        expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
        assert result == expected
Esempio n. 5
0
 def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
     # GH 29975
     # Make sure full na_rep shows up when a dtype is provided
     expected = tm.convert_rows_list_to_csv_str(
         [",0", "0,a", "1,ZZZZZ", "2,c"])
     csv = pd.Series(["a", pd.NA, "c"],
                     dtype=nullable_string_dtype).to_csv(na_rep="ZZZZZ")
     assert expected == csv
Esempio n. 6
0
 def test_to_csv_float_ea_no_float_format(self):
     # GH#45991
     df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
     df["a"] = df["a"].astype("Float64")
     result = df.to_csv(index=False)
     expected = tm.convert_rows_list_to_csv_str(
         ["a,b", "1.1,c", "2.02,c", ",c", "6.000006,c"])
     assert result == expected
Esempio n. 7
0
 def test_to_csv_categorical_and_ea(self):
     # GH#46812
     df = DataFrame({"a": "x", "b": [1, pd.NA]})
     df["b"] = df["b"].astype("Int16")
     df["b"] = df["b"].astype("category")
     result = df.to_csv()
     expected_rows = [",a,b", "0,x,1", "1,x,"]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert result == expected
Esempio n. 8
0
    def test_to_csv_index_no_leading_comma(self):
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"])

        buf = StringIO()
        df.to_csv(buf, index_label=False)

        expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert buf.getvalue() == expected
Esempio n. 9
0
    def test_to_csv_na_rep_long_string(self, df_new_type):
        # see gh-25099
        df = DataFrame({"c": [float("nan")] * 3})
        df = df.astype(df_new_type)
        expected_rows = ["c", "mynull", "mynull", "mynull"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")

        assert expected == result
Esempio n. 10
0
    def test_to_csv_unicodewriter_quoting(self):
        df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]})

        buf = StringIO()
        df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8")

        result = buf.getvalue()
        expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Esempio n. 11
0
    def test_to_csv_quote_none(self, encoding):
        # GH4328
        df = DataFrame({"A": ["hello", '{"hello"}']})
        buf = StringIO()
        df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False)

        result = buf.getvalue()
        expected_rows = ["A", "hello", '{"hello"}']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Esempio n. 12
0
def test_convert_rows_list_to_csv_str():
    """Check the helper's output against the line ending chosen for this platform."""
    converted = tm.convert_rows_list_to_csv_str(["aaa", "bbb", "ccc"])

    # CRLF is expected when pandas reports a Windows platform, LF otherwise.
    expected = (
        "aaa\r\nbbb\r\nccc\r\n"
        if compat.is_platform_windows()
        else "aaa\nbbb\nccc\n"
    )

    assert converted == expected
Esempio n. 13
0
    def test_to_csv_float_format(self):
        # testing if float_format is taken into account for the index
        # GH 11553
        df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})

        expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.set_index("a").to_csv(float_format="%.2f") == expected

        # same for a multi-index
        assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
Esempio n. 14
0
    def test_to_csv_stdout_file(self, capsys):
        """Write a frame to ``sys.stdout``, compare the captured text, and check stdout stays open."""
        # GH 21561
        frame = DataFrame(
            [["foo", "bar"], ["baz", "qux"]],
            columns=["name_1", "name_2"],
        )
        expected_ascii = tm.convert_rows_list_to_csv_str(
            [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
        )

        frame.to_csv(sys.stdout, encoding="ascii")
        printed = capsys.readouterr().out

        assert printed == expected_ascii
        assert not sys.stdout.closed
Esempio n. 15
0
    def test_gz_lineend(self):
        """Write a frame to a ``.csv.gz`` path and compare the decompressed text."""
        # GH 25311
        frame = DataFrame({"a": [1, 2]})
        expected = tm.convert_rows_list_to_csv_str(["a", "1", "2"])
        with tm.ensure_clean("__test_gz_lineend.csv.gz") as path:
            frame.to_csv(path, index=False)
            with tm.decompress_file(path, compression="gzip") as handle:
                raw = handle.read()

        # The bytes are already in memory, so decoding can happen after cleanup.
        assert raw.decode("utf-8") == expected
Esempio n. 16
0
 def test_to_csv_float_format_over_decimal(self):
     # GH#47436
     df = DataFrame({"a": [0.5, 1.0]})
     result = df.to_csv(
         decimal=",",
         float_format=lambda x: np.format_float_positional(x, trim="-"),
         index=False,
     )
     expected_rows = ["a", "0.5", "1"]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert result == expected
Esempio n. 17
0
    def test_multi_index_header(self):
        # see gh-5539
        columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
        df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
        df.columns = columns

        header = ["a", "b", "c", "d"]
        result = df.to_csv(header=header)

        expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Esempio n. 18
0
    def test_to_csv_multi_index(self):
        # see gh-6618
        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))

        exp_rows = [",1", ",2", "0,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["1", "2", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame(
            [1],
            columns=pd.MultiIndex.from_arrays([[1], [2]]),
            index=pd.MultiIndex.from_arrays([[1], [2]]),
        )

        exp_rows = [",,1", ",,2", "1,2,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["1", "2", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))

        exp_rows = [",foo", ",bar", "0,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["foo", "bar", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp
Esempio n. 19
0
 def test_to_csv_timedelta_precision(self):
     # GH 6783
     s = pd.Series([1, 1]).astype("timedelta64[ns]")
     buf = io.StringIO()
     s.to_csv(buf)
     result = buf.getvalue()
     expected_rows = [
         ",0",
         "0,0 days 00:00:00.000000001",
         "1,0 days 00:00:00.000000001",
     ]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert result == expected
Esempio n. 20
0
    def test_to_csv_write_to_open_file_with_newline_py3(self):
        """Append CSV rows to a file opened with ``newline=""`` after a manual header line.

        The file is read back in binary mode so line endings are compared
        exactly as written.
        """
        # see gh-21696
        # see gh-20353
        header = "manual header\n"
        frame = pd.DataFrame({"a": ["x", "y", "z"]})
        expected = header + tm.convert_rows_list_to_csv_str(["x", "y", "z"])
        with tm.ensure_clean("test.txt") as path:
            with open(path, "w", newline="") as handle:
                handle.write(header)
                frame.to_csv(handle, header=None, index=None)

            with open(path, "rb") as handle:
                written = handle.read()
            assert written == expected.encode("utf-8")
Esempio n. 21
0
    def test_to_csv_date_format_in_categorical(self):
        # GH#40754
        ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
        ser = ser.astype("category")
        expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
        assert ser.to_csv(index=False) == expected

        ser = pd.Series(
            pd.date_range(
                start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
            ).append(pd.DatetimeIndex([pd.NaT]))
        )
        ser = ser.astype("category")
        assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
Esempio n. 22
0
 def test_to_csv_different_datetime_formats(self):
     # GH#21734
     df = DataFrame(
         {
             "date": pd.to_datetime("1970-01-01"),
             "datetime": pd.date_range("1970-01-01", periods=2, freq="H"),
         }
     )
     expected_rows = [
         "date,datetime",
         "1970-01-01,1970-01-01 00:00:00",
         "1970-01-01,1970-01-01 01:00:00",
     ]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert df.to_csv(index=False) == expected
Esempio n. 23
0
 def test_to_csv_categorical_and_interval(self):
     # GH#46297
     df = DataFrame({
         "a": [
             pd.Interval(
                 Timestamp("2020-01-01"),
                 Timestamp("2020-01-02"),
                 inclusive="both",
             )
         ]
     })
     df["a"] = df["a"].astype("category")
     result = df.to_csv()
     expected_rows = [",a", '0,"[2020-01-01, 2020-01-02]"']
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert result == expected
Esempio n. 24
0
    def test_to_csv_quoting(self):
        """Check ``to_csv`` output for each ``csv`` quoting mode and for escape characters.

        Also covers a round trip of a field containing a Windows line
        terminator, and ``QUOTE_ALL`` with a MultiIndex.
        """
        as_csv = tm.convert_rows_list_to_csv_str
        frame = DataFrame(
            {
                "c_bool": [True, False],
                "c_float": [1.0, 3.2],
                "c_int": [42, np.nan],
                "c_string": ["a", "b,c"],
            }
        )

        # The default, ``quoting=None`` and QUOTE_MINIMAL share one expected
        # output: only the field containing the separator is quoted.
        minimal = as_csv(
            [
                ",c_bool,c_float,c_int,c_string",
                "0,True,1.0,42.0,a",
                '1,False,3.2,,"b,c"',
            ]
        )
        assert frame.to_csv() == minimal
        assert frame.to_csv(quoting=None) == minimal
        assert frame.to_csv(quoting=csv.QUOTE_MINIMAL) == minimal

        quote_all = as_csv(
            [
                '"","c_bool","c_float","c_int","c_string"',
                '"0","True","1.0","42.0","a"',
                '"1","False","3.2","","b,c"',
            ]
        )
        assert frame.to_csv(quoting=csv.QUOTE_ALL) == quote_all

        # see gh-12922, gh-13259: make sure changes to
        # the formatters do not break this behaviour
        non_numeric = as_csv(
            [
                '"","c_bool","c_float","c_int","c_string"',
                '0,True,1.0,42.0,"a"',
                '1,False,3.2,"","b,c"',
            ]
        )
        assert frame.to_csv(quoting=csv.QUOTE_NONNUMERIC) == non_numeric

        # QUOTE_NONE with no escape character cannot write "b,c".
        msg = "need to escape, but no escapechar set"
        for extra_kwargs in ({}, {"escapechar": None}):
            with pytest.raises(csv.Error, match=msg):
                frame.to_csv(quoting=csv.QUOTE_NONE, **extra_kwargs)

        escaped_bang = as_csv(
            [
                ",c_bool,c_float,c_int,c_string",
                "0,True,1.0,42.0,a",
                "1,False,3.2,,b!,c",
            ]
        )
        assert frame.to_csv(quoting=csv.QUOTE_NONE, escapechar="!") == escaped_bang

        # With "f" as the escape character, the "f" in the header
        # "c_float" is itself escaped, hence "c_ffloat".
        escaped_f = as_csv(
            [
                ",c_bool,c_ffloat,c_int,c_string",
                "0,True,1.0,42.0,a",
                "1,False,3.2,,bf,c",
            ]
        )
        assert frame.to_csv(quoting=csv.QUOTE_NONE, escapechar="f") == escaped_f

        # see gh-3503: quoting Windows line terminators
        # presents with encoding?
        text = as_csv(["a,b,c", '1,"test \r\n",3'])
        frame = pd.read_csv(StringIO(text))

        sink = StringIO()
        frame.to_csv(sink, encoding="utf-8", index=False)
        assert sink.getvalue() == text

        # xref gh-7791: make sure the quoting parameter is passed through
        # with multi-indexes
        frame = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).set_index(["a", "b"])

        multi_quoted = as_csv(['"a","b","c"', '"1","3","5"', '"2","4","6"'])
        assert frame.to_csv(quoting=csv.QUOTE_ALL) == multi_quoted
Esempio n. 25
0
    def test_to_csv_date_format(self):
        # GH 10209
        df_sec = DataFrame(
            {"A": pd.date_range("20130101", periods=5, freq="s")})
        df_day = DataFrame(
            {"A": pd.date_range("20130101", periods=5, freq="d")})

        expected_rows = [
            ",A",
            "0,2013-01-01 00:00:00",
            "1,2013-01-01 00:00:01",
            "2,2013-01-01 00:00:02",
            "3,2013-01-01 00:00:03",
            "4,2013-01-01 00:00:04",
        ]
        expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv() == expected_default_sec

        expected_rows = [
            ",A",
            "0,2013-01-01 00:00:00",
            "1,2013-01-02 00:00:00",
            "2,2013-01-03 00:00:00",
            "3,2013-01-04 00:00:00",
            "4,2013-01-05 00:00:00",
        ]
        expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_day.to_csv(
            date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day

        expected_rows = [
            ",A",
            "0,2013-01-01",
            "1,2013-01-01",
            "2,2013-01-01",
            "3,2013-01-01",
            "4,2013-01-01",
        ]
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec

        expected_rows = [
            ",A",
            "0,2013-01-01",
            "1,2013-01-02",
            "2,2013-01-03",
            "3,2013-01-04",
            "4,2013-01-05",
        ]
        expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_day.to_csv() == expected_default_day
        assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day

        # see gh-7791
        #
        # Testing if date_format parameter is taken into account
        # for multi-indexed DataFrames.
        df_sec["B"] = 0
        df_sec["C"] = 1

        expected_rows = ["A,B,C", "2013-01-01,0,1"]
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)

        df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
        assert df_sec_grouped.mean().to_csv(
            date_format="%Y-%m-%d") == expected_ymd_sec
Esempio n. 26
0
 def test_csv_to_string(self):
     df = DataFrame({"col": [1, 2]})
     expected_rows = [",col", "0,1", "1,2"]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert df.to_csv() == expected