Example #1
0
    def test_to_csv_na_rep(self):
        # see gh-11553
        #
        # Testing if NaN values are correctly represented in the index.
        df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected

        # now with an index containing only NaNs
        df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected

        # check if na_rep parameter does not break anything when no NaN
        df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
        expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index("a").to_csv(na_rep="_") == expected
        assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
Example #2
0
    def test_to_csv_na_rep(self):
        # see gh-11553
        #
        # Testing if NaN values are correctly represented in the index.
        df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '0.0,0,2',
                         '_,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

        # now with an index containing only NaNs
        df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '_,0,2',
                         '_,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

        # check if na_rep parameter does not break anything when no NaN
        df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '0,0,2',
                         '0,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
Example #3
0
    def test_to_csv_decimal(self):
        # see gh-781
        df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})

        expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
        expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv() == expected_default

        expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
        expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(decimal=",", sep=";") == expected_european_excel

        expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
        expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(float_format="%.2f") == expected_float_format_default

        expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
        expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
        assert (
            df.to_csv(decimal=",", sep=";", float_format="%.2f")
            == expected_float_format
        )

        # see gh-11553: testing if decimal is taken into account for '0.0'
        df = pd.DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})

        expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(index=False, decimal="^") == expected

        # same but for an index
        assert df.set_index("a").to_csv(decimal="^") == expected

        # same for a multi-index
        assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
Example #4
0
    def test_to_csv_na_rep(self):
        # see gh-11553
        #
        # Testing if NaN values are correctly represented in the index.
        df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '0.0,0,2',
                         '_,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

        # now with an index containing only NaNs
        df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '_,0,2',
                         '_,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected

        # check if na_rep parameter does not break anything when no NaN
        df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
        expected_rows = ['a,b,c',
                         '0,0,2',
                         '0,1,3']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        assert df.set_index('a').to_csv(na_rep='_') == expected
        assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
Example #5
0
    def test_period_index_date_overflow(self):
        # see gh-15982

        dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = pd.DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        date_format = "%m-%d-%Y"
        result = df.to_csv(date_format=date_format)

        expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        # Overflow with pd.NaT
        dates = ["1990-01-01", pd.NaT, "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = pd.DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #6
0
    def test_to_csv_decimal(self):
        # see gh-781
        df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})

        expected_rows = [',col1,col2,col3', '0,1,a,10.1']
        expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv() == expected_default

        expected_rows = [';col1;col2;col3', '0;1;a;10,1']
        expected_european_excel = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(decimal=',', sep=';') == expected_european_excel

        expected_rows = [',col1,col2,col3', '0,1,a,10.10']
        expected_float_format_default = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(float_format='%.2f') == expected_float_format_default

        expected_rows = [';col1;col2;col3', '0;1;a;10,10']
        expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(decimal=',', sep=';',
                         float_format='%.2f') == expected_float_format

        # see gh-11553: testing if decimal is taken into account for '0.0'
        df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1})

        expected_rows = ['a,b,c', '0^0,2^2,1', '1^1,3^3,1']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(index=False, decimal='^') == expected

        # same but for an index
        assert df.set_index('a').to_csv(decimal='^') == expected

        # same for a multi-index
        assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected
Example #7
0
    def test_to_csv_date_format(self):
        # GH 10209
        df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s')
                            })
        df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d')
                            })

        expected_rows = [',A',
                         '0,2013-01-01 00:00:00',
                         '1,2013-01-01 00:00:01',
                         '2,2013-01-01 00:00:02',
                         '3,2013-01-01 00:00:03',
                         '4,2013-01-01 00:00:04']
        expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv() == expected_default_sec

        expected_rows = [',A',
                         '0,2013-01-01 00:00:00',
                         '1,2013-01-02 00:00:00',
                         '2,2013-01-03 00:00:00',
                         '3,2013-01-04 00:00:00',
                         '4,2013-01-05 00:00:00']
        expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert (df_day.to_csv(date_format='%Y-%m-%d %H:%M:%S') ==
                expected_ymdhms_day)

        expected_rows = [',A',
                         '0,2013-01-01',
                         '1,2013-01-01',
                         '2,2013-01-01',
                         '3,2013-01-01',
                         '4,2013-01-01']
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv(date_format='%Y-%m-%d') == expected_ymd_sec

        expected_rows = [',A',
                         '0,2013-01-01',
                         '1,2013-01-02',
                         '2,2013-01-03',
                         '3,2013-01-04',
                         '4,2013-01-05']
        expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_day.to_csv() == expected_default_day
        assert df_day.to_csv(date_format='%Y-%m-%d') == expected_default_day

        # see gh-7791
        #
        # Testing if date_format parameter is taken into account
        # for multi-indexed DataFrames.
        df_sec['B'] = 0
        df_sec['C'] = 1

        expected_rows = ['A,B,C',
                         '2013-01-01,0,1']
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)

        df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B'])
        assert (df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d') ==
                expected_ymd_sec)
Example #8
0
 def test_csv_to_string(self):
     df = DataFrame({'col': [1, 2]})
     expected_rows = [',col',
                      '0,1',
                      '1,2']
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert df.to_csv() == expected
Example #9
0
    def test_to_csv_index_no_leading_comma(self):
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"])

        buf = StringIO()
        df.to_csv(buf, index_label=False)

        expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert buf.getvalue() == expected
Example #10
0
    def test_convert_rows_list_to_csv_str(self):
        rows_list = ["aaa", "bbb", "ccc"]
        ret = tm.convert_rows_list_to_csv_str(rows_list)

        if compat.is_platform_windows():
            expected = "aaa\r\nbbb\r\nccc\r\n"
        else:
            expected = "aaa\nbbb\nccc\n"

        assert ret == expected
Example #11
0
    def test_to_csv_unicodewriter_quoting(self):
        df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]})

        buf = StringIO()
        df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8")

        result = buf.getvalue()
        expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #12
0
    def test_convert_rows_list_to_csv_str(self):
        rows_list = ["aaa", "bbb", "ccc"]
        ret = tm.convert_rows_list_to_csv_str(rows_list)

        if compat.is_platform_windows():
            expected = "aaa\r\nbbb\r\nccc\r\n"
        else:
            expected = "aaa\nbbb\nccc\n"

        assert ret == expected
Example #13
0
    def test_to_csv_na_rep_long_string(self, df_new_type):
        # see gh-25099
        df = pd.DataFrame({"c": [float("nan")] * 3})
        df = df.astype(df_new_type)
        expected_rows = ["c", "mynull", "mynull", "mynull"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)

        result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")

        assert expected == result
Example #14
0
    def test_to_csv_float_format(self):
        # testing if float_format is taken into account for the index
        # GH 11553
        df = pd.DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})

        expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.set_index("a").to_csv(float_format="%.2f") == expected

        # same for a multi-index
        assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
Example #15
0
    def test_to_csv_quote_none(self):
        # GH4328
        df = DataFrame({"A": ["hello", '{"hello"}']})
        for encoding in (None, "utf-8"):
            buf = StringIO()
            df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False)

            result = buf.getvalue()
            expected_rows = ["A", "hello", '{"hello"}']
            expected = tm.convert_rows_list_to_csv_str(expected_rows)
            assert result == expected
Example #16
0
    def test_gz_lineend(self):
        # GH 25311
        df = pd.DataFrame({'a': [1, 2]})
        expected_rows = ['a', '1', '2']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        with ensure_clean('__test_gz_lineend.csv.gz') as path:
            df.to_csv(path, index=False)
            with tm.decompress_file(path, compression='gzip') as f:
                result = f.read().decode('utf-8')

        assert result == expected
Example #17
0
    def test_gz_lineend(self):
        # GH 25311
        df = pd.DataFrame({'a': [1, 2]})
        expected_rows = ['a', '1', '2']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        with ensure_clean('__test_gz_lineend.csv.gz') as path:
            df.to_csv(path, index=False)
            with tm.decompress_file(path, compression='gzip') as f:
                result = f.read().decode('utf-8')

        assert result == expected
Example #18
0
    def test_gz_lineend(self):
        # GH 25311
        df = pd.DataFrame({"a": [1, 2]})
        expected_rows = ["a", "1", "2"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        with ensure_clean("__test_gz_lineend.csv.gz") as path:
            df.to_csv(path, index=False)
            with tm.decompress_file(path, compression="gzip") as f:
                result = f.read().decode("utf-8")

        assert result == expected
Example #19
0
    def test_to_csv_float_format(self):
        # testing if float_format is taken into account for the index
        # GH 11553
        df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1})

        expected_rows = ['a,b,c', '0,2.20,1', '1,3.30,1']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.set_index('a').to_csv(float_format='%.2f') == expected

        # same for a multi-index
        assert df.set_index(['a', 'b']).to_csv(float_format='%.2f') == expected
Example #20
0
    def test_to_csv_decimal(self):
        # see gh-781
        df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})

        expected_rows = [',col1,col2,col3',
                         '0,1,a,10.1']
        expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv() == expected_default

        expected_rows = [';col1;col2;col3',
                         '0;1;a;10,1']
        expected_european_excel = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(decimal=',', sep=';') == expected_european_excel

        expected_rows = [',col1,col2,col3',
                         '0,1,a,10.10']
        expected_float_format_default = tm.convert_rows_list_to_csv_str(
            expected_rows)
        assert df.to_csv(float_format='%.2f') == expected_float_format_default

        expected_rows = [';col1;col2;col3',
                         '0;1;a;10,10']
        expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(decimal=',', sep=';',
                         float_format='%.2f') == expected_float_format

        # see gh-11553: testing if decimal is taken into account for '0.0'
        df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1})

        expected_rows = ['a,b,c',
                         '0^0,2^2,1',
                         '1^1,3^3,1']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.to_csv(index=False, decimal='^') == expected

        # same but for an index
        assert df.set_index('a').to_csv(decimal='^') == expected

        # same for a multi-index
        assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected
Example #21
0
    def test_multi_index_header(self):
        # see gh-5539
        columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
        df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
        df.columns = columns

        header = ["a", "b", "c", "d"]
        result = df.to_csv(header=header)

        expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"]
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #22
0
    def test_to_csv_stdout_file(self, capsys):
        """Write a frame to ``sys.stdout`` and check the captured text.

        ``capsys`` is only used for the ``out`` attribute of its
        ``readouterr()`` result. The stream must still be open afterwards.
        """
        # GH 21561
        frame = pd.DataFrame(
            [['foo', 'bar'], ['baz', 'qux']],
            columns=['name_1', 'name_2'],
        )
        wanted = tm.convert_rows_list_to_csv_str(
            [',name_1,name_2', '0,foo,bar', '1,baz,qux'])

        frame.to_csv(sys.stdout, encoding='ascii')
        written = capsys.readouterr().out

        assert written == wanted
        assert not sys.stdout.closed
Example #23
0
    def test_to_csv_stdout_file(self, capsys):
        """Write a frame to ``sys.stdout`` and check the captured text.

        ``capsys`` supplies the captured output through ``readouterr()``.
        Writing must leave ``sys.stdout`` open.
        """
        # GH 21561
        cells = [["foo", "bar"], ["baz", "qux"]]
        frame = pd.DataFrame(cells, columns=["name_1", "name_2"])
        wanted = tm.convert_rows_list_to_csv_str(
            [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
        )

        frame.to_csv(sys.stdout, encoding="ascii")
        outcome = capsys.readouterr()

        assert outcome.out == wanted
        assert not sys.stdout.closed
Example #24
0
    def test_to_csv_multi_index(self):
        # see gh-6618
        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))

        exp_rows = [",1", ",2", "0,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["1", "2", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame(
            [1],
            columns=pd.MultiIndex.from_arrays([[1], [2]]),
            index=pd.MultiIndex.from_arrays([[1], [2]]),
        )

        exp_rows = [",,1", ",,2", "1,2,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["1", "2", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))

        exp_rows = [",foo", ",bar", "0,1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ["foo", "bar", "1"]
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp
Example #25
0
    def test_to_csv_multi_index(self):
        # see gh-6618
        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))

        exp_rows = [',1',
                    ',2',
                    '0,1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ['1', '2', '1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]),
                       index=pd.MultiIndex.from_arrays([[1], [2]]))

        exp_rows = [',,1', ',,2', '1,2,1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ['1', '2', '1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp

        df = DataFrame(
            [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']]))

        exp_rows = [',foo', ',bar', '0,1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv() == exp

        exp_rows = ['foo', 'bar', '1']
        exp = tm.convert_rows_list_to_csv_str(exp_rows)
        assert df.to_csv(index=False) == exp
Example #26
0
    def test_to_csv_index_no_leading_comma(self):
        df = DataFrame({
            'A': [1, 2, 3],
            'B': [4, 5, 6]
        },
                       index=['one', 'two', 'three'])

        buf = StringIO()
        df.to_csv(buf, index_label=False)

        expected_rows = ['A,B', 'one,1,4', 'two,2,5', 'three,3,6']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert buf.getvalue() == expected
Example #27
0
    def test_to_csv_index_no_leading_comma(self):
        df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                       index=['one', 'two', 'three'])

        buf = StringIO()
        df.to_csv(buf, index_label=False)

        expected_rows = ['A,B',
                         'one,1,4',
                         'two,2,5',
                         'three,3,6']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert buf.getvalue() == expected
Example #28
0
    def test_period_index_date_overflow(self):
        # see gh-15982

        dates = ["1990-01-01", "2000-01-01", "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = pd.DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [',0',
                         '1990-01-01,4',
                         '2000-01-01,5',
                         '3005-01-01,6']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        date_format = "%m-%d-%Y"
        result = df.to_csv(date_format=date_format)

        expected_rows = [',0',
                         '01-01-1990,4',
                         '01-01-2000,5',
                         '01-01-3005,6']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected

        # Overflow with pd.NaT
        dates = ["1990-01-01", pd.NaT, "3005-01-01"]
        index = pd.PeriodIndex(dates, freq="D")

        df = pd.DataFrame([4, 5, 6], index=index)
        result = df.to_csv()

        expected_rows = [',0',
                         '1990-01-01,4',
                         ',5',
                         '3005-01-01,6']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #29
0
 def test_to_csv_timedelta_precision(self):
     # GH 6783
     s = pd.Series([1, 1]).astype("timedelta64[ns]")
     buf = io.StringIO()
     s.to_csv(buf)
     result = buf.getvalue()
     expected_rows = [
         ",0",
         "0,0 days 00:00:00.000000001",
         "1,0 days 00:00:00.000000001",
     ]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert result == expected
Example #30
0
    def test_to_csv_write_to_open_file_with_newline_py3(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({"a": ["x", "y", "z"]})
        expected_rows = ["x", "y", "z"]
        expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
        with tm.ensure_clean("test.txt") as path:
            with open(path, "w", newline="") as f:
                f.write("manual header\n")
                df.to_csv(f, header=None, index=None)

            with open(path, "rb") as f:
                assert f.read() == bytes(expected, "utf-8")
Example #31
0
    def test_to_csv_stdout_file(self, capsys):
        """Write a frame to ``sys.stdout`` and check the captured text.

        The captured output is taken from ``capsys.readouterr()``, and the
        stream is required to remain open after the write.
        """
        # GH 21561
        header = ['name_1', 'name_2']
        frame = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']], columns=header)
        wanted = tm.convert_rows_list_to_csv_str([',name_1,name_2',
                                                  '0,foo,bar',
                                                  '1,baz,qux'])

        frame.to_csv(sys.stdout, encoding='ascii')

        assert capsys.readouterr().out == wanted
        assert not sys.stdout.closed
Example #32
0
    def test_to_csv_unicodewriter_quoting(self):
        df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})

        buf = StringIO()
        df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC,
                  encoding='utf-8')

        result = buf.getvalue()
        expected_rows = ['"A","B"',
                         '1,"foo"',
                         '2,"bar"',
                         '3,"baz"']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #33
0
    def test_to_csv_quote_none(self):
        # GH4328
        df = DataFrame({'A': ['hello', '{"hello"}']})
        for encoding in (None, 'utf-8'):
            buf = StringIO()
            df.to_csv(buf, quoting=csv.QUOTE_NONE,
                      encoding=encoding, index=False)

            result = buf.getvalue()
            expected_rows = ['A',
                             'hello',
                             '{"hello"}']
            expected = tm.convert_rows_list_to_csv_str(expected_rows)
            assert result == expected
Example #34
0
    def test_to_csv_float_format(self):
        # testing if float_format is taken into account for the index
        # GH 11553
        df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1})

        expected_rows = ['a,b,c',
                         '0,2.20,1',
                         '1,3.30,1']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df.set_index('a').to_csv(float_format='%.2f') == expected

        # same for a multi-index
        assert df.set_index(['a', 'b']).to_csv(
            float_format='%.2f') == expected
Example #35
0
    def test_to_csv_write_to_open_file_with_newline_py3(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x", "y", "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w', newline='') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == bytes(expected, 'utf-8')
Example #36
0
    def test_multi_index_header(self):
        # see gh-5539
        columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2),
                                             ("b", 1), ("b", 2)])
        df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
        df.columns = columns

        header = ["a", "b", "c", "d"]
        result = df.to_csv(header=header)

        expected_rows = [',a,b,c,d',
                         '0,1,2,3,4',
                         '1,5,6,7,8']
        expected = tm.convert_rows_list_to_csv_str(expected_rows)
        assert result == expected
Example #37
0
    def test_to_csv_write_to_open_file_with_newline_py3(self):
        # see gh-21696
        # see gh-20353
        df = pd.DataFrame({'a': ['x', 'y', 'z']})
        expected_rows = ["x",
                         "y",
                         "z"]
        expected = ("manual header\n" +
                    tm.convert_rows_list_to_csv_str(expected_rows))
        with tm.ensure_clean('test.txt') as path:
            with open(path, 'w', newline='') as f:
                f.write('manual header\n')
                df.to_csv(f, header=None, index=None)

            with open(path, 'rb') as f:
                assert f.read() == bytes(expected, 'utf-8')
Example #38
0
 def test_csv_to_string(self):
     df = DataFrame({"col": [1, 2]})
     expected_rows = [",col", "0,1", "1,2"]
     expected = tm.convert_rows_list_to_csv_str(expected_rows)
     assert df.to_csv() == expected
Example #39
0
    def test_to_csv_date_format(self):
        # GH 10209
        df_sec = DataFrame(
            {"A": pd.date_range("20130101", periods=5, freq="s")})
        df_day = DataFrame(
            {"A": pd.date_range("20130101", periods=5, freq="d")})

        expected_rows = [
            ",A",
            "0,2013-01-01 00:00:00",
            "1,2013-01-01 00:00:01",
            "2,2013-01-01 00:00:02",
            "3,2013-01-01 00:00:03",
            "4,2013-01-01 00:00:04",
        ]
        expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv() == expected_default_sec

        expected_rows = [
            ",A",
            "0,2013-01-01 00:00:00",
            "1,2013-01-02 00:00:00",
            "2,2013-01-03 00:00:00",
            "3,2013-01-04 00:00:00",
            "4,2013-01-05 00:00:00",
        ]
        expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_day.to_csv(
            date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day

        expected_rows = [
            ",A",
            "0,2013-01-01",
            "1,2013-01-01",
            "2,2013-01-01",
            "3,2013-01-01",
            "4,2013-01-01",
        ]
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec

        expected_rows = [
            ",A",
            "0,2013-01-01",
            "1,2013-01-02",
            "2,2013-01-03",
            "3,2013-01-04",
            "4,2013-01-05",
        ]
        expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
        assert df_day.to_csv() == expected_default_day
        assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day

        # see gh-7791
        #
        # Testing if date_format parameter is taken into account
        # for multi-indexed DataFrames.
        df_sec["B"] = 0
        df_sec["C"] = 1

        expected_rows = ["A,B,C", "2013-01-01,0,1"]
        expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)

        df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
        assert df_sec_grouped.mean().to_csv(
            date_format="%Y-%m-%d") == expected_ymd_sec
Example #40
0
    def test_to_csv_quoting(self):
        """Check ``DataFrame.to_csv`` output for each ``csv`` quoting mode.

        A small mixed-dtype frame is written with the default settings,
        ``QUOTE_MINIMAL``, ``QUOTE_ALL``, ``QUOTE_NONNUMERIC`` and
        ``QUOTE_NONE`` (with and without an ``escapechar``).  Afterwards a
        quoted field holding a Windows line terminator is round-tripped
        through ``read_csv``/``to_csv``, and ``QUOTE_ALL`` is checked on a
        frame with a two-level index.
        """
        to_text = tm.convert_rows_list_to_csv_str
        frame = DataFrame({
            'c_bool': [True, False],
            'c_float': [1.0, 3.2],
            'c_int': [42, np.nan],
            'c_string': ['a', 'b,c'],
        })

        # Only the value containing the delimiter is expected to be quoted.
        minimal_rows = [',c_bool,c_float,c_int,c_string',
                        '0,True,1.0,42.0,a',
                        '1,False,3.2,,"b,c"']

        # No ``quoting`` argument and an explicit ``quoting=None`` are
        # expected to produce the same text.
        minimal = to_text(minimal_rows)
        for options in ({}, {'quoting': None}):
            assert frame.to_csv(**options) == minimal

        # ... and so is an explicit QUOTE_MINIMAL.
        minimal = to_text(minimal_rows)
        assert frame.to_csv(quoting=csv.QUOTE_MINIMAL) == minimal

        quoted_header = '"","c_bool","c_float","c_int","c_string"'

        # QUOTE_ALL: every field, including the empty one, is wrapped.
        all_quoted = to_text([quoted_header,
                              '"0","True","1.0","42.0","a"',
                              '"1","False","3.2","","b,c"'])
        assert frame.to_csv(quoting=csv.QUOTE_ALL) == all_quoted

        # see gh-12922, gh-13259: make sure changes to
        # the formatters do not break this behaviour
        nonnumeric = to_text([quoted_header,
                              '0,True,1.0,42.0,"a"',
                              '1,False,3.2,"","b,c"'])
        assert frame.to_csv(quoting=csv.QUOTE_NONNUMERIC) == nonnumeric

        # QUOTE_NONE cannot write 'b,c' unless an escape character is
        # supplied; leaving it out or passing None must raise.
        msg = "need to escape, but no escapechar set"
        for options in ({}, {'escapechar': None}):
            with pytest.raises(csv.Error, match=msg):
                frame.to_csv(quoting=csv.QUOTE_NONE, **options)

        # With an escape character the delimiter is prefixed instead.  In the
        # 'f' case the expected header also shows 'c_float' as 'c_ffloat'.
        escaped_cases = [
            ('!', [',c_bool,c_float,c_int,c_string',
                   '0,True,1.0,42.0,a',
                   '1,False,3.2,,b!,c']),
            ('f', [',c_bool,c_ffloat,c_int,c_string',
                   '0,True,1.0,42.0,a',
                   '1,False,3.2,,bf,c']),
        ]
        for escape, rows in escaped_cases:
            wanted = to_text(rows)
            written = frame.to_csv(quoting=csv.QUOTE_NONE,
                                   escapechar=escape)
            assert written == wanted

        # see gh-3503: a quoted field containing a Windows line terminator
        # must come back unchanged when written with an encoding.
        crlf_text = to_text(['a,b,c',
                             '1,"test \r\n",3'])
        reloaded = pd.read_csv(StringIO(crlf_text))

        sink = StringIO()
        reloaded.to_csv(sink, encoding='utf-8', index=False)
        assert sink.getvalue() == crlf_text

        # xref gh-7791: make sure the quoting parameter is passed through
        # with multi-indexes
        indexed = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
        indexed = indexed.set_index(['a', 'b'])

        wanted = to_text(['"a","b","c"',
                          '"1","3","5"',
                          '"2","4","6"'])
        assert indexed.to_csv(quoting=csv.QUOTE_ALL) == wanted
Example #41
0
    def test_to_csv_quoting(self):
        """Verify that ``to_csv`` honours ``quoting`` and ``escapechar``.

        The same four-column frame is serialised under the default settings
        and under each ``csv.QUOTE_*`` constant, including the error raised
        by ``QUOTE_NONE`` when no escape character is available.  The test
        finishes with a ``read_csv``/``to_csv`` round trip of a quoted field
        containing a Windows line terminator and a ``QUOTE_ALL`` check on a
        multi-indexed frame.
        """

        def check(frame, rows, *option_sets):
            # Render the expected text once, then compare it with what
            # ``frame.to_csv`` returns for each set of keyword arguments.
            wanted = tm.convert_rows_list_to_csv_str(rows)
            for options in option_sets:
                written = frame.to_csv(**options)
                assert written == wanted

        data = DataFrame(
            {
                "c_bool": [True, False],
                "c_float": [1.0, 3.2],
                "c_int": [42, np.nan],
                "c_string": ["a", "b,c"],
            }
        )

        plain_header = ",c_bool,c_float,c_int,c_string"
        quoted_header = '"","c_bool","c_float","c_int","c_string"'
        plain_first_row = "0,True,1.0,42.0,a"

        # Default, ``quoting=None`` and QUOTE_MINIMAL all share one expected
        # output: only the value containing the delimiter is quoted.
        minimal_rows = [plain_header, plain_first_row, '1,False,3.2,,"b,c"']
        check(data, minimal_rows, {}, {"quoting": None})
        check(data, minimal_rows, {"quoting": csv.QUOTE_MINIMAL})

        # QUOTE_ALL wraps every field, the empty one included.
        check(
            data,
            [
                quoted_header,
                '"0","True","1.0","42.0","a"',
                '"1","False","3.2","","b,c"',
            ],
            {"quoting": csv.QUOTE_ALL},
        )

        # see gh-12922, gh-13259: make sure changes to
        # the formatters do not break this behaviour
        check(
            data,
            [
                quoted_header,
                '0,True,1.0,42.0,"a"',
                '1,False,3.2,"","b,c"',
            ],
            {"quoting": csv.QUOTE_NONNUMERIC},
        )

        # Without an escape character QUOTE_NONE has no way to emit "b,c".
        msg = "need to escape, but no escapechar set"
        with pytest.raises(csv.Error, match=msg):
            data.to_csv(quoting=csv.QUOTE_NONE)

        with pytest.raises(csv.Error, match=msg):
            data.to_csv(quoting=csv.QUOTE_NONE, escapechar=None)

        # With one, the delimiter inside the value is prefixed by it.
        check(
            data,
            [plain_header, plain_first_row, "1,False,3.2,,b!,c"],
            {"quoting": csv.QUOTE_NONE, "escapechar": "!"},
        )

        # When the escape character is "f", the expected header shows
        # "c_float" written as "c_ffloat" as well.
        check(
            data,
            [
                ",c_bool,c_ffloat,c_int,c_string",
                plain_first_row,
                "1,False,3.2,,bf,c",
            ],
            {"quoting": csv.QUOTE_NONE, "escapechar": "f"},
        )

        # see gh-3503: a quoted field holding a Windows line terminator must
        # be written back unchanged when an encoding is given.
        source_text = tm.convert_rows_list_to_csv_str(
            ["a,b,c", '1,"test \r\n",3']
        )
        parsed = pd.read_csv(StringIO(source_text))

        out = StringIO()
        parsed.to_csv(out, encoding="utf-8", index=False)
        assert out.getvalue() == source_text

        # xref gh-7791: make sure the quoting parameter is passed through
        # with multi-indexes
        indexed = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
        indexed = indexed.set_index(["a", "b"])
        check(
            indexed,
            ['"a","b","c"', '"1","3","5"', '"2","4","6"'],
            {"quoting": csv.QUOTE_ALL},
        )