def test_to_csv_na_rep(self): # see gh-11553 # # Testing if NaN values are correctly represented in the index. df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index("a").to_csv(na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected # now with an index containing only NaNs df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "_,0,2", "_,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index("a").to_csv(na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected # check if na_rep parameter does not break anything when no NaN df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]}) expected_rows = ["a,b,c", "0,0,2", "0,1,3"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index("a").to_csv(na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
def test_to_csv_na_rep(self): # see gh-11553 # # Testing if NaN values are correctly represented in the index. df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '0.0,0,2', '_,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # now with an index containing only NaNs df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '_,0,2', '_,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # check if na_rep parameter does not break anything when no NaN df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '0,0,2', '0,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
def test_to_csv_decimal(self): # see gh-781 df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) expected_rows = [",col1,col2,col3", "0,1,a,10.1"] expected_default = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv() == expected_default expected_rows = [";col1;col2;col3", "0;1;a;10,1"] expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(decimal=",", sep=";") == expected_european_excel expected_rows = [",col1,col2,col3", "0,1,a,10.10"] expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(float_format="%.2f") == expected_float_format_default expected_rows = [";col1;col2;col3", "0;1;a;10,10"] expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) assert ( df.to_csv(decimal=",", sep=";", float_format="%.2f") == expected_float_format ) # see gh-11553: testing if decimal is taken into account for '0.0' df = pd.DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(index=False, decimal="^") == expected # same but for an index assert df.set_index("a").to_csv(decimal="^") == expected # same for a multi-index assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_na_rep(self): # see gh-11553 # # Testing if NaN values are correctly represented in the index. df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '0.0,0,2', '_,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # now with an index containing only NaNs df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '_,0,2', '_,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected # check if na_rep parameter does not break anything when no NaN df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) expected_rows = ['a,b,c', '0,0,2', '0,1,3'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(na_rep='_') == expected assert df.set_index(['a', 'b']).to_csv(na_rep='_') == expected
def test_period_index_date_overflow(self): # see gh-15982 dates = ["1990-01-01", "2000-01-01", "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") df = pd.DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected date_format = "%m-%d-%Y" result = df.to_csv(date_format=date_format) expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected # Overflow with pd.NaT dates = ["1990-01-01", pd.NaT, "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") df = pd.DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_decimal(self): # see gh-781 df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) expected_rows = [',col1,col2,col3', '0,1,a,10.1'] expected_default = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv() == expected_default expected_rows = [';col1;col2;col3', '0;1;a;10,1'] expected_european_excel = tm.convert_rows_list_to_csv_str( expected_rows) assert df.to_csv(decimal=',', sep=';') == expected_european_excel expected_rows = [',col1,col2,col3', '0,1,a,10.10'] expected_float_format_default = tm.convert_rows_list_to_csv_str( expected_rows) assert df.to_csv(float_format='%.2f') == expected_float_format_default expected_rows = [';col1;col2;col3', '0;1;a;10,10'] expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(decimal=',', sep=';', float_format='%.2f') == expected_float_format # see gh-11553: testing if decimal is taken into account for '0.0' df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1}) expected_rows = ['a,b,c', '0^0,2^2,1', '1^1,3^3,1'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(index=False, decimal='^') == expected # same but for an index assert df.set_index('a').to_csv(decimal='^') == expected # same for a multi-index assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected
def test_to_csv_date_format(self): # GH 10209 df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') }) df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') }) expected_rows = [',A', '0,2013-01-01 00:00:00', '1,2013-01-01 00:00:01', '2,2013-01-01 00:00:02', '3,2013-01-01 00:00:03', '4,2013-01-01 00:00:04'] expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows) assert df_sec.to_csv() == expected_default_sec expected_rows = [',A', '0,2013-01-01 00:00:00', '1,2013-01-02 00:00:00', '2,2013-01-03 00:00:00', '3,2013-01-04 00:00:00', '4,2013-01-05 00:00:00'] expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) assert (df_day.to_csv(date_format='%Y-%m-%d %H:%M:%S') == expected_ymdhms_day) expected_rows = [',A', '0,2013-01-01', '1,2013-01-01', '2,2013-01-01', '3,2013-01-01', '4,2013-01-01'] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) assert df_sec.to_csv(date_format='%Y-%m-%d') == expected_ymd_sec expected_rows = [',A', '0,2013-01-01', '1,2013-01-02', '2,2013-01-03', '3,2013-01-04', '4,2013-01-05'] expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) assert df_day.to_csv() == expected_default_day assert df_day.to_csv(date_format='%Y-%m-%d') == expected_default_day # see gh-7791 # # Testing if date_format parameter is taken into account # for multi-indexed DataFrames. df_sec['B'] = 0 df_sec['C'] = 1 expected_rows = ['A,B,C', '2013-01-01,0,1'] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) assert (df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d') == expected_ymd_sec)
def test_csv_to_string(self): df = DataFrame({'col': [1, 2]}) expected_rows = [',col', '0,1', '1,2'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv() == expected
def test_to_csv_index_no_leading_comma(self): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) buf = StringIO() df.to_csv(buf, index_label=False) expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert buf.getvalue() == expected
def test_convert_rows_list_to_csv_str(self): rows_list = ["aaa", "bbb", "ccc"] ret = tm.convert_rows_list_to_csv_str(rows_list) if compat.is_platform_windows(): expected = "aaa\r\nbbb\r\nccc\r\n" else: expected = "aaa\nbbb\nccc\n" assert ret == expected
def test_to_csv_unicodewriter_quoting(self): df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]}) buf = StringIO() df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8") result = buf.getvalue() expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_convert_rows_list_to_csv_str(self): rows_list = ["aaa", "bbb", "ccc"] ret = tm.convert_rows_list_to_csv_str(rows_list) if compat.is_platform_windows(): expected = "aaa\r\nbbb\r\nccc\r\n" else: expected = "aaa\nbbb\nccc\n" assert ret == expected
def test_to_csv_na_rep_long_string(self, df_new_type): # see gh-25099 df = pd.DataFrame({"c": [float("nan")] * 3}) df = df.astype(df_new_type) expected_rows = ["c", "mynull", "mynull", "mynull"] expected = tm.convert_rows_list_to_csv_str(expected_rows) result = df.to_csv(index=False, na_rep="mynull", encoding="ascii") assert expected == result
def test_to_csv_float_format(self): # testing if float_format is taken into account for the index # GH 11553 df = pd.DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index("a").to_csv(float_format="%.2f") == expected # same for a multi-index assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_quote_none(self): # GH4328 df = DataFrame({"A": ["hello", '{"hello"}']}) for encoding in (None, "utf-8"): buf = StringIO() df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False) result = buf.getvalue() expected_rows = ["A", "hello", '{"hello"}'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_gz_lineend(self):
    """Line endings written to a ``.csv.gz`` path match the expected CSV text."""
    # GH 25311
    frame = pd.DataFrame({"a": [1, 2]})
    expected = tm.convert_rows_list_to_csv_str(["a", "1", "2"])

    with ensure_clean("__test_gz_lineend.csv.gz") as gz_path:
        frame.to_csv(gz_path, index=False)
        # Read the raw bytes back so the line terminators are compared as written.
        with tm.decompress_file(gz_path, compression="gzip") as handle:
            written = handle.read().decode("utf-8")

    assert written == expected
def test_gz_lineend(self):
    """Line endings written to a ``.csv.gz`` path match the expected CSV text."""
    # GH 25311
    frame = pd.DataFrame({"a": [1, 2]})
    expected = tm.convert_rows_list_to_csv_str(["a", "1", "2"])

    with ensure_clean("__test_gz_lineend.csv.gz") as gz_path:
        frame.to_csv(gz_path, index=False)
        # Read the raw bytes back so the line terminators are compared as written.
        with tm.decompress_file(gz_path, compression="gzip") as handle:
            written = handle.read().decode("utf-8")

    assert written == expected
def test_gz_lineend(self):
    """Line endings written to a ``.csv.gz`` path match the expected CSV text."""
    # GH 25311
    frame = pd.DataFrame({"a": [1, 2]})
    expected = tm.convert_rows_list_to_csv_str(["a", "1", "2"])

    with ensure_clean("__test_gz_lineend.csv.gz") as gz_path:
        frame.to_csv(gz_path, index=False)
        # Read the raw bytes back so the line terminators are compared as written.
        with tm.decompress_file(gz_path, compression="gzip") as handle:
            written = handle.read().decode("utf-8")

    assert written == expected
def test_to_csv_float_format(self): # testing if float_format is taken into account for the index # GH 11553 df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) expected_rows = ['a,b,c', '0,2.20,1', '1,3.30,1'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(float_format='%.2f') == expected # same for a multi-index assert df.set_index(['a', 'b']).to_csv(float_format='%.2f') == expected
def test_to_csv_decimal(self): # see gh-781 df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) expected_rows = [',col1,col2,col3', '0,1,a,10.1'] expected_default = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv() == expected_default expected_rows = [';col1;col2;col3', '0;1;a;10,1'] expected_european_excel = tm.convert_rows_list_to_csv_str( expected_rows) assert df.to_csv(decimal=',', sep=';') == expected_european_excel expected_rows = [',col1,col2,col3', '0,1,a,10.10'] expected_float_format_default = tm.convert_rows_list_to_csv_str( expected_rows) assert df.to_csv(float_format='%.2f') == expected_float_format_default expected_rows = [';col1;col2;col3', '0;1;a;10,10'] expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(decimal=',', sep=';', float_format='%.2f') == expected_float_format # see gh-11553: testing if decimal is taken into account for '0.0' df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1}) expected_rows = ['a,b,c', '0^0,2^2,1', '1^1,3^3,1'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(index=False, decimal='^') == expected # same but for an index assert df.set_index('a').to_csv(decimal='^') == expected # same for a multi-index assert df.set_index(['a', 'b']).to_csv(decimal="^") == expected
def test_multi_index_header(self): # see gh-5539 columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) df.columns = columns header = ["a", "b", "c", "d"] result = df.to_csv(header=header) expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_stdout_file(self, capsys): # GH 21561 df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']], columns=['name_1', 'name_2']) expected_rows = [',name_1,name_2', '0,foo,bar', '1,baz,qux'] expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) df.to_csv(sys.stdout, encoding='ascii') captured = capsys.readouterr() assert captured.out == expected_ascii assert not sys.stdout.closed
def test_to_csv_stdout_file(self, capsys): # GH 21561 df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"]) expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"] expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) df.to_csv(sys.stdout, encoding="ascii") captured = capsys.readouterr() assert captured.out == expected_ascii assert not sys.stdout.closed
def test_to_csv_multi_index(self): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp_rows = [",1", ",2", "0,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ["1", "2", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp df = DataFrame( [1], columns=pd.MultiIndex.from_arrays([[1], [2]]), index=pd.MultiIndex.from_arrays([[1], [2]]), ) exp_rows = [",,1", ",,2", "1,2,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ["1", "2", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]])) exp_rows = [",foo", ",bar", "0,1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ["foo", "bar", "1"] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp
def test_to_csv_multi_index(self): # see gh-6618 df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp_rows = [',1', ',2', '0,1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ['1', '2', '1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), index=pd.MultiIndex.from_arrays([[1], [2]])) exp_rows = [',,1', ',,2', '1,2,1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ['1', '2', '1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp df = DataFrame( [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) exp_rows = [',foo', ',bar', '0,1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv() == exp exp_rows = ['foo', 'bar', '1'] exp = tm.convert_rows_list_to_csv_str(exp_rows) assert df.to_csv(index=False) == exp
def test_to_csv_index_no_leading_comma(self): df = DataFrame({ 'A': [1, 2, 3], 'B': [4, 5, 6] }, index=['one', 'two', 'three']) buf = StringIO() df.to_csv(buf, index_label=False) expected_rows = ['A,B', 'one,1,4', 'two,2,5', 'three,3,6'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert buf.getvalue() == expected
def test_to_csv_index_no_leading_comma(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['one', 'two', 'three']) buf = StringIO() df.to_csv(buf, index_label=False) expected_rows = ['A,B', 'one,1,4', 'two,2,5', 'three,3,6'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert buf.getvalue() == expected
def test_period_index_date_overflow(self): # see gh-15982 dates = ["1990-01-01", "2000-01-01", "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") df = pd.DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [',0', '1990-01-01,4', '2000-01-01,5', '3005-01-01,6'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected date_format = "%m-%d-%Y" result = df.to_csv(date_format=date_format) expected_rows = [',0', '01-01-1990,4', '01-01-2000,5', '01-01-3005,6'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected # Overflow with pd.NaT dates = ["1990-01-01", pd.NaT, "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") df = pd.DataFrame([4, 5, 6], index=index) result = df.to_csv() expected_rows = [',0', '1990-01-01,4', ',5', '3005-01-01,6'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_timedelta_precision(self): # GH 6783 s = pd.Series([1, 1]).astype("timedelta64[ns]") buf = io.StringIO() s.to_csv(buf) result = buf.getvalue() expected_rows = [ ",0", "0,0 days 00:00:00.000000001", "1,0 days 00:00:00.000000001", ] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
    """Append CSV rows to a handle opened with ``newline=""`` after a header."""
    # see gh-21696
    # see gh-20353
    frame = pd.DataFrame({"a": ["x", "y", "z"]})
    header_line = "manual header\n"
    expected = header_line + tm.convert_rows_list_to_csv_str(["x", "y", "z"])

    with tm.ensure_clean("test.txt") as path:
        with open(path, "w", newline="") as out:
            out.write(header_line)
            frame.to_csv(out, header=None, index=None)

        # Compare raw bytes so no newline translation happens on the read side.
        with open(path, "rb") as raw:
            assert raw.read() == bytes(expected, "utf-8")
def test_to_csv_stdout_file(self, capsys): # GH 21561 df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']], columns=['name_1', 'name_2']) expected_rows = [',name_1,name_2', '0,foo,bar', '1,baz,qux'] expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) df.to_csv(sys.stdout, encoding='ascii') captured = capsys.readouterr() assert captured.out == expected_ascii assert not sys.stdout.closed
def test_to_csv_unicodewriter_quoting(self): df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']}) buf = StringIO() df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding='utf-8') result = buf.getvalue() expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_quote_none(self): # GH4328 df = DataFrame({'A': ['hello', '{"hello"}']}) for encoding in (None, 'utf-8'): buf = StringIO() df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False) result = buf.getvalue() expected_rows = ['A', 'hello', '{"hello"}'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_float_format(self): # testing if float_format is taken into account for the index # GH 11553 df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) expected_rows = ['a,b,c', '0,2.20,1', '1,3.30,1'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.set_index('a').to_csv(float_format='%.2f') == expected # same for a multi-index assert df.set_index(['a', 'b']).to_csv( float_format='%.2f') == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
    """Append CSV rows to a handle opened with ``newline=""`` after a header."""
    # see gh-21696
    # see gh-20353
    frame = pd.DataFrame({"a": ["x", "y", "z"]})
    header_line = "manual header\n"
    expected = header_line + tm.convert_rows_list_to_csv_str(["x", "y", "z"])

    with tm.ensure_clean("test.txt") as path:
        with open(path, "w", newline="") as out:
            out.write(header_line)
            frame.to_csv(out, header=None, index=None)

        # Compare raw bytes so no newline translation happens on the read side.
        with open(path, "rb") as raw:
            assert raw.read() == bytes(expected, "utf-8")
def test_multi_index_header(self): # see gh-5539 columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) df.columns = columns header = ["a", "b", "c", "d"] result = df.to_csv(header=header) expected_rows = [',a,b,c,d', '0,1,2,3,4', '1,5,6,7,8'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
    """Append CSV rows to a handle opened with ``newline=""`` after a header."""
    # see gh-21696
    # see gh-20353
    frame = pd.DataFrame({"a": ["x", "y", "z"]})
    header_line = "manual header\n"
    expected = header_line + tm.convert_rows_list_to_csv_str(["x", "y", "z"])

    with tm.ensure_clean("test.txt") as path:
        with open(path, "w", newline="") as out:
            out.write(header_line)
            frame.to_csv(out, header=None, index=None)

        # Compare raw bytes so no newline translation happens on the read side.
        with open(path, "rb") as raw:
            assert raw.read() == bytes(expected, "utf-8")
def test_csv_to_string(self): df = DataFrame({"col": [1, 2]}) expected_rows = [",col", "0,1", "1,2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv() == expected
def test_to_csv_date_format(self): # GH 10209 df_sec = DataFrame( {"A": pd.date_range("20130101", periods=5, freq="s")}) df_day = DataFrame( {"A": pd.date_range("20130101", periods=5, freq="d")}) expected_rows = [ ",A", "0,2013-01-01 00:00:00", "1,2013-01-01 00:00:01", "2,2013-01-01 00:00:02", "3,2013-01-01 00:00:03", "4,2013-01-01 00:00:04", ] expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows) assert df_sec.to_csv() == expected_default_sec expected_rows = [ ",A", "0,2013-01-01 00:00:00", "1,2013-01-02 00:00:00", "2,2013-01-03 00:00:00", "3,2013-01-04 00:00:00", "4,2013-01-05 00:00:00", ] expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) assert df_day.to_csv( date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day expected_rows = [ ",A", "0,2013-01-01", "1,2013-01-01", "2,2013-01-01", "3,2013-01-01", "4,2013-01-01", ] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec expected_rows = [ ",A", "0,2013-01-01", "1,2013-01-02", "2,2013-01-03", "3,2013-01-04", "4,2013-01-05", ] expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) assert df_day.to_csv() == expected_default_day assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day # see gh-7791 # # Testing if date_format parameter is taken into account # for multi-indexed DataFrames. df_sec["B"] = 0 df_sec["C"] = 1 expected_rows = ["A,B,C", "2013-01-01,0,1"] expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) assert df_sec_grouped.mean().to_csv( date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_quoting(self):
    """Run ``to_csv`` through each ``csv`` quoting mode and escapechar setting."""
    as_csv = tm.convert_rows_list_to_csv_str
    frame = DataFrame(
        {
            "c_bool": [True, False],
            "c_float": [1.0, 3.2],
            "c_int": [42, np.nan],
            "c_string": ["a", "b,c"],
        }
    )
    plain_header = ",c_bool,c_float,c_int,c_string"
    quoted_header = '"","c_bool","c_float","c_int","c_string"'

    # No quoting argument and quoting=None must give the same text.
    expected = as_csv([plain_header, "0,True,1.0,42.0,a", '1,False,3.2,,"b,c"'])
    assert frame.to_csv() == expected
    assert frame.to_csv(quoting=None) == expected

    per_mode = [
        (
            csv.QUOTE_MINIMAL,
            [plain_header, "0,True,1.0,42.0,a", '1,False,3.2,,"b,c"'],
        ),
        (
            csv.QUOTE_ALL,
            [
                quoted_header,
                '"0","True","1.0","42.0","a"',
                '"1","False","3.2","","b,c"',
            ],
        ),
        # see gh-12922, gh-13259: make sure changes to
        # the formatters do not break this behaviour
        (
            csv.QUOTE_NONNUMERIC,
            [quoted_header, '0,True,1.0,42.0,"a"', '1,False,3.2,"","b,c"'],
        ),
    ]
    for mode, rows in per_mode:
        expected = as_csv(rows)
        assert frame.to_csv(quoting=mode) == expected

    # QUOTE_NONE cannot write the "b,c" cell unless an escapechar is given.
    msg = "need to escape, but no escapechar set"
    with pytest.raises(csv.Error, match=msg):
        frame.to_csv(quoting=csv.QUOTE_NONE)
    with pytest.raises(csv.Error, match=msg):
        frame.to_csv(quoting=csv.QUOTE_NONE, escapechar=None)

    per_escapechar = [
        ("!", [plain_header, "0,True,1.0,42.0,a", "1,False,3.2,,b!,c"]),
        # "f" also occurs inside the data, so it gets escaped with itself
        (
            "f",
            [
                ",c_bool,c_ffloat,c_int,c_string",
                "0,True,1.0,42.0,a",
                "1,False,3.2,,bf,c",
            ],
        ),
    ]
    for escapechar, rows in per_escapechar:
        expected = as_csv(rows)
        written = frame.to_csv(quoting=csv.QUOTE_NONE, escapechar=escapechar)
        assert written == expected

    # see gh-3503: quoting Windows line terminators
    # presents with encoding?
    text = as_csv(["a,b,c", '1,"test \r\n",3'])
    round_tripped = pd.read_csv(StringIO(text))
    sink = StringIO()
    round_tripped.to_csv(sink, encoding="utf-8", index=False)
    assert sink.getvalue() == text

    # xref gh-7791: make sure the quoting parameter is passed through
    # with multi-indexes
    indexed = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    indexed = indexed.set_index(["a", "b"])
    expected = as_csv(['"a","b","c"', '"1","3","5"', '"2","4","6"'])
    assert indexed.to_csv(quoting=csv.QUOTE_ALL) == expected
def test_to_csv_quoting(self):
    """Run ``to_csv`` through each ``csv`` quoting mode and escapechar setting."""
    as_csv = tm.convert_rows_list_to_csv_str
    frame = DataFrame(
        {
            "c_bool": [True, False],
            "c_float": [1.0, 3.2],
            "c_int": [42, np.nan],
            "c_string": ["a", "b,c"],
        }
    )
    plain_header = ",c_bool,c_float,c_int,c_string"
    quoted_header = '"","c_bool","c_float","c_int","c_string"'

    # No quoting argument and quoting=None must give the same text.
    expected = as_csv([plain_header, "0,True,1.0,42.0,a", '1,False,3.2,,"b,c"'])
    assert frame.to_csv() == expected
    assert frame.to_csv(quoting=None) == expected

    per_mode = [
        (
            csv.QUOTE_MINIMAL,
            [plain_header, "0,True,1.0,42.0,a", '1,False,3.2,,"b,c"'],
        ),
        (
            csv.QUOTE_ALL,
            [
                quoted_header,
                '"0","True","1.0","42.0","a"',
                '"1","False","3.2","","b,c"',
            ],
        ),
        # see gh-12922, gh-13259: make sure changes to
        # the formatters do not break this behaviour
        (
            csv.QUOTE_NONNUMERIC,
            [quoted_header, '0,True,1.0,42.0,"a"', '1,False,3.2,"","b,c"'],
        ),
    ]
    for mode, rows in per_mode:
        expected = as_csv(rows)
        assert frame.to_csv(quoting=mode) == expected

    # QUOTE_NONE cannot write the "b,c" cell unless an escapechar is given.
    msg = "need to escape, but no escapechar set"
    with pytest.raises(csv.Error, match=msg):
        frame.to_csv(quoting=csv.QUOTE_NONE)
    with pytest.raises(csv.Error, match=msg):
        frame.to_csv(quoting=csv.QUOTE_NONE, escapechar=None)

    per_escapechar = [
        ("!", [plain_header, "0,True,1.0,42.0,a", "1,False,3.2,,b!,c"]),
        # "f" also occurs inside the data, so it gets escaped with itself
        (
            "f",
            [
                ",c_bool,c_ffloat,c_int,c_string",
                "0,True,1.0,42.0,a",
                "1,False,3.2,,bf,c",
            ],
        ),
    ]
    for escapechar, rows in per_escapechar:
        expected = as_csv(rows)
        written = frame.to_csv(quoting=csv.QUOTE_NONE, escapechar=escapechar)
        assert written == expected

    # see gh-3503: quoting Windows line terminators
    # presents with encoding?
    text = as_csv(["a,b,c", '1,"test \r\n",3'])
    round_tripped = pd.read_csv(StringIO(text))
    sink = StringIO()
    round_tripped.to_csv(sink, encoding="utf-8", index=False)
    assert sink.getvalue() == text

    # xref gh-7791: make sure the quoting parameter is passed through
    # with multi-indexes
    indexed = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    indexed = indexed.set_index(["a", "b"])
    expected = as_csv(['"a","b","c"', '"1","3","5"', '"2","4","6"'])
    assert indexed.to_csv(quoting=csv.QUOTE_ALL) == expected