def test_read_csv_filepath_dtype(self):
    """Check how ``rcf.read_csv`` reports ``dtype`` mappings against a file.

    Mappings keyed by position or by column name (including names supplied
    via ``names``) are expected to yield an empty list.  Mappings that,
    per the assertion messages, use a too-high index or a non-existing key
    are expected to yield ``["read_csv.filepath-dtype"]``.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (file argument, keyword arguments, expected result, failure message).
    cases = [
        (self.testdata, {"dtype": {0: str, 1: int, 2: float}}, [], ok_msg),
        (self.testdata, {"dtype": {"A": str, "B": int, "C": float}}, [], ok_msg),
        (
            "data/data_noHeader.csv",
            {
                "names": ["Alpha", "Beta", "Gamma"],
                "dtype": {"Alpha": str, "Beta": int, "Gamma": float},
            },
            [],
            ok_msg,
        ),
        (
            self.testdata,
            {"dtype": {0: str, 4: int}},
            ["read_csv.filepath-dtype"],
            "Wrong result when dtype contains too high index!",
        ),
        (
            self.testdata,
            {"dtype": {"B": str, "Q": int}},
            ["read_csv.filepath-dtype"],
            "Wrong result when dtype contains non-existing key!",
        ),
    ]
    for path, kwargs, expected, message in cases:
        outcome = rcf.read_csv(path, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_filepath_skiprows_skipfooter(self):
    """Check the combined ``skiprows``/``skipfooter`` rule of ``rcf.read_csv``.

    Skipping 2 + 2 rows is expected to give an empty list; skipping 3 + 4
    rows is expected to give ``["read_csv.filepath-skiprows-skipfooter"]``
    (per the assertion message, because that exceeds the number of rows).
    """

    def run(head_rows, tail_rows):
        # Both calls use the same file and the python engine; only the counts vary.
        return rcf.read_csv(self.testdata,
                            skiprows=head_rows,
                            skipfooter=tail_rows,
                            engine="python")

    self.assertEqual(
        [],
        run(2, 2),
        "Non-empty list returned by correct call to read_csv!",
    )
    self.assertEqual(
        ["read_csv.filepath-skiprows-skipfooter"],
        run(3, 4),
        "Wrong result returned when skiprows + skipfooter > number of rows!",
    )
def test_read_csv_filepath_names(self):
    """Check how ``rcf.read_csv`` reports ``names`` against the file's columns.

    The first four calls (three names on different files, and two names on
    the default test file) are expected to yield an empty list.  A single
    name, four names, and a plain string for ``names`` are each expected to
    yield ``["read_csv.filepath-names"]``.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (file argument, keyword arguments, expected result, failure message).
    cases = [
        (self.testdata, {"names": ["D", "E", "F"]}, [], ok_msg),
        ("data/data_tabbed.csv", {"sep": "\t", "names": ["D", "E", "F"]}, [], ok_msg),
        (
            "data/data_plus.csv",
            {"skiprows": [0], "names": ["Alpha", "Beta", "Gamma"]},
            [],
            ok_msg,
        ),
        (self.testdata, {"names": ["D", "E"]}, [], ok_msg),
        (
            self.testdata,
            {"names": ["D"]},
            ["read_csv.filepath-names"],
            "Wrong result when names has too few elements!",
        ),
        (
            self.testdata,
            {"names": ["D", "E", "F", "G"]},
            ["read_csv.filepath-names"],
            "Wrong result when names has too many elements!",
        ),
        (
            self.testdata,
            {"names": "string"},
            ["read_csv.filepath-names"],
            "Wrong result when names is a string longer than number of columns!",
        ),
    ]
    for path, kwargs, expected, message in cases:
        outcome = rcf.read_csv(path, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_filepath_parse_dates(self):
    """Check how ``rcf.read_csv`` reports ``parse_dates`` against a file.

    Every call reads ``data/date_data.csv``.  The first six ``parse_dates``
    forms (boolean with ``index_col``, index list, name list, nested list,
    nested list with ``index_col``, dict) are expected to yield an empty
    list; the remaining four are expected to yield
    ``["read_csv.filepath-parse_dates"]``.
    """
    date_file = "data/date_data.csv"
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    not_combinable_msg = "Wrong result returned when parse_dates points at columns that cannot be combined as a date!"
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        ({"index_col": 0, "parse_dates": True}, [], ok_msg),
        ({"parse_dates": [0]}, [], ok_msg),
        ({"parse_dates": ["A"]}, [], ok_msg),
        ({"parse_dates": [[2, 3, 4]]}, [], ok_msg),
        ({"parse_dates": [[2, 3, 4]], "index_col": 1}, [], ok_msg),
        ({"parse_dates": {"P": [2, 3, 4]}}, [], ok_msg),
        (
            {"parse_dates": True},
            ["read_csv.filepath-parse_dates"],
            "Wrong result returned with parse_dates=True for non-date index!",
        ),
        (
            {"parse_dates": [1]},
            ["read_csv.filepath-parse_dates"],
            "Wrong result returned when parse_dates points at non-date column!",
        ),
        (
            {"parse_dates": [[0, 3, 4]]},
            ["read_csv.filepath-parse_dates"],
            not_combinable_msg,
        ),
        (
            {"parse_dates": {"P": [0, 3, 4]}},
            ["read_csv.filepath-parse_dates"],
            not_combinable_msg,
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(date_file, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_na(self):
    """Check the NA-related argument rules of ``rcf.read_csv``.

    ``na_values`` together with ``keep_default_na=True`` is expected to
    yield an empty list.  Combining ``na_filter=False`` with ``na_values``
    or with ``keep_default_na`` is expected to yield the matching
    ``read_csv.na_filter-*`` rule id.
    """
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        (
            {"na_values": ["NaN", "Nan"], "keep_default_na": True},
            [],
            "Non-empty list returned by correct call to read_csv!",
        ),
        (
            {"na_filter": False, "na_values": ["NaN", "Nan"]},
            ["read_csv.na_filter-na_values"],
            "Wrong result when calling read_csv with na_value specified, but na_filter=False!",
        ),
        (
            {"na_filter": False, "keep_default_na": True},
            ["read_csv.na_filter-keep_default_na"],
            "Wrong result when calling read_csv with keep_default_na specified, but na_filter=False!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_quotechar_doublequote(self):
    """Check the ``quoting``/``doublequote`` rule of ``rcf.read_csv``.

    ``doublequote`` on its own (True or False) is expected to yield an empty
    list.  Passing ``doublequote`` together with ``quoting=csv.QUOTE_NONE``
    is expected to yield ``["read_csv.quoting-doublequote"]``.

    The failure messages spell the constant as ``QUOTE_NONE`` (it was
    previously misspelled) and name the ``doublequote`` value, so a failing
    case can be identified from the message alone.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        ({"doublequote": True}, [], ok_msg),
        ({"doublequote": False}, [], ok_msg),
        (
            {"quoting": csv.QUOTE_NONE, "doublequote": True},
            ["read_csv.quoting-doublequote"],
            "Wrong result when doublequote=True specified despite quoting being QUOTE_NONE!",
        ),
        (
            {"quoting": csv.QUOTE_NONE, "doublequote": False},
            ["read_csv.quoting-doublequote"],
            "Wrong result when doublequote=False specified despite quoting being QUOTE_NONE!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_bad_lines(self):
    """Check the ``error_bad_lines``/``warn_bad_lines`` rule of ``rcf.read_csv``.

    Calls that leave ``warn_bad_lines`` unset or set it to False are
    expected to yield an empty list.  ``warn_bad_lines=True`` without
    ``error_bad_lines=False`` is expected to yield
    ``["read_csv.error_bad_lines-warn_bad_lines"]``.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        ({}, [], ok_msg),
        ({"warn_bad_lines": False, "error_bad_lines": False}, [], ok_msg),
        ({"error_bad_lines": True}, [], ok_msg),
        ({"warn_bad_lines": False, "error_bad_lines": True}, [], ok_msg),
        (
            {"warn_bad_lines": True},
            ["read_csv.error_bad_lines-warn_bad_lines"],
            "Wrong result when read_csv is called with warn_bad_lines=True without setting error_bad_lines to False!",
        ),
        (
            {"warn_bad_lines": True, "error_bad_lines": True},
            ["read_csv.error_bad_lines-warn_bad_lines"],
            "Wrong result when read_csv is called with both error_bad_lines and warn_bad_lines explicitly set to True!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_header(self):
    """Check the ``names``/``header``/``prefix`` rules of ``rcf.read_csv``.

    ``header`` alone, ``prefix`` with ``header=None``, and ``names`` alone
    are expected to yield an empty list.  The remaining combinations are
    expected to yield one or more of ``read_csv.names-prefix``,
    ``read_csv.names-header`` and ``read_csv.header-prefix``, in exactly
    the order listed for each case.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    # Keyword order is kept as-is in case the reported rule order depends on it.
    cases = [
        ({"header": 1}, [], ok_msg),
        ({"prefix": "X", "header": None}, [], ok_msg),
        ({"names": ["A", "B", "C"]}, [], ok_msg),
        (
            {"names": ["A", "B", "C"], "header": None, "prefix": "X"},
            ["read_csv.names-prefix", "read_csv.names-header"],
            "Wrong result when calling read_csv with both names and prefix, and None-header!",
        ),
        (
            {"names": ["A", "B", "C"], "header": 1},
            ["read_csv.names-header"],
            "Wrong result when calling read_csv with both names and header!",
        ),
        (
            {"prefix": "X"},
            ["read_csv.header-prefix"],
            "Wrong result when calling read_csv with both prefix without setting header=None!",
        ),
        (
            {"header": 0, "prefix": "X"},
            ["read_csv.header-prefix"],
            "Wrong result when calling read_csv with both non-None header and prefix!",
        ),
        (
            {"names": ["A", "B", "C"], "header": 0, "prefix": "X"},
            [
                "read_csv.names-prefix",
                "read_csv.names-header",
                "read_csv.header-prefix",
            ],
            "Wrong result when read_csv is called with both names, prefix and non-None header!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_parse_dates(self):
    """Check the rules tying date options of ``rcf.read_csv`` to ``parse_dates``.

    A call that sets ``parse_dates`` along with every date option is
    expected to yield an empty list.  Each date option given without
    ``parse_dates`` (or with ``parse_dates=False``) is expected to yield
    the matching ``read_csv.parse_dates-*`` rule id.
    """

    def parse_ymd(d):
        """ Build a ``datetime.date`` from a string on format YYYY-mm-dd """
        pieces = d.split("-")
        year = int(pieces[0])
        month = int(pieces[1])
        day = int(pieces[2])
        return datetime.date(year, month, day)

    # Each entry: (file argument, keyword arguments, expected result, failure message).
    cases = [
        (
            "data/date_data.csv",
            {
                "parse_dates": [0],
                "infer_datetime_format": True,
                "keep_date_col": False,
                "date_parser": parse_ymd,
                "dayfirst": True,
                "cache_dates": True,
            },
            [],
            "Non-empty list returned by correct call to read_csv!",
        ),
        (
            self.testdata,
            {"infer_datetime_format": True},
            ["read_csv.parse_dates-infer_datetime_format"],
            "Wrong result returned when infer_datetime_format is specified but not parse_dates!",
        ),
        (
            self.testdata,
            {"parse_dates": False, "infer_datetime_format": True},
            ["read_csv.parse_dates-infer_datetime_format"],
            "Wrong result returned when infer_datetime_format is specified but parse_dates is False!",
        ),
        (
            self.testdata,
            {"keep_date_col": True},
            ["read_csv.parse_dates-keep_date_col"],
            "Wrong result returned when keep_date_col is specified but not parse_dates!",
        ),
        (
            self.testdata,
            {"date_parser": parse_ymd},
            ["read_csv.parse_dates-date_parser"],
            "Wrong result returned when date_parser is specified but not parse_dates!",
        ),
        (
            self.testdata,
            {"dayfirst": True},
            ["read_csv.parse_dates-dayfirst"],
            "Wrong result returned when dayfirst is specified but not parse_dates!",
        ),
        (
            self.testdata,
            {"cache_dates": True},
            ["read_csv.parse_dates-cache_dates"],
            "Wrong result returned when cache_dates is specified but not parse_dates!",
        ),
    ]
    for path, kwargs, expected, message in cases:
        outcome = rcf.read_csv(path, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_duplicated_value_ignored(self):
    """Check the rules of ``rcf.read_csv`` for values repeated across arguments.

    A bare call, and a call where every character/value argument is
    distinct, are expected to yield an empty list.  Each later call repeats
    one value in two arguments and is expected to yield the single rule id
    naming that argument pair.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        ({}, [], ok_msg),
        (
            {
                "lineterminator": "\n",
                "escapechar": "\\",
                "delimiter": ",",
                "comment": "#",
                "thousands": " ",
                "decimal": ".",
                "quotechar": "'",
                "na_values": ["NaN", "nan"],
                "true_values": ["True", "1"],
                "false_values": ["False", "0"],
            },
            [],
            ok_msg,
        ),
        (
            {"lineterminator": ",", "sep": ","},
            ["read_csv.lineterminator-sep"],
            "Wrong result when calling read_csv with lineterminator=sep!",
        ),
        (
            {"delimiter": ",", "thousands": ","},
            ["read_csv.delimiter-thousands"],
            "Wrong result when calling read_csv with delimiter=thousands!",
        ),
        (
            {"true_values": ["1", "true"], "false_values": ["1", "false"]},
            ["read_csv.true_values-false_values"],
            "Wrong result when calling read_csv with same value in true_values and false_values!",
        ),
        (
            {"quotechar": "#", "na_values": ["NA", "#", "NaN"]},
            ["read_csv.quotechar-na_values"],
            "Wrong result when calling read_csv with quotechar value in na_values!",
        ),
        (
            # Should only contain sep-delimiter once
            {"sep": ",", "delimiter": ","},
            ["read_csv.sep-delimiter"],
            "Wrong result when calling read_csv with sep and delimiter set to same value!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_delims(self):
    """Check the ``sep``/``delimiter``/``delim_whitespace`` rules of ``rcf.read_csv``.

    A single delimiter argument, or ``delim_whitespace=False`` with ``sep``,
    is expected to yield an empty list.  Each conflicting pair is expected
    to yield its own rule id, and setting all three is expected to yield
    all three rule ids in the order listed.

    The last case has its own failure message (it previously reused the
    message of the ``sep`` + ``delimiter`` case), so a failure report
    identifies which combination broke.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    # Keyword order is kept as-is in case the reported rule order depends on it.
    cases = [
        ({"sep": ","}, [], ok_msg),
        ({"delimiter": ","}, [], ok_msg),
        ({"delim_whitespace": True}, [], ok_msg),
        ({"delim_whitespace": False, "sep": "."}, [], ok_msg),
        (
            {"delim_whitespace": True, "sep": ","},
            ["read_csv.delim_whitespace-sep"],
            "Wrong result when calling read_csv with delim_whitespace=True and sep defined!",
        ),
        (
            {"delim_whitespace": True, "delimiter": ","},
            ["read_csv.delim_whitespace-delimiter"],
            "Wrong result when calling read_csv with delim_whitespace=True and delimiter defined!",
        ),
        (
            {"sep": " ", "delimiter": ":"},
            ["read_csv.sep-delimiter"],
            "Wrong result when calling read_csv with both sep and delimiter defined!",
        ),
        (
            {"sep": " ", "delimiter": ",", "delim_whitespace": True},
            [
                "read_csv.delim_whitespace-sep",
                "read_csv.delim_whitespace-delimiter",
                "read_csv.sep-delimiter",
            ],
            "Wrong result when calling read_csv with sep, delimiter and delim_whitespace=True all defined!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)
def test_read_csv_quoting_doublequote(self):
    """Check the ``quoting``/``doublequote`` rule of ``rcf.read_csv``.

    ``doublequote=True`` alone or with ``quoting=csv.QUOTE_ALL`` is expected
    to yield an empty list.  Any ``doublequote`` value combined with
    ``quoting=csv.QUOTE_NONE`` (with or without an empty ``quotechar``) is
    expected to yield ``["read_csv.quoting-doublequote"]``.
    """
    ok_msg = "Non-empty list returned by correct call to read_csv!"
    # Each entry: (keyword arguments, expected result, failure message).
    cases = [
        ({"doublequote": True}, [], ok_msg),
        ({"quoting": csv.QUOTE_ALL, "doublequote": True}, [], ok_msg),
        (
            {"quoting": csv.QUOTE_NONE, "doublequote": True},
            ["read_csv.quoting-doublequote"],
            "Wrong result when quoting is QUOTE_NONE and doublequote is set to True!",
        ),
        (
            {"quoting": csv.QUOTE_NONE, "quotechar": "", "doublequote": True},
            ["read_csv.quoting-doublequote"],
            "Wrong result when quoting is QUOTE_NONE, quotechar is empty and doublequote is set to True!",
        ),
        (
            {"quoting": csv.QUOTE_NONE, "quotechar": "", "doublequote": False},
            ["read_csv.quoting-doublequote"],
            "Wrong result when quoting is QUOTE_NONE, quotechar is empty and doublequote is set to False!",
        ),
        (
            {"quoting": csv.QUOTE_NONE, "doublequote": False},
            ["read_csv.quoting-doublequote"],
            "Wrong result when quoting is QUOTE_NONE and doublequote is set to False!",
        ),
    ]
    for kwargs, expected, message in cases:
        outcome = rcf.read_csv(self.testdata, **kwargs)
        self.assertEqual(expected, outcome, message)