Example #1
0
 def test_read_csv_filepath_dtype(self):
     """Exercise the filepath/dtype check of ``rcf.read_csv``.

     The first three calls are expected to return an empty list; the last
     two are expected to return exactly ``["read_csv.filepath-dtype"]``.
     """
     ok_msg = "Non-empty list returned by correct call to read_csv!"

     # (path, keyword arguments) pairs that must come back without findings.
     clean_calls = [
         (self.testdata, {"dtype": {0: str, 1: int, 2: float}}),
         (self.testdata, {"dtype": {"A": str, "B": int, "C": float}}),
         ("data/data_noHeader.csv", {
             "names": ["Alpha", "Beta", "Gamma"],
             "dtype": {"Alpha": str, "Beta": int, "Gamma": float},
         }),
     ]
     for path, kwargs in clean_calls:
         findings = rcf.read_csv(path, **kwargs)
         self.assertEqual([], findings, ok_msg)

     # dtype mappings that, per the failure messages, refer to an index or
     # key the file does not have.
     flagged_calls = [
         ({0: str, 4: int},
          "Wrong result when dtype contains too high index!"),
         ({"B": str, "Q": int},
          "Wrong result when dtype contains non-existing key!"),
     ]
     for dtype_map, fail_msg in flagged_calls:
         findings = rcf.read_csv(self.testdata, dtype=dtype_map)
         self.assertEqual(["read_csv.filepath-dtype"], findings, fail_msg)
Example #2
0
 def test_read_csv_filepath_skiprows_skipfooter(self):
     """Exercise the filepath/skiprows/skipfooter check of ``rcf.read_csv``.

     ``skiprows=2, skipfooter=2`` is expected to return an empty list,
     while ``skiprows=3, skipfooter=4`` is expected to return exactly
     ``["read_csv.filepath-skiprows-skipfooter"]``.
     """
     accepted = {"skiprows": 2, "skipfooter": 2, "engine": "python"}
     findings = rcf.read_csv(self.testdata, **accepted)
     self.assertEqual(
         [], findings, "Non-empty list returned by correct call to read_csv!")

     # Per the failure message, these two values together exceed the
     # number of rows in the test file.
     rejected = {"skiprows": 3, "skipfooter": 4, "engine": "python"}
     findings = rcf.read_csv(self.testdata, **rejected)
     expected = ["read_csv.filepath-skiprows-skipfooter"]
     fail_msg = "Wrong result returned when skiprows + skipfooter > number of rows!"
     self.assertEqual(expected, findings, fail_msg)
Example #3
0
 def test_read_csv_filepath_names(self):
     """Exercise the filepath/names check of ``rcf.read_csv``.

     Four calls are expected to return an empty list (including a
     two-element ``names`` list against ``self.testdata``); three calls are
     expected to return exactly ``["read_csv.filepath-names"]``.
     """
     ok_msg = "Non-empty list returned by correct call to read_csv!"

     # (path, keyword arguments) pairs that must come back without findings.
     clean_calls = [
         (self.testdata, {"names": ["D", "E", "F"]}),
         ("data/data_tabbed.csv", {"sep": "\t", "names": ["D", "E", "F"]}),
         ("data/data_plus.csv", {
             "skiprows": [0],
             "names": ["Alpha", "Beta", "Gamma"],
         }),
         (self.testdata, {"names": ["D", "E"]}),
     ]
     for path, kwargs in clean_calls:
         findings = rcf.read_csv(path, **kwargs)
         self.assertEqual([], findings, ok_msg)

     # names values that must be reported, with the message shown on failure.
     flagged_calls = [
         (["D"],
          "Wrong result when names has too few elements!"),
         (["D", "E", "F", "G"],
          "Wrong result when names has too many elements!"),
         ("string",
          "Wrong result when names is a string longer than number of columns!"),
     ]
     for names, fail_msg in flagged_calls:
         findings = rcf.read_csv(self.testdata, names=names)
         self.assertEqual(["read_csv.filepath-names"], findings, fail_msg)
Example #4
0
 def test_read_csv_filepath_parse_dates(self):
     """Exercise the filepath/parse_dates check of ``rcf.read_csv``.

     Every call reads ``data/date_data.csv``. The first six argument sets
     are expected to return an empty list; the remaining four are expected
     to return exactly ``["read_csv.filepath-parse_dates"]``.
     """
     date_file = "data/date_data.csv"
     ok_msg = "Non-empty list returned by correct call to read_csv!"

     # Keyword-argument sets that must come back without findings.
     clean_kwargs = [
         {"index_col": 0, "parse_dates": True},
         {"parse_dates": [0]},
         {"parse_dates": ["A"]},
         {"parse_dates": [[2, 3, 4]]},
         {"parse_dates": [[2, 3, 4]], "index_col": 1},
         {"parse_dates": {"P": [2, 3, 4]}},
     ]
     for kwargs in clean_kwargs:
         findings = rcf.read_csv(date_file, **kwargs)
         self.assertEqual([], findings, ok_msg)

     # parse_dates values that must be reported, with the failure message.
     flagged_calls = [
         (True,
          "Wrong result returned with parse_dates=True for non-date index!"),
         ([1],
          "Wrong result returned when parse_dates points at non-date column!"),
         ([[0, 3, 4]],
          "Wrong result returned when parse_dates points at columns that cannot be combined as a date!"),
         ({"P": [0, 3, 4]},
          "Wrong result returned when parse_dates points at columns that cannot be combined as a date!"),
     ]
     for date_spec, fail_msg in flagged_calls:
         findings = rcf.read_csv(date_file, parse_dates=date_spec)
         self.assertEqual(
             ["read_csv.filepath-parse_dates"], findings, fail_msg)
Example #5
0
 def test_read_csv_na(self):
     """Exercise the ``na_filter`` consistency checks of ``rcf.read_csv``.

     ``na_values`` with ``keep_default_na=True`` is expected to return an
     empty list. Combining ``na_filter=False`` with ``na_values`` or with
     ``keep_default_na`` is expected to return the matching single rule id.
     """
     consistent = {"na_values": ["NaN", "Nan"], "keep_default_na": True}
     findings = rcf.read_csv(self.testdata, **consistent)
     self.assertEqual(
         [], findings, "Non-empty list returned by correct call to read_csv!")

     # na_values supplied although NA filtering is switched off.
     with_na_values = {"na_filter": False, "na_values": ["NaN", "Nan"]}
     findings = rcf.read_csv(self.testdata, **with_na_values)
     expected = ["read_csv.na_filter-na_values"]
     fail_msg = "Wrong result when calling read_csv with na_value specified, but na_filter=False!"
     self.assertEqual(expected, findings, fail_msg)

     # keep_default_na supplied although NA filtering is switched off.
     with_keep_default = {"na_filter": False, "keep_default_na": True}
     findings = rcf.read_csv(self.testdata, **with_keep_default)
     expected = ["read_csv.na_filter-keep_default_na"]
     fail_msg = "Wrong result when calling read_csv with keep_default_na specified, but na_filter=False!"
     self.assertEqual(expected, findings, fail_msg)
Example #6
0
 def test_read_csv_quotechar_doublequote(self):
     """Check the quoting/doublequote consistency rule of ``rcf.read_csv``.

     Passing ``doublequote`` on its own (either value) is expected to
     return an empty list. Passing it together with
     ``quoting=csv.QUOTE_NONE`` is expected to return exactly
     ``["read_csv.quoting-doublequote"]`` for either value.

     NOTE(review): despite the method name, no ``quotechar`` argument is
     passed anywhere in this test.
     """
     result = rcf.read_csv(self.testdata, doublequote=True)
     self.assertEqual(
         [], result, "Non-empty list returned by correct call to read_csv!")
     result = rcf.read_csv(self.testdata, doublequote=False)
     self.assertEqual(
         [], result, "Non-empty list returned by correct call to read_csv!")

     # The failure messages name the doublequote value so that a failing
     # run identifies which of the two QUOTE_NONE cases broke.
     result = rcf.read_csv(self.testdata,
                           quoting=csv.QUOTE_NONE,
                           doublequote=True)
     self.assertEqual(
         ["read_csv.quoting-doublequote"], result,
         "Wrong result when doublequote=True is specified despite quoting being QUOTE_NONE!"
     )
     result = rcf.read_csv(self.testdata,
                           quoting=csv.QUOTE_NONE,
                           doublequote=False)
     self.assertEqual(
         ["read_csv.quoting-doublequote"], result,
         "Wrong result when doublequote=False is specified despite quoting being QUOTE_NONE!"
     )
Example #7
0
 def test_read_csv_bad_lines(self):
     """Exercise the error_bad_lines/warn_bad_lines check of ``rcf.read_csv``.

     Four argument sets (including no extra arguments at all) are expected
     to return an empty list. ``warn_bad_lines=True`` on its own or together
     with ``error_bad_lines=True`` is expected to return exactly
     ``["read_csv.error_bad_lines-warn_bad_lines"]``.
     """
     ok_msg = "Non-empty list returned by correct call to read_csv!"
     rule = ["read_csv.error_bad_lines-warn_bad_lines"]

     # Keyword-argument sets that must come back without findings.
     clean_kwargs = [
         {},
         {"warn_bad_lines": False, "error_bad_lines": False},
         {"error_bad_lines": True},
         {"warn_bad_lines": False, "error_bad_lines": True},
     ]
     for kwargs in clean_kwargs:
         findings = rcf.read_csv(self.testdata, **kwargs)
         self.assertEqual([], findings, ok_msg)

     # Keyword-argument sets that must be reported, with the failure message.
     flagged_calls = [
         ({"warn_bad_lines": True},
          "Wrong result when read_csv is called with warn_bad_lines=True without setting error_bad_lines to False!"),
         ({"warn_bad_lines": True, "error_bad_lines": True},
          "Wrong result when read_csv is called with both error_bad_lines and warn_bad_lines explicitly set to True!"),
     ]
     for kwargs, fail_msg in flagged_calls:
         findings = rcf.read_csv(self.testdata, **kwargs)
         self.assertEqual(rule, findings, fail_msg)
Example #8
0
    def test_read_csv_header(self):
        """Exercise the names/header/prefix checks of ``rcf.read_csv``.

        Each case below gives the keyword arguments passed alongside
        ``self.testdata``, the exact list expected back, and the message
        shown when the comparison fails.
        """
        ok_msg = "Non-empty list returned by correct call to read_csv!"

        cases = [
            # Combinations expected to produce no findings.
            ({"header": 1}, [], ok_msg),
            ({"prefix": "X", "header": None}, [], ok_msg),
            ({"names": ["A", "B", "C"]}, [], ok_msg),
            # Combinations expected to be reported.
            ({"names": ["A", "B", "C"], "header": None, "prefix": "X"},
             ["read_csv.names-prefix", "read_csv.names-header"],
             "Wrong result when calling read_csv with both names and prefix, and None-header!"),
            ({"names": ["A", "B", "C"], "header": 1},
             ["read_csv.names-header"],
             "Wrong result when calling read_csv with both names and header!"),
            ({"prefix": "X"},
             ["read_csv.header-prefix"],
             "Wrong result when calling read_csv with both prefix without setting header=None!"),
            ({"header": 0, "prefix": "X"},
             ["read_csv.header-prefix"],
             "Wrong result when calling read_csv with both non-None header and prefix!"),
            ({"names": ["A", "B", "C"], "header": 0, "prefix": "X"},
             ["read_csv.names-prefix", "read_csv.names-header",
              "read_csv.header-prefix"],
             "Wrong result when read_csv is called with both names, prefix and non-None header!"),
        ]
        for kwargs, expected, fail_msg in cases:
            findings = rcf.read_csv(self.testdata, **kwargs)
            self.assertEqual(expected, findings, fail_msg)
Example #9
0
    def test_read_csv_parse_dates(self):
        """Exercise the parse_dates companion-option checks of ``rcf.read_csv``.

        A call that sets ``parse_dates`` together with every date-related
        option is expected to return an empty list. Each date-related option
        passed without ``parse_dates`` (or with ``parse_dates=False``) is
        expected to return its own single rule id.
        """
        def iso_to_date(text):
            """Build a ``datetime.date`` from a ``YYYY-mm-dd`` string."""
            pieces = text.split("-")
            year = int(pieces[0])
            month = int(pieces[1])
            day = int(pieces[2])
            return datetime.date(year, month, day)

        all_options = {
            "parse_dates": [0],
            "infer_datetime_format": True,
            "keep_date_col": False,
            "date_parser": iso_to_date,
            "dayfirst": True,
            "cache_dates": True,
        }
        findings = rcf.read_csv("data/date_data.csv", **all_options)
        self.assertEqual(
            [], findings,
            "Non-empty list returned by correct call to read_csv!")

        # (keyword arguments, expected rule id, failure message)
        flagged_calls = [
            ({"infer_datetime_format": True},
             "read_csv.parse_dates-infer_datetime_format",
             "Wrong result returned when infer_datetime_format is specified but not parse_dates!"),
            ({"parse_dates": False, "infer_datetime_format": True},
             "read_csv.parse_dates-infer_datetime_format",
             "Wrong result returned when infer_datetime_format is specified but parse_dates is False!"),
            ({"keep_date_col": True},
             "read_csv.parse_dates-keep_date_col",
             "Wrong result returned when keep_date_col is specified but not parse_dates!"),
            ({"date_parser": iso_to_date},
             "read_csv.parse_dates-date_parser",
             "Wrong result returned when date_parser is specified but not parse_dates!"),
            ({"dayfirst": True},
             "read_csv.parse_dates-dayfirst",
             "Wrong result returned when dayfirst is specified but not parse_dates!"),
            ({"cache_dates": True},
             "read_csv.parse_dates-cache_dates",
             "Wrong result returned when cache_dates is specified but not parse_dates!"),
        ]
        for kwargs, rule_id, fail_msg in flagged_calls:
            findings = rcf.read_csv(self.testdata, **kwargs)
            self.assertEqual([rule_id], findings, fail_msg)
Example #10
0
    def test_read_csv_duplicated_value_ignored(self):
        """Exercise the duplicated-value checks of ``rcf.read_csv``.

        A bare call and a call with many distinct option values are expected
        to return an empty list. Calls where two options share a value are
        expected to return the matching single rule id.
        """
        ok_msg = "Non-empty list returned by correct call to read_csv!"

        findings = rcf.read_csv(self.testdata)
        self.assertEqual([], findings, ok_msg)

        distinct_values = {
            "lineterminator": "\n",
            "escapechar": "\\",
            "delimiter": ",",
            "comment": "#",
            "thousands": " ",
            "decimal": ".",
            "quotechar": "'",
            "na_values": ["NaN", "nan"],
            "true_values": ["True", "1"],
            "false_values": ["False", "0"],
        }
        findings = rcf.read_csv(self.testdata, **distinct_values)
        self.assertEqual([], findings, ok_msg)

        # (keyword arguments, expected rule id, failure message)
        flagged_calls = [
            ({"lineterminator": ",", "sep": ","},
             "read_csv.lineterminator-sep",
             "Wrong result when calling read_csv with lineterminator=sep!"),
            ({"delimiter": ",", "thousands": ","},
             "read_csv.delimiter-thousands",
             "Wrong result when calling read_csv with delimiter=thousands!"),
            ({"true_values": ["1", "true"], "false_values": ["1", "false"]},
             "read_csv.true_values-false_values",
             "Wrong result when calling read_csv with same value in true_values and false_values!"),
            ({"quotechar": "#", "na_values": ["NA", "#", "NaN"]},
             "read_csv.quotechar-na_values",
             "Wrong result when calling read_csv with quotechar value in na_values!"),
            # Should only contain sep-delimiter once
            ({"sep": ",", "delimiter": ","},
             "read_csv.sep-delimiter",
             "Wrong result when calling read_csv with sep and delimiter set to same value!"),
        ]
        for kwargs, rule_id, fail_msg in flagged_calls:
            findings = rcf.read_csv(self.testdata, **kwargs)
            self.assertEqual([rule_id], findings, fail_msg)
Example #11
0
    def test_read_csv_delims(self):
        """Check the sep/delimiter/delim_whitespace rules of ``rcf.read_csv``.

        Using exactly one way of specifying the delimiter (or
        ``delim_whitespace=False`` together with ``sep``) is expected to
        return an empty list. Combining two or three of them is expected to
        return the corresponding rule ids, in the order asserted below.
        """
        result = rcf.read_csv(self.testdata, sep=",")
        self.assertEqual(
            [], result, "Non-empty list returned by correct call to read_csv!")
        result = rcf.read_csv(self.testdata, delimiter=",")
        self.assertEqual(
            [], result, "Non-empty list returned by correct call to read_csv!")
        result = rcf.read_csv(self.testdata, delim_whitespace=True)
        self.assertEqual(
            [], result, "Non-empty list returned by correct call to read_csv!")
        result = rcf.read_csv(self.testdata, delim_whitespace=False, sep='.')
        self.assertEqual(
            [], result, "Non-empty list returned by correct call to read_csv!")

        result = rcf.read_csv(self.testdata, delim_whitespace=True, sep=',')
        self.assertEqual(
            ["read_csv.delim_whitespace-sep"], result,
            "Wrong result when calling read_csv with delim_whitespace=True and sep defined!"
        )
        result = rcf.read_csv(self.testdata,
                              delim_whitespace=True,
                              delimiter=',')
        self.assertEqual(
            ["read_csv.delim_whitespace-delimiter"], result,
            "Wrong result when calling read_csv with delim_whitespace=True and delimiter defined!"
        )
        result = rcf.read_csv(self.testdata, sep=" ", delimiter=':')
        self.assertEqual(
            ["read_csv.sep-delimiter"], result,
            "Wrong result when calling read_csv with both sep and delimiter defined!"
        )
        # All three delimiter options at once: the message names all of them
        # so this failure is distinguishable from the sep+delimiter case above.
        result = rcf.read_csv(self.testdata,
                              sep=" ",
                              delimiter=',',
                              delim_whitespace=True)
        self.assertEqual(
            [
                "read_csv.delim_whitespace-sep",
                "read_csv.delim_whitespace-delimiter", "read_csv.sep-delimiter"
            ], result,
            "Wrong result when calling read_csv with sep, delimiter and delim_whitespace=True all defined!"
        )
Example #12
0
 def test_read_csv_quoting_doublequote(self):
     """Exercise the quoting/doublequote check of ``rcf.read_csv``.

     ``doublequote=True`` alone or with ``quoting=csv.QUOTE_ALL`` is
     expected to return an empty list. Any ``doublequote`` value combined
     with ``quoting=csv.QUOTE_NONE`` (with or without an empty
     ``quotechar``) is expected to return exactly
     ``["read_csv.quoting-doublequote"]``.
     """
     ok_msg = "Non-empty list returned by correct call to read_csv!"
     rule = ["read_csv.quoting-doublequote"]

     # Keyword-argument sets that must come back without findings.
     clean_kwargs = [
         {"doublequote": True},
         {"quoting": csv.QUOTE_ALL, "doublequote": True},
     ]
     for kwargs in clean_kwargs:
         findings = rcf.read_csv(self.testdata, **kwargs)
         self.assertEqual([], findings, ok_msg)

     # Keyword-argument sets that must be reported, with the failure message.
     flagged_calls = [
         ({"quoting": csv.QUOTE_NONE, "doublequote": True},
          "Wrong result when quoting is QUOTE_NONE and doublequote is set to True!"),
         ({"quoting": csv.QUOTE_NONE, "quotechar": "", "doublequote": True},
          "Wrong result when quoting is QUOTE_NONE, quotechar is empty and doublequote is set to True!"),
         ({"quoting": csv.QUOTE_NONE, "quotechar": "", "doublequote": False},
          "Wrong result when quoting is QUOTE_NONE, quotechar is empty and doublequote is set to False!"),
         ({"quoting": csv.QUOTE_NONE, "doublequote": False},
          "Wrong result when quoting is QUOTE_NONE and doublequote is set to False!"),
     ]
     for kwargs, fail_msg in flagged_calls:
         findings = rcf.read_csv(self.testdata, **kwargs)
         self.assertEqual(rule, findings, fail_msg)