Beispiel #1
0
 def test_file_handle(self):
     try:
         f = open(self.csv1, 'rb')
         reader = TextReader(f)
         result = reader.read()  # noqa
     finally:
         f.close()
Beispiel #2
0
 def test_file_handle_mmap(self):
     try:
         f = open(self.csv1, 'rb')
         reader = TextReader(f, memory_map=True, header=None)
         reader.read()
     finally:
         f.close()
    def test_parse_booleans(self):
        data = 'True\nFalse\nTrue\nTrue'

        reader = TextReader(StringIO(data), header=None)
        result = reader.read()

        assert result[0].dtype == np.bool_
    def test_embedded_newline(self):
        data = 'a\n"hello\nthere"\nthis'

        reader = TextReader(StringIO(data), header=None)
        result = reader.read()

        expected = np.array(['a', 'hello\nthere', 'this'], dtype=np.object_)
        tm.assert_numpy_array_equal(result[0], expected)
    def test_integer_thousands(self):
        data = '123,456\n12,500'

        reader = TextReader(StringIO(data), delimiter=':',
                            thousands=',', header=None)
        result = reader.read()

        expected = np.array([123456, 12500], dtype=np.int64)
        tm.assert_almost_equal(result[0], expected)
    def test_euro_decimal(self):
        data = '12345,67\n345,678'

        reader = TextReader(StringIO(data), delimiter=':',
                            decimal=',', header=None)
        result = reader.read()

        expected = np.array([12345.67, 345.678])
        tm.assert_almost_equal(result[0], expected)
    def test_escapechar(self):
        """Check that an escaped leading quote is kept as a literal character.

        Each of the three input rows starts with the escape character
        followed by a double quote; the parsed values are expected to keep
        both surrounding quotes.
        """
        raw = ('\\"hello world\"\n'
               '\\"hello world\"\n'
               '\\"hello world\"')
        wanted = {0: np.array(['"hello world"'] * 3, dtype=object)}

        options = dict(delimiter=',', header=None, escapechar='\\')
        parsed = TextReader(StringIO(raw), **options).read()

        assert_array_dicts_equal(parsed, wanted)
    def test_delimit_whitespace(self):
        data = 'a  b\na\t\t "b"\n"a"\t \t b'

        reader = TextReader(StringIO(data), delim_whitespace=True,
                            header=None)
        result = reader.read()

        tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a'],
                                                        dtype=np.object_))
        tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b'],
                                                        dtype=np.object_))
    def test_skipinitialspace(self):
        data = ('a,   b\n'
                'a,   b\n'
                'a,   b\n'
                'a,   b')

        reader = TextReader(StringIO(data), skipinitialspace=True,
                            header=None)
        result = reader.read()

        tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a', 'a'],
                                                        dtype=np.object_))
        tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b', 'b'],
                                                        dtype=np.object_))
Beispiel #10
0
 def _make_reader(**kwds):
     """Build a comma-delimited, header-less ``TextReader`` over ``data``.

     ``data`` is not defined here; it is looked up from the surrounding
     scope.  A ``dtype`` option, when present, is passed through
     ``ensure_dtype_objs`` first.  All keyword arguments are forwarded to
     ``TextReader``.
     """
     options = dict(kwds)
     if "dtype" in options:
         options["dtype"] = ensure_dtype_objs(options["dtype"])
     source = StringIO(data)
     return TextReader(source, delimiter=",", header=None, **options)
Beispiel #11
0
    def test_empty_field_eof(self):
        """Check parsing of input whose last line ends with empty fields.

        The low-level ``TextReader`` result is compared column by column
        first.  Then (GH5664) three small ``read_csv`` inputs are each parsed
        100 times with the C engine and compared against fixed frames.
        """
        text = 'a,b,c\n1,2,3\n4,,'
        parsed = TextReader(StringIO(text), delimiter=',').read()

        wanted = {0: np.array([1, 4], dtype=np.int64),
                  1: np.array(['2', ''], dtype=object),
                  2: np.array(['3', ''], dtype=object)}
        assert_array_dicts_equal(parsed, wanted)

        # GH5664
        frame_one_col = DataFrame([['b'], [nan]],
                                  columns=['a'], index=['a', 'c'])
        frame_repeated = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]],
                                   columns=list('abcd'),
                                   index=[1, 1])
        frame_ragged = DataFrame([[1, 2, 3, 4], [6, nan, nan, nan],
                                  [8, 9, 10, 11], [13, 14, nan, nan]],
                                 columns=list('abcd'),
                                 index=[0, 5, 7, 12])

        def parse(csv_text, **options):
            # Fresh buffer on every call; always the C engine.
            return read_csv(StringIO(csv_text), engine='c', **options)

        for _ in range(100):
            assert_frame_equal(parse('a,b\nc\n', skiprows=0, names=['a']),
                               frame_one_col)
            assert_frame_equal(parse('1,1,1,1,0\n' * 2 + '\n' * 2,
                                     names=list("abcd")),
                               frame_repeated)
            assert_frame_equal(parse('0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14',
                                     names=list('abcd')),
                               frame_ragged)
Beispiel #12
0
    def test_header_not_enough_lines(self):
        """Check ``header=2``: row 2 supplies the header, later rows the data."""
        text = ('skip this\n'
                'skip this\n'
                'a,b,c\n'
                '1,2,3\n'
                '4,5,6')

        reader = TextReader(StringIO(text), delimiter=',', header=2)

        # The header is available before any data has been read.
        assert reader.header == [['a', 'b', 'c']]

        wanted = {0: np.array([1, 4], dtype=np.int64),
                  1: np.array([2, 5], dtype=np.int64),
                  2: np.array([3, 6], dtype=np.int64)}
        parsed = reader.read()
        assert_array_dicts_equal(parsed, wanted)
Beispiel #13
0
    def test_header_not_enough_lines(self):
        """Check ``header=2`` parsing, and that ``header=5`` fails on five lines.

        With ``header=2`` the third line supplies the column names and the
        remaining two lines the data.  Asking for ``header=5`` is expected to
        raise ``parser.ParserError`` when the reader is constructed.
        """
        text = ('skip this\n' 'skip this\n' 'a,b,c\n' '1,2,3\n' '4,5,6')

        reader = TextReader(StringIO(text), delimiter=',', header=2)
        assert reader.header == [['a', 'b', 'c']]

        wanted = {0: [1, 4], 1: [2, 5], 2: [3, 6]}
        parsed = reader.read()
        assert_array_dicts_equal(wanted, parsed)

        # not enough rows
        with pytest.raises(parser.ParserError):
            TextReader(StringIO(text),
                       delimiter=',',
                       header=5,
                       as_recarray=True)
Beispiel #14
0
    def test_header_not_enough_lines(self):
        """Check ``header=2`` parsing, and that ``header=5`` fails on five lines.

        The first reader takes its column names from the third line.  A
        second construction with ``header=5`` is expected to raise
        ``parser.ParserError``.
        """
        text = ('skip this\n'
                'skip this\n'
                'a,b,c\n'
                '1,2,3\n'
                '4,5,6')

        good_reader = TextReader(StringIO(text), delimiter=',', header=2)
        column_names = good_reader.header
        assert column_names == [['a', 'b', 'c']]

        wanted = {0: [1, 4], 1: [2, 5], 2: [3, 6]}
        assert_array_dicts_equal(wanted, good_reader.read())

        # not enough rows
        failing_options = dict(delimiter=',', header=5, as_recarray=True)
        pytest.raises(parser.ParserError, TextReader, StringIO(text),
                      **failing_options)
Beispiel #15
0
    def test_skip_bad_lines(self):
        """Check handling of rows that have more fields than the first row.

        By default reading raises ``parser.ParserError``.  With
        ``error_bad_lines=False`` lines 4 and 6 are left out of the result,
        and with ``warn_bad_lines=True`` a "Skipping line N" message for each
        of them is expected in the text read from ``sys.stderr``.
        """
        # too many lines, see #2430 for why
        data = ('a:b:c\n'
                'd:e:f\n'
                'g:h:i\n'
                'j:k:l:m\n'
                'l:m:n\n'
                'o:p:q:r')

        def build_reader(**extra):
            # Every reader below shares the delimiter and header settings.
            return TextReader(StringIO(data), delimiter=':', header=None,
                              **extra)

        strict_reader = build_reader()
        with pytest.raises(parser.ParserError):
            strict_reader.read()

        quiet_reader = build_reader(error_bad_lines=False,
                                    warn_bad_lines=False)
        kept_rows = quiet_reader.read()
        wanted = {0: ['a', 'd', 'g', 'l'],
                  1: ['b', 'e', 'h', 'm'],
                  2: ['c', 'f', 'i', 'n']}
        assert_array_dicts_equal(kept_rows, wanted)

        noisy_reader = build_reader(error_bad_lines=False,
                                    warn_bad_lines=True)
        noisy_reader.read()
        # NOTE(review): assumes sys.stderr has been replaced by an object
        # with ``getvalue()`` (e.g. by a capture decorator outside this
        # view) -- confirm against the surrounding file.
        warnings_text = sys.stderr.getvalue()

        assert 'Skipping line 4' in warnings_text
        assert 'Skipping line 6' in warnings_text
Beispiel #16
0
    def test_empty_field_eof(self):
        """Check parsing of input whose last line ends with empty fields.

        The low-level ``TextReader`` result is compared column by column
        first.  Then (GH5664) three small ``read_csv`` inputs are each parsed
        100 times with the C engine and compared against fixed frames.
        """
        parsed = TextReader(StringIO("a,b,c\n1,2,3\n4,,"), delimiter=",").read()
        wanted = {
            0: np.array([1, 4], dtype=np.int64),
            1: np.array(["2", ""], dtype=object),
            2: np.array(["3", ""], dtype=object),
        }
        assert_array_dicts_equal(parsed, wanted)

        # GH5664
        frame_one_col = DataFrame(
            [["b"], [np.nan]], columns=["a"], index=["a", "c"]
        )
        frame_repeated = DataFrame(
            [[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1]
        )
        frame_ragged = DataFrame(
            [
                [1, 2, 3, 4],
                [6, np.nan, np.nan, np.nan],
                [8, 9, 10, 11],
                [13, 14, np.nan, np.nan],
            ],
            columns=list("abcd"),
            index=[0, 5, 7, 12],
        )

        def check(csv_text, wanted_frame, **options):
            # Parse a fresh buffer with the C engine and compare the frame.
            frame = read_csv(StringIO(csv_text), engine="c", **options)
            tm.assert_frame_equal(frame, wanted_frame)

        for _ in range(100):
            check("a,b\nc\n", frame_one_col, skiprows=0, names=["a"])
            check("1,1,1,1,0\n" * 2 + "\n" * 2, frame_repeated, names=list("abcd"))
            check(
                "0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14",
                frame_ragged,
                names=list("abcd"),
            )
Beispiel #17
0
 def test_cr_delimited(self, text, kwargs):
     """Check that ``text`` parses the same with bare CRs as with CRLF endings.

     ``kwargs`` is forwarded to ``TextReader`` for both parses.
     """
     def parse(payload):
         return TextReader(StringIO(payload), **kwargs).read()

     # Reference input: every carriage return expanded to CR + LF.
     crlf_text = text.replace("\r", "\r\n")
     assert_array_dicts_equal(parse(text), parse(crlf_text))
Beispiel #18
0
 def test_StringIO(self, csv_path):
     with open(csv_path, "rb") as f:
         text = f.read()
     src = BytesIO(text)
     reader = TextReader(src, header=None)
     reader.read()
Beispiel #19
0
 def test_file_handle_mmap(self):
     # this was never using memory_map=True
     with open(self.csv1, "rb") as f:
         reader = TextReader(f, header=None)
         reader.read()
 def test_StringIO(self):
     with open(self.csv1, 'rb') as f:
         text = f.read()
     src = BytesIO(text)
     reader = TextReader(src, header=None)
     reader.read()
 def test_string_filename(self):
     """Smoke-test ``TextReader`` when handed ``self.csv1`` itself, not a handle."""
     # The result is discarded; the test passes if reading does not raise.
     TextReader(self.csv1, header=None).read()
 def _test(text, **kwargs):
     """Assert that ``text`` parses the same with bare CRs as with CRLF endings.

     ``kwargs`` are forwarded to ``TextReader`` for both parses.
     """
     crlf_variant = text.replace('\r', '\r\n')
     # Parse the text as given first, then the CRLF variant.
     parses = [TextReader(StringIO(source), **kwargs).read()
               for source in (text, crlf_variant)]
     assert_array_dicts_equal(*parses)
Beispiel #23
0
 def test_file_handle(self):
     with open(self.csv1, 'rb') as f:
         reader = TextReader(f)
         reader.read()
Beispiel #24
0
    def test_skip_bad_lines(self, capsys):
        """Check handling of rows that have more fields than the first row.

        By default reading raises ``parser.ParserError`` with a message
        naming line 4.  With ``error_bad_lines=False`` lines 4 and 6 are left
        out of the result, and with ``warn_bad_lines=True`` a
        "Skipping line N" message for each is expected on captured stderr.
        """
        # too many lines, see #2430 for why
        data = ('a:b:c\n'
                'd:e:f\n'
                'g:h:i\n'
                'j:k:l:m\n'
                'l:m:n\n'
                'o:p:q:r')

        def reader_for(**extra):
            # Every reader below shares the delimiter and header settings.
            return TextReader(StringIO(data), delimiter=':', header=None,
                              **extra)

        msg = (r"Error tokenizing data\. C error: Expected 3 fields in"
               " line 4, saw 4")
        strict_reader = reader_for()
        with pytest.raises(parser.ParserError, match=msg):
            strict_reader.read()

        kept_rows = reader_for(error_bad_lines=False,
                               warn_bad_lines=False).read()
        wanted = {0: np.array(['a', 'd', 'g', 'l'], dtype=object),
                  1: np.array(['b', 'e', 'h', 'm'], dtype=object),
                  2: np.array(['c', 'f', 'i', 'n'], dtype=object)}
        assert_array_dicts_equal(kept_rows, wanted)

        reader_for(error_bad_lines=False, warn_bad_lines=True).read()
        stderr_text = capsys.readouterr().err

        assert 'Skipping line 4' in stderr_text
        assert 'Skipping line 6' in stderr_text
Beispiel #25
0
 def _make_reader(**kwds):
     """Return a comma-delimited, header-less ``TextReader`` over ``data``.

     ``data`` is not defined here; it is looked up from the surrounding
     scope.  Extra keyword arguments are forwarded to ``TextReader``.
     """
     buffer = StringIO(data)
     return TextReader(buffer, delimiter=",", header=None, **kwds)
Beispiel #26
0
    def test_skip_bad_lines(self, capsys):
        """Check handling of rows that have more fields than the first row.

        Three readers are built over the same text: a default one that must
        raise ``parser.ParserError``, one that silently leaves out lines 4
        and 6, and one that leaves them out while writing "Skipping line N"
        messages to captured stderr.
        """
        # too many lines, see #2430 for why
        data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r"
        lenient = dict(delimiter=":", header=None, error_bad_lines=False)

        strict_reader = TextReader(StringIO(data), delimiter=":", header=None)
        msg = r"Error tokenizing data\. C error: Expected 3 fields in line 4, saw 4"
        with pytest.raises(parser.ParserError, match=msg):
            strict_reader.read()

        silent_reader = TextReader(StringIO(data), warn_bad_lines=False, **lenient)
        kept_rows = silent_reader.read()
        wanted = {
            0: np.array(["a", "d", "g", "l"], dtype=object),
            1: np.array(["b", "e", "h", "m"], dtype=object),
            2: np.array(["c", "f", "i", "n"], dtype=object),
        }
        assert_array_dicts_equal(kept_rows, wanted)

        warning_reader = TextReader(StringIO(data), warn_bad_lines=True, **lenient)
        warning_reader.read()
        # Only the stderr half of the captured output is inspected.
        stderr_text = capsys.readouterr().err

        assert "Skipping line 4" in stderr_text
        assert "Skipping line 6" in stderr_text
Beispiel #27
0
 def test_file_handle(self, csv_path):
     with open(csv_path, "rb") as f:
         reader = TextReader(f)
         reader.read()
 def _make_reader(**kwds):
     """Return a comma-delimited ``TextReader`` over ``data``.

     ``data`` is not defined here; it is looked up from the surrounding
     scope.  Extra keyword arguments are forwarded to ``TextReader``.
     """
     source = StringIO(data)
     return TextReader(source, delimiter=',', **kwds)
Beispiel #29
0
 def test_string_filename(self):
     """Smoke-test ``TextReader`` given ``self.csv1`` directly as its source."""
     # Built with header=None; the parsed result is not inspected.
     path_reader = TextReader(self.csv1, header=None)
     path_reader.read()
 def test_file_handle(self):
     with open(self.csv1, 'rb') as f:
         reader = TextReader(f)
         reader.read()
Beispiel #31
0
 def test_file_handle_mmap(self):
     with open(self.csv1, 'rb') as f:
         reader = TextReader(f, memory_map=True, header=None)
         reader.read()
 def test_file_handle_mmap(self):
     with open(self.csv1, 'rb') as f:
         reader = TextReader(f, memory_map=True, header=None)
         reader.read()
Beispiel #33
0
 def test_StringIO(self):
     with open(self.csv1, 'rb') as f:
         text = f.read()
     src = BytesIO(text)
     reader = TextReader(src, header=None)
     reader.read()
 def test_string_factorize(self):
     # should this be optional?
     data = 'a\nb\na\nb\na'
     reader = TextReader(StringIO(data), header=None)
     result = reader.read()
     assert len(set(map(id, result[0]))) == 2
Beispiel #35
0
 def test_string_factorize(self):
     # should this be optional?
     data = 'a\nb\na\nb\na'
     reader = TextReader(StringIO(data), header=None)
     result = reader.read()
     assert len(set(map(id, result[0]))) == 2