def test_as_py(self):
    """Check that ``as_py`` returns the three fixture rows as dicts."""
    wanted_rows = [
        {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
        {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True},
        {u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
    ]
    descriptor = CSV(self.csv_file, schema=self.schema)

    self.assertEqual(descriptor.as_py(), wanted_rows)
Example #2
0
def test_table_resource():
    """``Data(filename)`` wraps a ``CSV`` and computes to the written rows."""
    with tmpfile('csv') as filename:
        written = CSV(filename, 'w', schema='{x: int, y: int}')
        written.extend([[1, 2], [10, 20]])

        table = Data(filename)
        assert isinstance(table.data, CSV)

        computed_rows = list(compute(table))
        assert computed_rows == list(written)
Example #3
0
def csv():
    """Yield a CSV written to ``test.csv``; remove the file afterwards."""
    descriptor = CSV('test.csv', schema=schema, mode='w')
    descriptor.extend(data)
    yield descriptor
    try:
        os.remove(descriptor.path)
    except OSError:
        # Best-effort cleanup: a failure to remove the file is ignored.
        pass
Example #4
0
def date_data():
    """Yield a read-mode CSV holding three rows, one with a missing date."""
    schema = dshape('{name: string, amount: float32, date: ?datetime}')
    rows = [('Alice', 100.0, datetime(2014, 9, 11, 0, 0, 0, 0)),
            ('Alice', -200.0, datetime(2014, 9, 10, 0, 0, 0, 0)),
            ('Bob', 300.0, None)]
    with tmpfile('.csv') as path:
        # Write through one descriptor, then hand out a fresh reader.
        writer = CSV(path, schema=schema, mode='w')
        writer.extend(rows)
        yield CSV(path, schema=schema, mode='r')
Example #5
0
 def test_extend_structured_many_newlines(self):
     """Extend a CSV ending in blank lines and compare every row read back."""
     # Whatever integer NumPy produces when NaN is cast to int32.
     int_nan = np.array([np.nan]).astype('int32').item()
     blank_row = (int_nan, np.nan)
     with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
         target = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
         target.extend([(3, 3)])
         actual = tuplify(tuple(target))
         wanted = ((1, 1.0), (2, 2.0), blank_row, blank_row, blank_row,
                   (3, 3.0))
         assert np.isclose(actual, wanted, equal_nan=True).all()
Example #6
0
def test_into_DataFrame_concat():
    """A CSV concatenated with itself gives doubled rows and a 0..n-1 index."""
    here = os.path.dirname(__file__)
    csv = CSV(os.path.join(here, 'accounts.csv'))
    df = into(pd.DataFrame, Concat([csv, csv]))
    csv_df = csv.pandas_read_csv()

    assert df.index.tolist() == list(range(len(df)))

    doubled = csv_df.values.tolist() + csv_df.values.tolist()
    assert df.values.tolist() == doubled

    assert df.columns.tolist() == csv_df.columns.tolist()
Example #7
0
    def test_a_mode(self):
        """Rows appended in ``'a'`` mode show up in the ``name`` column."""
        existing = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
                    "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
        appended = [(6, 'Frank', 600),
                    (7, 'Georgina', 700)]
        with filetext(existing) as fn:
            descriptor = CSV(fn, 'a')
            descriptor.extend(appended)

            names = set(descriptor.py[:, 'name'])
            assert 'Georgina' in names
Example #8
0
def test_datetime_csv_reader_same_as_into_types():
    """``into(DataFrame, csv)`` yields the expected dtype for each column."""
    path = os.path.join(os.path.dirname(__file__), 'accounts.csv')
    csv = CSV(path)
    # NOTE(review): ``rhs`` is never used by the assertions below.
    rhs = csv.pandas_read_csv().dtypes
    frame = into(pd.DataFrame, csv)
    actual = frame.dtypes
    type_codes = ['i8', 'i8', 'O', 'datetime64[ns]']
    wanted = pd.Series(list(map(np.dtype, type_codes)), index=csv.columns)
    assert actual.index.tolist() == wanted.index.tolist()
    assert actual.tolist() == wanted.tolist()
    def test_chunks(self):
        """Chunks of length two are ``nd.array`` objects covering all rows."""
        descriptor = CSV(self.csv_file, schema=self.schema)
        wanted = [
            {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
            {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True},
            {u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
        ]

        collected = []
        for chunk in descriptor.chunks(blen=2):
            self.assertTrue(isinstance(chunk, nd.array))
            collected.extend(nd.as_py(chunk))
        self.assertEqual(collected, wanted)
Example #10
0
def test_extend(tmpcsv, schema):
    """Extending a space-delimited CSV writes the rows and keeps the dshape."""
    writer = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    writer.extend(data)
    with open(tmpcsv) as f:
        written = f.readlines()

    wanted_lines = ('Alice 100', 'Bob 200', 'Alice 50')
    for position, wanted in enumerate(wanted_lines):
        assert written[position].strip() == wanted

    # The descriptor's dshape should be a variable-length run of the schema.
    record = datashape.dshape(schema)
    var_dshape = datashape.DataShape(datashape.Var(), record)

    assert str(writer.dshape) == str(var_dshape)
Example #11
0
def test_datetime_csv_reader_same_as_into():
    """``pandas_read_csv()`` and ``into(DataFrame, csv)`` agree on dtypes."""
    path = os.path.join(os.path.dirname(__file__), 'accounts.csv')
    csv = CSV(path)
    reader_dtypes = csv.pandas_read_csv().dtypes
    frame = into(pd.DataFrame, csv)
    into_dtypes = frame.dtypes
    # NOTE(review): ``expected`` is built but never asserted on here.
    type_codes = ['i8', 'i8', 'O', 'datetime64[ns]']
    expected = pd.Series(list(map(np.dtype, type_codes)), index=csv.columns)
    # make sure reader with no args does the same thing as into()
    # Values the same
    assert into_dtypes.index.tolist() == reader_dtypes.index.tolist()
    assert into_dtypes.tolist() == reader_dtypes.tolist()
Example #12
0
    def test_extend(self):
        """Extending writes space-delimited rows and keeps a var-length dshape."""
        descriptor = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
        descriptor.extend(self.data)
        wanted_lines = ('Alice 100', 'Bob 200', 'Alice 50')
        with open(self.filename) as f:
            written = f.readlines()
            for position, wanted in enumerate(wanted_lines):
                self.assertEqual(written[position].strip(), wanted)

        var_dshape = datashape.DataShape(datashape.Var(), self.schema)
        # TODO: datashape comparison is broken
        # Compare the string forms with all spaces removed instead.
        actual_text = str(descriptor.dshape).replace(' ', '')
        wanted_text = str(var_dshape).replace(' ', '')
        self.assertEqual(actual_text, wanted_text)
class Test_Dialect(unittest.TestCase):
    """Tests for a space-delimited CSV opened with an explicit dialect."""

    # Fixture text: a header line followed by three data rows.
    buf = sanitize(
    u"""Name Amount
        Alice 100
        Bob 200
        Alice 50
    """)

    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write ``buf`` to a fresh temp file and open it as ``self.dd``."""
        # NamedTemporaryFile(delete=False) creates the file itself, avoiding
        # the name race of the deprecated tempfile.mktemp; tearDown removes it.
        with tempfile.NamedTemporaryFile("w", suffix=".csv",
                                         delete=False) as f:
            self.csv_file = f.name
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                            delimiter=' ', mode='r+')

    def tearDown(self):
        """Remove the temp file created in ``setUp``."""
        os.remove(self.csv_file)

    def test_has_header(self):
        """The fixture text is recognised as having a header line."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """An explicit ``delimiter`` overrides the named dialect's value."""
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Iterating the descriptor exposes the names from the file."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """A tuple row passed to ``extend`` becomes the file's last line."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """A dict row passed to ``extend`` becomes the file's last line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """Extending a record-typed CSV reads back as lists or as dicts."""
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                            delimiter=',')
            csv.extend([(3, 3)])
            # Read the file once and accept either row representation.
            rows = list(csv)
            assert (rows == [[1, 1.0], [2, 2.0], [3, 3.0]]
                 or rows == [{'x': 1, 'y': 1.0},
                             {'x': 2, 'y': 2.0},
                             {'x': 3, 'y': 3.0}])
Example #14
0
    def test_json_csv_structured(self):
        """Records streamed from JSON into an empty CSV read back as tuples."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        # One JSON document per line.
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(tuple(map(tuple, (csv))),
                                 ((1, 1), (2, 2)))
 def test_append(self):
     """Append a row to a private copy of the fixture and read it in chunks.

     Also checks that extending with a non-row value raises ``ValueError``.
     """
     # Get a private file so as to not mess the original one.
     # NamedTemporaryFile(delete=False) creates the file itself, avoiding the
     # name race of the deprecated tempfile.mktemp.
     with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
         csv_file = f.name
         f.write(self.buf)
     try:
         dd = CSV(csv_file, schema=self.schema, mode='r+')
         dd.extend([["k4", "v4", 4, True]])
         vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
         self.assertEqual(vals, [
             [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
              {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
             [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
              {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}]])
         self.assertRaises(ValueError, lambda: dd.extend([3.3]))
     finally:
         # Remove the private file even when an assertion above fails.
         os.remove(csv_file)
 def test_getitem_stop(self):
     """Slicing with only a stop of 1 returns just the first row."""
     first_row = {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False}
     descriptor = CSV(self.csv_file, schema=self.schema)
     self.assertEqual(descriptor[:1], [first_row])
 def test_getitem_start_step(self):
     """Slicing ``[1::2]`` on the three-row fixture returns the second row."""
     second_row = {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}
     descriptor = CSV(self.csv_file, schema=self.schema)
     self.assertEqual(descriptor[1::2], [second_row])
    def test_re_dialect(self):
        """Copying between CSVs rewrites rows in the destination's dialect."""
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                with open(dest_fn) as f:
                    # assertEqual: the assertEquals alias was removed in
                    # Python 3.12.
                    self.assertEqual(f.read(), '1;1--2;2--')
Example #19
0
    def test_re_dialect(self):
        """Copying between CSVs rewrites rows in the destination's dialect."""
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                with open(dest_fn) as f:
                    # assertEqual: the assertEquals alias was removed in
                    # Python 3.12.
                    self.assertEqual(f.read(), '1;1--2;2--')
Example #20
0
def test_DataFrame_CSV():
    """A CSV converts to a DataFrame with the schema's names and dtypes."""
    wanted = DataFrame([[1, 2.0], [3, 4.0]], columns=['a', 'b'])
    with filetext('1,2\n3,4\n') as fn:
        source = CSV(fn, schema='{a: int64, b: float64}')
        frame = into(DataFrame, source)

        assert str(frame) == str(wanted)
        assert list(frame.dtypes) == [np.int64, np.float64]
Example #21
0
def test_tuple_types():
    """Uniformly typed CSVs are still given a named record schema."""
    with filetext('1,1\n2,2\n') as fn:
        descriptor = CSV(fn, 'r+', delimiter=',')
        assert descriptor[0] == (1, 1)

        assert isinstance(descriptor.schema[0], Record)
        field_types = descriptor.schema[0].types
        assert len(field_types) == 2
        # Both fields share a single type.
        assert len(set(descriptor.schema[0].types)) == 1
    def test_as_py(self):
        """Check that ``as_py`` returns the three fixture rows as dicts."""
        wanted_rows = [
            {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
            {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True},
            {u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
        ]
        descriptor = CSV(self.csv_file, schema=self.schema)

        self.assertEqual(descriptor.as_py(), wanted_rows)
    def test_json_csv_structured(self):
        """Records streamed from JSON into an empty CSV read back as dicts."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        # One JSON document per line.
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(list(csv), [{
                    'x': 1,
                    'y': 1
                }, {
                    'x': 2,
                    'y': 2
                }])
Example #24
0
    def test_csv_json_chunked(self):
        """``into`` moves CSV rows into an empty streaming-JSON file."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(tuplify(tuple(json)), ((1, 1), (2, 2)))
    def test_csv_json(self):
        """Extending a streaming-JSON file from a CSV copies the rows."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '2 * int'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                json.extend(csv)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(list(json), [[1, 1], [2, 2]])
    def test_csv_hdf5(self):
        """``copy`` moves CSV rows into the ``/data`` dataset of an HDF5 file."""
        # h5py is not referenced below; presumably imported so the test
        # errors out when it is unavailable -- TODO confirm.
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', mode='a', schema='2 * int')

                copy(csv, hdf5)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(nd.as_py(hdf5.as_dynd()), [[1, 1], [2, 2]])
 def test_append(self):
     """Append a row to a private copy of the fixture and read it in chunks.

     Also checks that extending with a non-row value raises ``ValueError``.
     """
     # Get a private file so as to not mess the original one.
     # NamedTemporaryFile(delete=False) creates the file itself, avoiding the
     # name race of the deprecated tempfile.mktemp.
     with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
         csv_file = f.name
         f.write(self.buf)
     try:
         dd = CSV(csv_file, schema=self.schema, mode='r+')
         dd.extend([["k4", "v4", 4, True]])
         vals = [nd.as_py(v) for v in dd.chunks(blen=2)]
         self.assertEqual(vals, [
             [{u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
              {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True}],
             [{u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
              {u'f0': u'k4', u'f1': u'v4', u'f2': 4, u'f3': True}]])
         self.assertRaises(ValueError, lambda: dd.extend([3.3]))
     finally:
         # Remove the private file even when an assertion above fails.
         os.remove(csv_file)
Example #28
0
    def test_json_csv_chunked(self):
        """``into`` moves streaming-JSON records into an empty CSV."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        tuples = ((1, 1), (2, 2))
        # One JSON document per line.
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                into(csv, js)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(tuple(csv), tuples)
    def test_chunks(self):
        """Chunks of length two are ``nd.array`` objects covering all rows."""
        descriptor = CSV(self.csv_file, schema=self.schema)
        wanted = [
            {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
            {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True},
            {u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
        ]

        collected = []
        for chunk in descriptor.chunks(blen=2):
            self.assertTrue(isinstance(chunk, nd.array))
            collected.extend(nd.as_py(chunk))
        self.assertEqual(collected, wanted)
    def test_hdf5_csv(self):
        """``copy`` moves a 3x3 HDF5 dataset of ones into an empty CSV."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Build the source dataset: a 3x3 int64 array filled with 1.
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, 3), dtype='i8')
                    d[:] = 1

                csv = CSV(csv_fn, mode='r+', schema='3 * int')
                hdf5 = HDF5(hdf5_fn, '/data')

                copy(hdf5, csv)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(list(csv), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
 def test_extend_structured(self):
     """Extending a record-typed CSV reads back as lists or as dicts."""
     as_lists = [[1, 1.0], [2, 2.0], [3, 3.0]]
     as_dicts = [{'x': 1, 'y': 1.0},
                 {'x': 2, 'y': 2.0},
                 {'x': 3, 'y': 3.0}]
     with filetext('1,1.0\n2,2.0\n') as fn:
         descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                          delimiter=',')
         descriptor.extend([(3, 3)])
         # Either row representation is accepted.
         assert list(descriptor) == as_lists or list(descriptor) == as_dicts
Example #32
0
def test_pandas_dynd():
    """``nd.array`` data converts to DataFrames, with or without column names."""
    typed = nd.array(data, dtype=schema)

    converted = into(DataFrame, typed)
    wanted = DataFrame(data, columns=['name', 'amount'])
    assert str(converted) == str(wanted)

    grid = nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    accounts = CSV(example('accounts.csv'))
    df_csv = into(DataFrame, accounts)
    # Converting into an existing frame takes that frame's column names.
    with_names = into(df_csv, grid)
    without_names = into(DataFrame, grid)

    assert list(with_names.columns) == list(df_csv.columns)
    assert list(without_names.columns) == [0, 1, 2]
Example #33
0
    def test_csv_hdf5(self):
        """``into`` moves record-typed CSV rows into an HDF5 dataset."""
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(nd.as_py(hdf5.as_dynd()), [{
                    'a': 1,
                    'b': 1
                }, {
                    'a': 2,
                    'b': 2
                }])
Example #34
0
def test_csv_into_mongodb_complex(empty_collec):
    """CSV rows 0 and 9 match the first and last documents after loading."""
    file_name = os.path.join(os.path.dirname(__file__), 'dummydata.csv')

    s = "{ Name : string, RegistrationDate : ?datetime, ZipCode : ?int64, Consts : ?float64 }"
    csv = CSV(file_name, schema=s)
    into(empty_collec, csv)

    # Fetch every document without its ``_id`` field.
    documents = list(empty_collec.find({}, {'_id': 0}))

    def document_values(position):
        """Return the chosen document's values in the CSV's column order."""
        return [documents[position][col] for col in csv.columns]

    # This assertion doesn't work due to python floating errors
    # into(list, csv) == into(list, into(coll, csv))
    assert_allclose([list(csv[0])], [document_values(0)])
    assert_allclose([list(csv[9])], [document_values(-1)])
Example #35
0
    def test_hdf5_csv(self):
        """``into`` moves a three-record HDF5 dataset of ones into a CSV."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Build the source dataset: three records with int32 fields
                # a, b and c, each assigned from np.array(1).
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, ),
                                         dtype=np.dtype([(c, 'i4')
                                                         for c in 'abc']))
                    d[:] = np.array(1)

                csv = CSV(csv_fn,
                          mode='r+',
                          schema='{a: int32, b: int32, c: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema=csv.schema)

                into(csv, hdf5)

                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(tuple(map(tuple, csv)),
                                 ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
 def test_basic_object_type(self):
     """The CSV is a var-length ``DataDescriptor`` iterating the fixture rows."""
     wanted_rows = [
         {u'f0': u'k1', u'f1': u'v1', u'f2': 1, u'f3': False},
         {u'f0': u'k2', u'f1': u'v2', u'f2': 2, u'f3': True},
         {u'f0': u'k3', u'f1': u'v3', u'f2': 3, u'f3': False},
     ]
     descriptor = CSV(self.csv_file, schema=self.schema)
     self.assertTrue(isinstance(descriptor, DataDescriptor))
     leading_dim = descriptor.dshape.shape[0]
     self.assertTrue(isinstance(leading_dim, datashape.Var))
     self.assertEqual(list(descriptor), wanted_rows)
    def test_csv_sql_json(self):
        """Rows copied CSV -> SQL -> streaming JSON are present at each hop."""
        rows = [('Alice', 100), ('Bob', 200)]
        csv_text = '\n'.join(','.join(map(str, row)) for row in rows)
        schema = '{name: string, amount: int}'
        engine = create_engine('sqlite:///:memory:')
        with filetext(csv_text) as csv_fn:
            with filetext('') as json_fn:

                source = CSV(csv_fn, mode='r', schema=schema)
                table = SQL(engine, 'testtable', schema=schema)
                sink = JSON_Streaming(json_fn, mode='r+', schema=schema)

                copy(source, table)

                self.assertEqual(list(table), rows)

                copy(table, sink)

                with open(json_fn) as f:
                    written = f.read()
                    assert 'Alice' in written
Example #38
0
    def test_csv_sql_json(self):
        """Rows moved CSV -> SQLite file -> streaming JSON are present at each hop."""
        rows = [('Alice', 100), ('Bob', 200)]
        csv_text = '\n'.join(','.join(map(str, row)) for row in rows)
        schema = '{name: string, amount: int}'
        with filetext(csv_text) as csv_fn:
            with filetext('') as json_fn:
                with tmpfile('db') as sqldb:

                    source = CSV(csv_fn, mode='r', schema=schema)
                    table = SQL('sqlite:///' + sqldb, 'testtable',
                                schema=schema)
                    sink = JSON_Streaming(json_fn, mode='r+', schema=schema)

                    into(table, source)

                    self.assertEqual(into(list, table), rows)

                    into(sink, table)

                    with open(json_fn) as f:
                        written = f.read()
                        assert 'Alice' in written
Example #39
0
 def test_append(self):
     """Extending with one row writes it as the first line of the file."""
     writer = CSV(self.filename, 'w', schema=self.schema, delimiter=' ')
     first_row = self.data[0]
     writer.extend([first_row])
     with open(self.filename) as f:
         first_line = f.readlines()[0]
         self.assertEqual(first_line.strip(), 'Alice 100')
Example #40
0
def test_unicode():
    """A UTF-8 CSV opens with the expected discovered schema and a first row."""
    filename = os.path.join(os.path.dirname(__file__), 'unicode.csv')
    descriptor = CSV(filename, columns=['a', 'b'], encoding='utf8')
    wanted_schema = dshape('{a: string, b: ?int64}')
    assert descriptor.schema == wanted_schema
    # The first row must be truthy.
    assert descriptor[0]
Example #41
0
def test_into_csv_blaze_table(good_csv):
    """Projecting two columns of a CSV-backed ``Data`` keeps their order."""
    wanted_columns = ['userid', 'text']
    table = Data(CSV(good_csv))
    projected = table[['userid', 'text']]
    frame = into(pd.DataFrame, projected)
    assert list(frame.columns) == wanted_columns
Example #42
0
def test_append(tmpcsv, schema):
    """Extending with one row writes it as the first line of the file."""
    writer = CSV(tmpcsv, 'w', schema=schema, delimiter=' ')
    first_row = data[0]
    writer.extend([first_row])
    with open(tmpcsv) as f:
        written = f.readlines()
        first_line = written[0].strip()
    assert first_line == 'Alice 100'
Example #43
0
def test_chunks():
    """Four rows split into two chunks for block lengths of both 2 and 3."""
    with filetext('1,1\n2,2\n3,3\n4,4\n') as fn:
        descriptor = CSV(fn, schema='{a: int32, b: int32}')
        for chunk in descriptor.chunks():
            assert isinstance(chunk, nd.array)
        pairs = list(descriptor.chunks(blen=2))
        assert len(pairs) == 2
        triples = list(descriptor.chunks(blen=3))
        assert len(triples) == 2
Example #44
0
def test_write_csv_with_header_emits_header():
    """Writing with ``header=True`` puts the column name ``a`` in the file."""
    with tmpfile('.csv') as fn:
        writer = CSV(fn, header=True, schema='{a: int, b: int}', mode='w')
        writer.extend([(1, 2), (10, 20)])
        with open(fn) as f:
            contents = f.read()
            assert 'a' in contents
Example #45
0
def test_Data_attribute_repr():
    """``timestamp.day`` on the accounts table reprs like a frame of 25s."""
    here = os.path.dirname(__file__)
    table = Data(CSV(os.path.join(here, 'accounts.csv')))
    day_of_month = table.timestamp.day
    wanted = pd.DataFrame({'timestamp_day': [25] * 3})
    assert repr(day_of_month) == repr(wanted)
Example #46
0
def test_sep_kwarg():
    """The ``sep`` keyword ends up as the dialect's ``delimiter``."""
    descriptor = CSV('foo', 'w', sep=';', schema='{x: int, y: int}')
    delimiter = descriptor.dialect['delimiter']
    assert delimiter == ';'
Example #47
0
from pandas import DataFrame

from blaze.api.into import into
from blaze.api.into import degrade_numpy_dtype_to_python, numpy_ensure_bytes
from blaze.utils import tmpfile
from blaze import Data
import bcolz
from blaze.data import CSV
from blaze.sql import SQL
from datetime import datetime
from toolz import pluck
import os

# Directory containing this module; accounts.csv is looked up relative to it.
dirname = os.path.dirname(__file__)

# CSV descriptor over the accounts.csv file in this module's directory.
csv = CSV(os.path.join(dirname, 'accounts.csv'))

# Three rows in (amount, id, name, timestamp) order, matching the column
# names used for ``df`` and ``x`` below.
L = [[100, 1, 'Alice', datetime(2000, 12, 25, 0, 0, 1)],
     [200, 2, 'Bob', datetime(2001, 12, 25, 0, 0, 1)],
     [300, 3, 'Charlie', datetime(2002, 12, 25, 0, 0, 1)]]

# The rows of ``L`` as a pandas DataFrame.
df = DataFrame(L, columns=['amount', 'id', 'name', 'timestamp'])

# The rows of ``L`` as a NumPy structured array.
# NOTE(review): ``np`` is not imported in the visible import block --
# confirm it is imported elsewhere in the file.
x = np.array(list(map(tuple, L)),
             dtype=[('amount', 'i8'), ('id', 'i8'), ('name', 'U7'),
                    ('timestamp', 'M8[us]')])

# Record schema strings; ``sql_schema`` differs only in the UTC timezone on
# the timestamp field.
schema = '{amount: int64, id: int64, name: string, timestamp: datetime}'
sql_schema = '{amount: int64, id: int64, name: string, timestamp: datetime[tz="UTC"]}'

# The rows of ``L`` as an ``nd.array`` typed with ``schema``.
# NOTE(review): ``nd`` is not imported in the visible import block --
# confirm it is imported elsewhere in the file.
arr = nd.array(L, dtype=schema)
Example #48
0
 def test_chunks(self):
     """Four rows split into two chunks for block lengths of both 2 and 3."""
     with filetext('1,1\n2,2\n3,3\n4,4\n') as fn:
         dd = CSV(fn, schema='2 * int32')
         assert all(isinstance(chunk, nd.array) for chunk in dd.chunks())
         # assertEqual: the assertEquals alias was removed in Python 3.12.
         self.assertEqual(len(list(dd.chunks(blen=2))), 2)
         self.assertEqual(len(list(dd.chunks(blen=3))), 2)
Example #49
0
    def test_as_py(self):
        """``as_py`` output, once tuplified, equals the fixture data."""
        descriptor = CSV(self.csv_file, schema=self.schema)
        rows = tuplify(descriptor.as_py())

        self.assertEqual(rows, self.data)
Example #50
0
def test_delayed_bad_datashape_with_bad_datetimes():
    """Opening a CSV with ``10-10-2000`` style dates raises ``ValueError``."""
    bad_dates = 'a,b\n1,10-10-2000\n1,10-10-2000'
    with filetext(bad_dates) as fn:
        with pytest.raises(ValueError):
            # Construction alone is expected to raise.
            CSV(fn)
Example #51
0
class Test_Dialect(unittest.TestCase):
    """Tests for a space-delimited CSV opened with an explicit dialect."""

    # Fixture text: a header line followed by three data rows.
    buf = sanitize(
    u"""Name Amount
        Alice 100
        Bob 200
        Alice 50
    """)

    schema = "{ f0: string, f1: int }"

    def setUp(self):
        """Write ``buf`` to a fresh temp file and open it as ``self.dd``."""
        # NamedTemporaryFile(delete=False) creates the file itself, avoiding
        # the name race of the deprecated tempfile.mktemp; tearDown removes it.
        with tempfile.NamedTemporaryFile("w", suffix=".csv",
                                         delete=False) as f:
            self.csv_file = f.name
            f.write(self.buf)
        self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                            delimiter=' ', mode='r+')

    def tearDown(self):
        """Remove the temp file created in ``setUp``."""
        os.remove(self.csv_file)


    def test_schema_detection(self):
        """Without a schema, column names come from the header or ``columns``."""
        dd = CSV(self.csv_file)
        assert dd.schema == dshape('{Name: string, Amount: ?int64}')

        dd = CSV(self.csv_file, columns=['foo', 'bar'])
        assert dd.schema == dshape('{foo: string, bar: ?int64}')

    @min_python_version
    def test_has_header(self):
        """The fixture text is recognised as having a header line."""
        assert has_header(self.buf)

    def test_overwrite_delimiter(self):
        """An explicit ``delimiter`` overrides the named dialect's value."""
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(self.dd.dialect['delimiter'], ' ')

    def test_content(self):
        """Iterating the descriptor exposes the names from the file."""
        s = str(list(self.dd))
        assert 'Alice' in s and 'Bob' in s

    def test_append(self):
        """A tuple row passed to ``extend`` becomes the file's last line."""
        self.dd.extend([('Alice', 100)])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_append_dict(self):
        """A dict row passed to ``extend`` becomes the file's last line."""
        self.dd.extend([{'f0': 'Alice', 'f1': 100}])
        with open(self.csv_file) as f:
            self.assertEqual(f.readlines()[-1].strip(), 'Alice 100')

    def test_extend_structured(self):
        """Extending a record-typed CSV reads back all three rows as tuples."""
        with filetext('1,1.0\n2,2.0\n') as fn:
            csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                            delimiter=',')
            csv.extend([(3, 3)])
            assert tuplify(tuple(csv)) == ((1, 1.0), (2, 2.0), (3, 3.0))

    def test_discover_dialect(self):
        """Dialect discovery on CRLF text reports the expected settings."""
        s = '1,1\r\n2,2'
        self.assertEqual(discover_dialect(s),
                {'escapechar': None,
                 'skipinitialspace': False,
                 'quoting': 0,
                 'delimiter': ',',
                 'lineterminator': '\r\n',
                 'quotechar': '"',
                 'doublequote': False})
Example #52
0
 def setUp(self):
     """Write ``self.buf`` to a fresh temp CSV and open it as ``self.dd``."""
     # NamedTemporaryFile(delete=False) creates the file itself, avoiding the
     # name race of the deprecated tempfile.mktemp. The file is left on disk
     # under ``self.csv_file`` for the tests to use.
     with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
         self.csv_file = f.name
         f.write(self.buf)
     self.dd = CSV(self.csv_file, dialect='excel', schema=self.schema,
                         delimiter=' ', mode='r+')
Example #53
0
def test_extend_structured_many_newlines():
    """Extending a CSV with trailing blank lines reads back as six rows."""
    wanted_shape = dshape('6 * (int64, float64)')
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
        descriptor.extend([(3, 3)])
        rows = tuplify(tuple(descriptor))
        assert discover(rows) == wanted_shape
Example #54
0
 def test_extend_structured(self):
     """Extending a record-typed CSV reads back all three rows as tuples."""
     wanted = ((1, 1.0), (2, 2.0), (3, 3.0))
     with filetext('1,1.0\n2,2.0\n') as fn:
         descriptor = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                          delimiter=',')
         descriptor.extend([(3, 3)])
         rows = tuplify(tuple(descriptor))
         assert rows == wanted
Example #55
0
def test_into_list_Column():
    """A single column of a CSV-backed ``Data`` converts to a plain list."""
    with filetext('Alice,1\nBob,2') as fn:
        table = Data(CSV(fn, columns=['name', 'id']))
        names = into(list, table.name)
        assert names == ['Alice', 'Bob']
Example #56
0
def test_columns():
    """``columns`` lists the schema's field names in order."""
    # This is really testing the core interface
    descriptor = CSV('foo', 'w', schema='{name: string, amount: int}')
    column_names = list(descriptor.columns)
    assert column_names == ['name', 'amount']