def test_csv_json(self):
        """CSV rows land in a streaming JSON target via extend().

        NOTE(review): `def` at column 0 with a `self` parameter looks like a
        scraping artifact; presumably this is a method of a test class.
        """
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '2 * int'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                json.extend(csv)

                self.assertEquals(list(json), [[1, 1], [2, 2]])
Example #2
0
    def test_csv_json_chunked(self):
        """into() copies record-schema CSV rows into a streaming JSON file."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                self.assertEquals(tuplify(tuple(json)), ((1, 1), (2, 2)))
Example #3
0
    def test_csv_json(self):
        """extend() copies CSV rows into a streaming JSON target (tuple read-back)."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '2 * int'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                json.extend(csv)

                self.assertEquals(tuple(map(tuple, json)), ((1, 1), (2, 2)))
Example #4
0
    def test_csv_json_chunked(self):
        """into() moves record-schema CSV data into a streaming JSON file."""
        with filetext('1,1\n2,2\n') as csv_fn:
            with filetext('') as json_fn:
                schema = '{a: int32, b: int32}'
                csv = CSV(csv_fn, schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                into(json, csv)

                self.assertEquals(tuplify(tuple(json)), ((1, 1), (2, 2)))
    def test_json_csv_chunked(self):
        """copy() moves line-delimited JSON records into a CSV target."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                copy(js, csv)

                # NOTE(review): compares against dicts — presumably the CSV
                # descriptor yields dict records for a record schema; verify.
                self.assertEquals(list(csv), data)
Example #6
0
    def test_json_csv_structured(self):
        """extend() writes structured JSON records into a CSV; tuples read back."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                self.assertEquals(tuple(map(tuple, (csv))), ((1, 1), (2, 2)))
Example #7
0
    def test_json_csv_chunked(self):
        """into() moves streaming JSON records into a CSV; tuples read back."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        tuples = ((1, 1), (2, 2))
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                into(csv, js)

                self.assertEquals(tuple(csv), tuples)
Example #8
0
    def test_json_csv_structured(self):
        """extend() writes structured JSON records into a CSV target."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                csv.extend(js)

                self.assertEquals(tuple(map(tuple, (csv))),
                                  ((1, 1), (2, 2)))
Example #9
0
    def test_json_csv_chunked(self):
        """into() converts streaming JSON records to CSV rows."""
        data = [{'x': 1, 'y': 1}, {'x': 2, 'y': 2}]
        tuples = ((1, 1), (2, 2))
        text = '\n'.join(map(json.dumps, data))
        schema = '{x: int, y: int}'

        with filetext(text) as json_fn:
            with filetext('') as csv_fn:
                js = JSON_Streaming(json_fn, schema=schema)
                csv = CSV(csv_fn, mode='r+', schema=schema)

                into(csv, js)

                self.assertEquals(tuple(csv), tuples)
Example #10
0
def test_unused_datetime_columns():
    """Projecting one column works even when a datetime column goes unused.

    The CSV has a string `val` column and a datetime `when` column; only
    `val` is computed, so parsing `when` must not be forced.
    """
    # Removed an unused local (`ds = dshape(...)`) — it was never referenced
    # and dshape parsing has no side effects relevant to the assertion.
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as fn:
        csv = CSV(fn, has_header=True)

        s = symbol('s', discover(csv))
        assert into(list, compute(s.val, csv)) == ['a', 'b']
Example #11
0
 def test_init(self):
     """Descriptor yields the rows as tuples and a var- or fixed-length dshape."""
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, schema=self.schema)
         self.assertEquals(tuple(dd), self.tuples)
         # Either length form is acceptable depending on discovery behavior.
         assert dd.dshape in set((
             datashape.dshape('var * {name: string, amount: int32}'),
             datashape.dshape('5 * {name: string, amount: int32}')))
Example #12
0
 def test_chunks(self):
     """chunks(blen=2) yields dynd arrays covering the data two rows at a time."""
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, schema=self.schema)
         chunks = list(dd.chunks(blen=2))
         assert isinstance(chunks[0], nd.array)
         self.assertEquals(len(chunks), 3)
         self.assertEquals(nd.as_py(chunks[0]), self.dicts[:2])
Example #13
0
 def test_init(self):
     """Descriptor lists its records and reports a var- or fixed-length dshape."""
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, schema=self.schema)
         self.assertEquals(list(dd), self.data)
         assert dd.dshape in set((
             datashape.dshape('var * {name: string, amount: int32}'),
             datashape.dshape('5 * {name: string, amount: int32}')))
Example #14
0
 def test_resource_gz(self):
     """resource() on a .csv.gz path returns a CSV using gzip.open as opener."""
     with filetext(b'1,1\n2,2\n', extension='.csv.gz', open=gzip.open,
                   mode='wb') as fn:
         dd = resource(fn, schema='{x: int, y: int}')
         assert isinstance(dd, CSV)
         assert dd.open == gzip.open
         assert into(list, dd) == [(1, 1), (2, 2)]
Example #15
0
 def test_into(self):
     """into() appends rows from one CSV resource into an append-mode one."""
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='{x: int, y: int}')
             B = resource(b, schema='{x: int, y: int}', mode='a')
             B = into(B, A)
             assert into(list, B) == [(1, 1), (2, 2)]
Example #16
0
 def test_into(self):
     """into() copies a record-schema CSV resource into an append-mode target."""
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='{x: int, y: int}')
             B = resource(b, schema='{x: int, y: int}', mode='a')
             B = into(B, A)
             assert into(list, B) == [(1, 1), (2, 2)]
Example #17
0
def test_into_filename_filename():
    """Copying by bare filenames produces a readable CSV with the same rows."""
    with filetext('1,2\n3,4', extension='csv') as src_path:
        with tmpfile('csv') as dst_path:
            into(dst_path, src_path)

            result = into(list, CSV(dst_path))
            assert result == [(1, 2), (3, 4)]
Example #18
0
 def test_chunks(self):
     """chunks(blen=2) yields dynd arrays of two records each."""
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, schema=self.schema)
         chunks = list(dd.chunks(blen=2))
         assert isinstance(chunks[0], nd.array)
         self.assertEquals(len(chunks), 3)
         self.assertEquals(nd.as_py(chunks[0]), self.data[:2])
Example #19
0
 def test_into(self):
     """into() appends rows between fixed-width ('2 * int') CSV resources."""
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='2 * int')
             B = resource(b, schema='2 * int', mode='a')
             B = into(B, A)
             assert tuplify(list(B)) == ((1, 1), (2, 2))
Example #20
0
def test_unused_datetime_columns():
    """Projecting the string column works despite an unused datetime column.

    Only `val` is computed; parsing the `when` datetime column must not be
    required for the computation to succeed.
    """
    # Removed an unused local (`ds = dshape(...)`) — never referenced below.
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as fn:
        csv = CSV(fn, has_header=True)

        s = symbol('s', discover(csv))
        assert into(list, compute(s.val, csv)) == ['a', 'b']
 def test_copy(self):
     """copy() transfers rows between CSV resources; rows read back as lists."""
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='2 * int')
             B = resource(b, schema='2 * int', mode='a')
             copy(A, B)
             assert list(B) == [[1, 1], [2, 2]]
Example #22
0
def test_into_filename_filename():
    """into() accepts plain file paths and writes a readable CSV copy."""
    with filetext('1,2\n3,4', extension='csv') as source_path:
        with tmpfile('csv') as target_path:
            into(target_path, source_path)

            copied = CSV(target_path)
            rows = into(list, copied)
            assert rows == [(1, 2), (3, 4)]
Example #23
0
def test_delayed_bad_datashape():
    """Schema sniffed from the first rows; a later non-int row raises on read."""
    lines = ['a,b'] + ['1,2'] * 20 + ['1,3.14']
    text = '\n'.join(lines)
    with filetext(text) as fn:
        csv = CSV(fn, nrows_discovery=2)
        assert csv.schema == dshape('{a: int64, b: int64}')

        # The float in the final row violates the discovered int64 schema.
        with pytest.raises(ValueError):
            list(csv)
Example #24
0
def test_csv_gzip_into_sql():
    """A gzipped CSV loads into a SQL table; both sides read back equal."""
    from blaze.data.csv import CSV
    engine, sql = single_table_engine()
    with filetext(b'Alice,2\nBob,4', extension='csv.gz',
                  open=gzip.open, mode='wb') as fn:
        csv = CSV(fn, schema=sql.schema)
        into(sql, csv)
        assert into(list, sql) == into(list, csv)
Example #25
0
def test_delayed_bad_datashape():
    """Schema discovered from early rows; a later float breaks the int column."""
    text = 'a,b\n' + '\n'.join(['1,2']*20) + '\n1,3.14'
    with filetext(text) as fn:
        csv = CSV(fn, nrows_discovery=2)
        assert csv.schema == dshape('{a: int64, b: int64}')

        # 3.14 in the last row cannot coerce to the discovered int64 column.
        with pytest.raises(ValueError):
            list(csv)
Example #26
0
 def test_extend_structured_many_newlines(self):
     """Blank CSV lines read back as NaN-filled rows after an extend()."""
     # NaN cast to int32 — the resulting value is platform/NumPy-defined.
     inan = np.array([np.nan]).astype('int32').item()
     with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
         csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
         csv.extend([(3, 3)])
         result = tuplify(tuple(csv))
         expected = ((1, 1.0), (2, 2.0), (inan, np.nan), (inan, np.nan),
                     (inan, np.nan), (3, 3.0))
         # equal_nan=True makes NaN placeholders compare equal.
         assert np.isclose(result, expected, equal_nan=True).all()
Example #27
0
def test_a_mode():
    """Append mode ('a') lets extend() add rows readable via column slicing."""
    text = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
            "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
    with filetext(text) as fn:
        csv = CSV(fn, 'a')
        csv.extend([(6, 'Frank', 600), (7, 'Georgina', 700)])

        result = set(csv[:, 'name'])
        assert 'Georgina' in result
Example #28
0
 def test_resource_gz(self):
     """resource() on a .csv.gz path yields a CSV whose opener is gzip.open."""
     with filetext(b'1,1\n2,2\n',
                   extension='.csv.gz',
                   open=gzip.open,
                   mode='wb') as fn:
         dd = resource(fn, schema='{x: int, y: int}')
         assert isinstance(dd, CSV)
         assert dd.open == gzip.open
         assert into(list, dd) == [(1, 1), (2, 2)]
 def test_extend_structured(self):
     """extend() on a record-schema CSV appends a coerced row.

     Either list-of-lists or list-of-dicts read-back is accepted,
     presumably to tolerate both descriptor output styles.
     """
     with filetext('1,1.0\n2,2.0\n') as fn:
         csv = CSV(fn, 'r+', schema='{x: int32, y: float32}',
                         delimiter=',')
         csv.extend([(3, 3)])
         assert (list(csv) == [[1, 1.0], [2, 2.0], [3, 3.0]]
              or list(csv) == [{'x': 1, 'y': 1.0},
                               {'x': 2, 'y': 2.0},
                               {'x': 3, 'y': 3.0}])
    def test_re_dialect(self):
        """Copying between CSVs rewrites delimiter and line terminator."""
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                # Raw file contents should use the destination dialect.
                with open(dest_fn) as f:
                    self.assertEquals(f.read(), '1;1--2;2--')
Example #31
0
    def test_a_mode(self):
        """Append mode ('a') extend() adds rows; read back via .py column slice."""
        text = ("id, name, balance\n1, Alice, 100\n2, Bob, 200\n"
                "3, Charlie, 300\n4, Denis, 400\n5, Edith, 500")
        with filetext(text) as fn:
            csv = CSV(fn, 'a')
            csv.extend([(6, 'Frank', 600),
                        (7, 'Georgina', 700)])

            assert 'Georgina' in set(csv.py[:, 'name'])
Example #32
0
def test_DataFrame_CSV():
    """into(DataFrame, csv) preserves values and per-column dtypes."""
    with filetext('1,2\n3,4\n') as fn:
        source = CSV(fn, schema='{a: int64, b: float64}')
        frame = into(DataFrame, source)

        want = DataFrame([[1, 2.0], [3, 4.0]], columns=['a', 'b'])

        assert str(frame) == str(want)
        assert list(frame.dtypes) == [np.int64, np.float64]
Example #33
0
    def test_re_dialect(self):
        """extend() between CSVs with different dialects re-writes the text."""
        dialect1 = {'delimiter': ',', 'lineterminator': '\n'}
        dialect2 = {'delimiter': ';', 'lineterminator': '--'}

        text = '1,1\n2,2\n'

        schema = '2 * int32'

        with filetext(text) as source_fn:
            with filetext('') as dest_fn:
                src = CSV(source_fn, schema=schema, **dialect1)
                dst = CSV(dest_fn, mode='w', schema=schema, **dialect2)

                # Perform copy
                dst.extend(src)

                # Destination file must be serialized in dialect2.
                with open(dest_fn) as f:
                    self.assertEquals(f.read(), '1;1--2;2--')
Example #34
0
def test_tuple_types():
    """
    CSVs with uniform types still create record types with names.

    Two identical int columns sniff to a Record with two fields whose
    types collapse to a single distinct type.
    """
    with filetext('1,1\n2,2\n') as fn:
        csv = CSV(fn, 'r+', delimiter=',')
        assert csv[0] == (1, 1)
        assert isinstance(csv.schema[0], Record)
        assert len(csv.schema[0].types) == 2
        assert len(set(csv.schema[0].types)) == 1
Example #35
0
def test_DataFrame_CSV():
    """into(DataFrame, csv) keeps values and the int64/float64 dtypes."""
    with filetext('1,2\n3,4\n') as fn:
        csv = CSV(fn, schema='{a: int64, b: float64}')
        df = into(DataFrame, csv)

        expected = DataFrame([[1, 2.0], [3, 4.0]],
                             columns=['a', 'b'])

        assert str(df) == str(expected)
        assert list(df.dtypes) == [np.int64, np.float64]
Example #36
0
def test_tuple_types():
    """
    CSVs with uniform types still create record types with names
    """
    with filetext('1,1\n2,2\n') as fn:
        data = CSV(fn, 'r+', delimiter=',')
        assert data[0] == (1, 1)
        record = data.schema[0]
        assert isinstance(record, Record)
        assert len(record.types) == 2
        assert len(set(record.types)) == 1
Example #37
0
    def test_append(self):
        """extend() serializes one record as JSON; schema violations raise."""
        with filetext('') as fn:
            dd = JSON_Streaming(fn, mode='w', schema=self.schema)
            dd.extend([self.tuples[0]])
            with open(fn) as f:
                self.assertEquals(json.loads(f.read().strip()), self.dicts[0])

            # Non-record and wrongly-typed records are rejected.
            self.assertRaises(ValueError, lambda : dd.extend([5.5]))
            self.assertRaises(ValueError,
                              lambda : dd.extend([{'name': 5, 'amount': 1.3}]))
Example #38
0
    def test_append(self):
        """extend() writes one JSON record; bad values raise ValueError."""
        with filetext('') as fn:
            dd = JSON_Streaming(fn, mode='w', schema=self.schema)
            dd.extend([self.data[0]])
            with open(fn) as f:
                self.assertEquals(json.loads(f.read().strip()), self.data[0])

            # Scalars and type-mismatched records must be rejected.
            self.assertRaises(ValueError, lambda : dd.extend([5.5]))
            self.assertRaises(ValueError,
                              lambda : dd.extend([{'name': 5, 'amount': 1.3}]))
    def test_csv_sql_json(self):
        """CSV -> in-memory SQLite -> streaming JSON pipeline via copy()."""
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        engine = create_engine('sqlite:///:memory:')
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:

                csv = CSV(csv_fn, mode='r', schema=schema)
                sql = SQL(engine, 'testtable', schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                copy(csv, sql)

                self.assertEqual(list(sql), data)

                copy(sql, json)

                # Spot-check that the JSON file contains the data.
                with open(json_fn) as f:
                    assert 'Alice' in f.read()
Example #40
0
    def test_csv_sql_json(self):
        """CSV -> file-backed SQLite -> streaming JSON pipeline via into()."""
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:
                with tmpfile('db') as sqldb:

                    csv = CSV(csv_fn, mode='r', schema=schema)
                    sql = SQL('sqlite:///' + sqldb, 'testtable', schema=schema)
                    json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                    into(sql, csv)

                    self.assertEqual(into(list, sql), data)

                    into(json, sql)

                    # Spot-check that the JSON file contains the data.
                    with open(json_fn) as f:
                        assert 'Alice' in f.read()
    def test_csv_hdf5(self):
        """copy() loads fixed-width CSV rows into an append-mode HDF5 dataset."""
        # NOTE(review): h5py is imported but not used directly — presumably
        # to skip the test when h5py is unavailable; confirm.
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', mode='a', schema='2 * int')

                copy(csv, hdf5)

                self.assertEquals(nd.as_py(hdf5.as_dynd()), [[1, 1], [2, 2]])
Example #42
0
    def test_csv_sql_json(self):
        """Round-trip CSV -> SQLite file -> streaming JSON using into()."""
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:
                with tmpfile('db') as sqldb:

                    csv = CSV(csv_fn, mode='r', schema=schema)
                    sql = SQL('sqlite:///' + sqldb, 'testtable', schema=schema)
                    json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                    into(sql, csv)

                    self.assertEqual(into(list, sql), data)

                    into(json, sql)

                    with open(json_fn) as f:
                        assert 'Alice' in f.read()
Example #43
0
    def test_csv_sql_json(self):
        """Round-trip CSV -> in-memory SQLite -> streaming JSON using copy()."""
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        engine = create_engine('sqlite:///:memory:')
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:

                csv = CSV(csv_fn, mode='r', schema=schema)
                sql = SQL(engine, 'testtable', schema=schema)
                json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                copy(csv, sql)

                self.assertEqual(list(sql), data)

                copy(sql, json)

                with open(json_fn) as f:
                    assert 'Alice' in f.read()
Example #44
0
    def test_csv_hdf5(self):
        """into() fills an HDF5 resource from a record-schema CSV."""
        # NOTE(review): nd is imported but unused here — likely a leftover.
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = resource(hdf5_fn + '::/data', dshape='var * {a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(hdf5[:].tolist(),
                                  [(1, 1), (2, 2)])
Example #45
0
    def test_csv_hdf5(self):
        """copy() loads fixed-width CSV rows into an HDF5 dataset."""
        # NOTE(review): h5py imported but not used directly — presumably an
        # availability guard; confirm.
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', schema='2 * int')

                copy(csv, hdf5)

                self.assertEquals(nd.as_py(hdf5.as_dynd()),
                                  [[1, 1], [2, 2]])
Example #46
0
    def test_csv_hdf5(self):
        """into() loads record-schema CSV rows into HDF5; dicts read back."""
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()),
                                  [{'a': 1, 'b': 1},
                                   {'a': 2, 'b': 2}])
Example #47
0
def test_csv_gzip_into_sql():
    """Gzipped CSV -> in-memory SQLite table; contents match the CSV."""
    from blaze.data.csv import CSV
    from blaze.data.sql import into
    engine = sa.create_engine('sqlite:///:memory:')
    sql = SQL(engine,
              'accounts',
              schema='{name: string, amount: int32}')
    with filetext(b'Alice,2\nBob,4', extension='csv.gz',
                  open=gzip.open, mode='wb') as fn:
        csv = CSV(fn, schema=sql.schema)
        into(sql, csv)
        assert list(sql) == list(csv)
Example #48
0
def test_csv_gzip_into_sql():
    """into() loads a gzip-compressed CSV into a SQL table losslessly."""
    from blaze.data.csv import CSV
    from blaze.data.sql import into
    engine = sa.create_engine('sqlite:///:memory:')
    sql = SQL(engine,
              'accounts',
              schema='{name: string, amount: int32}')
    with filetext(b'Alice,2\nBob,4', extension='csv.gz',
                  open=gzip.open, mode='wb') as fn:
        csv = CSV(fn, schema=sql.schema)
        into(sql, csv)
        assert list(sql) == list(csv)
Example #49
0
def test_schema_detection_modifiers():
    """columns=, types=, and typehints= override parts of the sniffed schema."""
    text = "name amount date\nAlice 100 20120101\nBob 200 20120102"
    with filetext(text) as fn:
        # Default discovery: strings and optional int64s.
        assert (CSV(fn).schema ==
                dshape('{name: string, amount: ?int64, date: ?int64}'))
        # columns= renames fields without changing detected types.
        assert (CSV(fn, columns=['NAME', 'AMOUNT', 'DATE']).schema ==
                dshape('{NAME: string, AMOUNT: ?int64, DATE: ?int64}'))
        # types= replaces every column type wholesale.
        assert (str(CSV(fn, types=['string', 'int32', 'date']).schema) ==
                str(dshape('{name: string, amount: int32, date: date}')))

        # typehints= overrides a single column, leaving the rest sniffed.
        a = CSV(fn, typehints={'date': 'date'}).schema
        b = dshape('{name: string, amount: ?int64, date: date}')
        assert str(a) == str(b)
Example #50
0
def test_schema_detection_modifiers():
    """columns=, types=, and typehints= adjust the auto-detected CSV schema."""
    text = "name amount date\nAlice 100 20120101\nBob 200 20120102"
    with filetext(text) as fn:
        assert (CSV(fn).schema == dshape(
            '{name: string, amount: ?int64, date: ?int64}'))
        assert (CSV(fn, columns=[
            'NAME', 'AMOUNT', 'DATE'
        ]).schema == dshape('{NAME: string, AMOUNT: ?int64, DATE: ?int64}'))
        assert (str(CSV(fn, types=['string', 'int32', 'date']).schema) == str(
            dshape('{name: string, amount: int32, date: date}')))

        # typehints= overrides only the named column.
        a = CSV(fn, typehints={'date': 'date'}).schema
        b = dshape('{name: string, amount: ?int64, date: date}')
        assert str(a) == str(b)
    def test_hdf5_csv(self):
        """copy() dumps a 3x3 int HDF5 dataset into a CSV; rows read as lists."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Pre-populate the dataset with all ones.
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, 3), dtype='i8')
                    d[:] = 1

                csv = CSV(csv_fn, mode='r+', schema='3 * int')
                hdf5 = HDF5(hdf5_fn, '/data')

                copy(hdf5, csv)

                self.assertEquals(list(csv), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
 def test_extend_structured(self):
     """extend() appends a coerced row to a record-schema CSV.

     Accepts list-of-lists or list-of-dicts read-back, presumably to
     tolerate both descriptor output styles.
     """
     with filetext('1,1.0\n2,2.0\n') as fn:
         csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
         csv.extend([(3, 3)])
         assert (list(csv) == [[1, 1.0], [2, 2.0], [3, 3.0]]
                 or list(csv) == [{
                     'x': 1,
                     'y': 1.0
                 }, {
                     'x': 2,
                     'y': 2.0
                 }, {
                     'x': 3,
                     'y': 3.0
                 }])
Example #53
0
    def test_hdf5_csv(self):
        """copy() converts a 3x3 int HDF5 dataset to CSV; tuples read back."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Pre-populate the dataset with all ones.
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, 3), dtype='i8')
                    d[:] = 1

                csv = CSV(csv_fn, mode='r+', schema='3 * int')
                hdf5 = HDF5(hdf5_fn, '/data')

                copy(hdf5, csv)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #54
0
    def test_csv_hdf5(self):
        """into() loads a record-schema CSV into HDF5; dynd dicts read back."""
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()), [{
                    'a': 1,
                    'b': 1
                }, {
                    'a': 2,
                    'b': 2
                }])
Example #55
0
    def test_hdf5_csv(self):
        """into(csv, hdf5) converts a structured HDF5 dataset to CSV rows."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Structured dtype: three int32 fields a, b, c — all ones.
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3,),
                                         dtype=np.dtype([(c, 'i4')
                                                         for c in 'abc']))
                    d[:] = np.array(1)

                csv = CSV(csv_fn, mode='r+', schema='{a: int32, b: int32, c: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema=csv.schema)

                into(csv, hdf5)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #56
0
def test_json_into_mongodb(empty_collec):
    """A JSON document loads into MongoDB; first/last nodes round-trip.

    `empty_collec` is presumably a pytest fixture providing an empty
    MongoDB collection — confirm against the fixture definition.
    """
    with filetext(json.dumps(les_mis_data)) as filename:

        dd = JSON(filename)
        coll = empty_collec
        into(coll, dd)

        mongo_data = list(coll.find())

        last = mongo_data[0]['nodes'][-1]
        first = mongo_data[0]['nodes'][0]

        first = (first['group'], first['name'])
        last = (last['group'], last['name'])

        # Compare the stored extremes against the descriptor's own view.
        assert dd.as_py()[1][-1] == last
        assert dd.as_py()[1][0] == first
Example #57
0
    def test_hdf5_csv(self):
        """into(csv, hdf5) flattens a structured HDF5 dataset into CSV rows."""
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                # Structured dtype: three int32 fields a, b, c — all ones.
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, ),
                                         dtype=np.dtype([(c, 'i4')
                                                         for c in 'abc']))
                    d[:] = np.array(1)

                csv = CSV(csv_fn,
                          mode='r+',
                          schema='{a: int32, b: int32, c: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema=csv.schema)

                into(csv, hdf5)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #58
0
def test_into_list_Column():
    """Projecting one column through Data/into yields a plain Python list."""
    with filetext('Alice,1\nBob,2') as fn:
        source = CSV(fn, columns=['name', 'id'])
        table = Data(source)
        names = into(list, table.name)
        assert names == ['Alice', 'Bob']
Example #59
0
 def test_as_py(self):
     """as_py() materializes the JSON stream as plain Python objects."""
     with filetext(self.text) as fn:
         dd = JSON_Streaming(fn, mode='r', schema=self.schema)
         assert dd.as_py() == self.data