def test_indexing_nested(self):
    """Row/field indexing by label and by ordinal position must agree."""
    first_name = self.tuples[0][0]
    assert tuplify(self.dd[0, 'name']) == first_name
    assert tuplify(self.dd[0, 0]) == first_name
    self.assertEqual(tuplify(self.dd[[2, 0], 'name']), ('Bob', 'Alice'))
    self.assertEqual(tuplify(self.dd[[2, 0], 0]), ('Bob', 'Alice'))
    self.assertEqual(tuplify(self.dd[[2, 0], [1, 0]]),
                     ((10, 'Bob'), (100, 'Alice')))
def test_Concat(self):
    """Concat of fixed-shape CSV descriptors acts as one var-length one."""
    with filetexts(self.data) as filenames:
        descriptors = [CSV(fn, schema='2 * int32')
                       for fn in sorted(filenames)]
        dd = Concat(descriptors)

        self.assertEqual(str(dd.schema), '2 * int32')
        self.assertEqual(str(dd.dshape), 'var * 2 * int32')

        expected = ((1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7))
        self.assertEqual(tuplify(tuple(dd)), expected)

        as_dynd = dd.as_dynd()
        reference = nd.array(expected, dtype='int32')
        self.assertEqual(nd.as_py(as_dynd), nd.as_py(reference))

        # Iterating must be repeatable, not a one-shot stream.
        self.assertEqual(tuplify(tuple(dd)), expected)
        self.assertEqual(tuplify(tuple(dd)), expected)  # Not one use only

        chunks = list(dd.chunks())
        assert all(isinstance(chunk, nd.array) for chunk in chunks)

        self.assertEqual(tuple(dd[[0, 2], 0]), (1, 3))
        self.assertEqual(tuple(dd[2, [1, 0]]), (3, 3))
        assert isinstance(dd[:, 0], Iterator)
def test_indexing_nested(self):
    """Nested indexing: field name vs. column number give the same data."""
    assert tuplify(self.dd[0, 'name']) == self.tuples[0][0]
    assert tuplify(self.dd[0, 0]) == self.tuples[0][0]
    expected_names = ('Bob', 'Alice')
    self.assertEqual(tuplify(self.dd[[2, 0], 'name']), expected_names)
    self.assertEqual(tuplify(self.dd[[2, 0], 0]), expected_names)
    self.assertEqual(tuplify(self.dd[[2, 0], [1, 0]]),
                     ((10, 'Bob'), (100, 'Alice')))
def test_stack(json_data):
    """Stacked JSON descriptors support slicing along the new leading axis."""
    dd = json_data
    expected = (((1, 2), (3, 4)), ((5, 6), (7, 8)), ((9, 10), (11, 12)))
    assert tuplify(dd.as_py()) == expected
    # BUG FIX: the two comparisons below were bare expressions whose results
    # were silently discarded, so they could never fail the test.
    assert tuplify(dd[::2, 1, :]) == ((3, 4), (11, 12))
    assert tuplify(dd[::2, 1, 'x']) == (3, 11)
def test_stack(json_data):
    """Slicing a Stack of JSON data by step, index and field name."""
    dd = json_data
    expected = (((1, 2), (3, 4)), ((5, 6), (7, 8)), ((9, 10), (11, 12)))
    assert tuplify(dd.as_py()) == expected
    # BUG FIX: these checks previously lacked `assert`, making them no-ops.
    assert tuplify(dd[::2, 1, :]) == ((3, 4), (11, 12))
    assert tuplify(dd[::2, 1, 'x']) == (3, 11)
def test_Stack(self):
    """Stacking streaming-JSON descriptors adds a leading axis of size 3."""
    with filetexts(self.text) as filenames:
        descriptors = [JSON_Streaming(fn, schema="{x: int32, y: int32}")
                       for fn in sorted(filenames)]
        dd = Stack(descriptors)

        expected = (((1, 2), (3, 4)), ((5, 6), (7, 8)), ((9, 10), (11, 12)))
        self.assertEqual(tuplify(dd.as_py()), expected)
        self.assertEqual(tuplify(dd.py[::2, 1, :]), ((3, 4), (11, 12)))
        self.assertEqual(tuplify(dd.py[::2, 1, "x"]), (3, 11))
def test_into(self):
    """into() copies every row from a source CSV resource into a target."""
    with filetext('1,1\n2,2', extension='.csv') as a:
        with tmpfile(extension='.csv') as b:
            src = resource(a, schema='2 * int')
            dst = resource(b, schema='2 * int', mode='a')
            dst = into(dst, src)
            assert tuplify(list(dst)) == ((1, 1), (2, 2))
def test_filesystem(self):
    """A glob pattern resolves to a descriptor over all matching files."""
    prefix = 'test_filesystem'
    contents = {prefix + 'a.csv': '1,1\n2,2',
                prefix + 'b.csv': '1,1\n2,2'}
    with filetexts(contents) as filenames:
        dd = resource(prefix + '*.csv', schema='2 * int')
        self.assertEqual(tuplify(tuple(dd)),
                         (((1, 1), (2, 2)), ((1, 1), (2, 2))))
def test_extend_structured_many_newlines(self):
    """Blank CSV lines parse as NaN rows; extend() appends after them."""
    # int32 representation of NaN, as produced by the CSV parser.
    inan = np.array([np.nan]).astype('int32').item()
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        csv.extend([(3, 3)])
        result = tuplify(tuple(csv))
        nan_row = (inan, np.nan)
        expected = ((1, 1.0), (2, 2.0)) + (nan_row,) * 3 + ((3, 3.0),)
        assert np.isclose(result, expected, equal_nan=True).all()
def test_csv_json_chunked(self):
    """into() streams a CSV descriptor into a writable JSON descriptor."""
    with filetext('1,1\n2,2\n') as csv_fn:
        with filetext('') as json_fn:
            schema = '{a: int32, b: int32}'
            csv = CSV(csv_fn, schema=schema)
            json = JSON_Streaming(json_fn, mode='r+', schema=schema)
            into(json, csv)
            # FIX: assertEquals is a deprecated alias (removed in
            # Python 3.12); use assertEqual.
            self.assertEqual(tuplify(tuple(json)), ((1, 1), (2, 2)))
def test_csv_json_chunked(self):
    """CSV rows land unchanged in a streaming JSON target via into()."""
    with filetext('1,1\n2,2\n') as csv_fn:
        with filetext('') as json_fn:
            schema = '{a: int32, b: int32}'
            csv = CSV(csv_fn, schema=schema)
            json = JSON_Streaming(json_fn, mode='r+', schema=schema)
            into(json, csv)
            # FIX: replace deprecated assertEquals with assertEqual.
            self.assertEqual(tuplify(tuple(json)), ((1, 1), (2, 2)))
def test_gzopen_json():
    """JSON descriptor reads gzipped files when given open=gzip.open."""
    with tmpfile('.json.gz') as filename:
        with gzip.open(filename, 'wt') as f:
            f.write('[[1, 1], [2, 2]]')

        # Without a gzip-aware opener the compressed bytes are not valid JSON.
        assert raises(Exception,
                      lambda: list(JSON(filename, schema='2 * int')))

        dd = JSON(filename, schema='2 * int', open=gzip.open)
        assert tuplify(list(dd)) == ((1, 1), (2, 2))
def test_stack(stack_data):
    """Stack of CSVs: dshape, dynd conversion, chunking and fancy indexing."""
    descriptors = [CSV(fn, schema='2 * int32') for fn in sorted(stack_data)]
    dd = Stack(descriptors)
    assert dd.dshape == 3 * descriptors[0].dshape

    expected = (((1, 1), (2, 2)), ((3, 3), (4, 4)), ((5, 5), (6, 6)))
    assert tuplify(tuple(dd.as_py())) == expected

    as_dynd = dd.as_dynd()
    reference = nd.array(expected, dtype='int32')
    assert nd.as_py(as_dynd) == nd.as_py(reference)

    # Iterating is repeatable, not a one-shot stream.
    assert tuplify(tuple(dd)) == expected
    assert tuplify(tuple(dd)) == expected  # Not one use only

    chunks = dd.chunks()
    assert all(isinstance(chunk, nd.array) for chunk in chunks)

    assert tuple(dd[[0, 2], 0, 0]) == (1, 5)
    assert tuplify(tuple(dd[0])) == ((1, 1), (2, 2))
    assert tuplify(tuple(dd[0, :, [1]])) == ((1,), (2,))
    assert tuplify(tuple(dd[0])) == expected[0]
    assert isinstance(dd[:, 0], Iterator)
    assert isinstance(dd[:], Iterator)
def test_Stack(self):
    """Stacking three CSV descriptors yields a 3 * <schema> array."""
    with filetexts(self.data) as filenames:
        descriptors = [CSV(fn, schema='2 * int32')
                       for fn in sorted(filenames)]
        dd = Stack(descriptors)
        self.assertEqual(dd.dshape, 3 * descriptors[0].dshape)

        expected = (((1, 1), (2, 2)), ((3, 3), (4, 4)), ((5, 5), (6, 6)))
        self.assertEqual(tuplify(tuple(dd.as_py())), expected)

        converted = dd.as_dynd()
        reference = nd.array(expected, dtype='int32')
        self.assertEqual(nd.as_py(converted), nd.as_py(reference))

        # Repeated iteration must keep producing the same rows.
        self.assertEqual(tuplify(tuple(dd)), expected)
        self.assertEqual(tuplify(tuple(dd)), expected)  # Not one use only

        chunks = dd.chunks()
        assert all(isinstance(chunk, nd.array) for chunk in chunks)

        self.assertEqual(tuple(dd[[0, 2], 0, 0]), (1, 5))
        self.assertEqual(tuplify(tuple(dd[0])), ((1, 1), (2, 2)))
        self.assertEqual(tuplify(tuple(dd[0, :, [1]])), ((1,), (2,)))
        self.assertEqual(tuplify(tuple(dd[0])), expected[0])
        assert isinstance(dd[:, 0], Iterator)
        assert isinstance(dd[:], Iterator)
def test_stack(stack_data):
    """End-to-end checks on a Stack built from sorted CSV fixtures."""
    descriptors = [CSV(fn, schema='2 * int32') for fn in sorted(stack_data)]
    dd = Stack(descriptors)
    assert dd.dshape == 3 * descriptors[0].dshape

    expected = (((1, 1), (2, 2)), ((3, 3), (4, 4)), ((5, 5), (6, 6)))
    assert tuplify(tuple(dd.as_py())) == expected

    result = dd.as_dynd()
    expected2 = nd.array(expected, dtype='int32')
    assert nd.as_py(result) == nd.as_py(expected2)

    assert tuplify(tuple(dd)) == expected
    assert tuplify(tuple(dd)) == expected  # Not one use only

    chunks = dd.chunks()
    assert all(isinstance(chunk, nd.array) for chunk in chunks)

    assert tuple(dd[[0, 2], 0, 0]) == (1, 5)
    assert tuplify(tuple(dd[0])) == ((1, 1), (2, 2))
    sliced = tuple(dd[0, :, [1]])
    assert tuplify(sliced) == ((1,), (2,))
    assert tuplify(tuple(dd[0])) == expected[0]
    assert isinstance(dd[:, 0], Iterator)
    assert isinstance(dd[:], Iterator)
def test_gzopen_json():
    """A gzip-aware opener lets the JSON descriptor decompress on read."""
    with tmpfile('.json.gz') as filename:
        handle = gzip.open(filename, 'wt')
        try:
            handle.write('[[1, 1], [2, 2]]')
        finally:
            handle.close()

        # Reading without gzip support must fail: not a valid JSON file.
        assert raises(Exception,
                      lambda: list(JSON(filename, schema='2 * int')))

        dd = JSON(filename, schema='2 * int', open=gzip.open)
        assert tuplify(list(dd)) == ((1, 1), (2, 2))
def test_gzopen_csv():
    """CSV descriptor reads gzipped files via a text-mode gzip opener."""
    with tmpfile('.csv.gz') as filename:
        with gzip.open(filename, 'wt') as f:
            f.write('1,1\n2,2')

        # Without a gzip-aware opener the compressed bytes are not valid CSV.
        assert raises(Exception,
                      lambda: list(CSV(filename, schema='2 * int')))

        opener = partial(gzip.open, mode='rt')
        dd = CSV(filename, schema='2 * int', open=opener)
        assert tuplify(list(dd)) == ((1, 1), (2, 2))
def test_concat(file_data):
    """Concatenated file descriptors: schema, dshape, chunks, indexing."""
    dd = file_data
    assert str(dd.schema) == '{ a : int32, b : int32 }'
    assert str(dd.dshape) == 'var * { a : int32, b : int32 }'

    expected = ((1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7))
    assert tuplify(tuple(dd)) == expected

    result = dd.as_dynd()
    expected2 = nd.array(expected, dtype='int32')
    # BUG FIX: the comparisons below were bare expressions whose results
    # were discarded; add `assert` so they can actually fail.
    assert nd.as_py(result) == nd.as_py(expected2)

    assert tuplify(tuple(dd)) == expected
    assert tuplify(tuple(dd)) == expected  # Not one use only

    chunks = list(dd.chunks())
    assert all(isinstance(chunk, nd.array) for chunk in chunks)

    assert tuple(dd[[0, 2], 0]) == (1, 3)
    assert tuple(dd[2, [1, 0]]) == (3, 3)
    assert isinstance(dd[:, 0], Iterator)
def test_concat(file_data):
    """Concat descriptor behaves like a single variable-length table."""
    dd = file_data
    assert str(dd.schema) == '{ a : int32, b : int32 }'
    assert str(dd.dshape) == 'var * { a : int32, b : int32 }'

    expected = ((1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7))
    assert tuplify(tuple(dd)) == expected

    result = dd.as_dynd()
    expected2 = nd.array(expected, dtype='int32')
    # BUG FIX: previously these three checks lacked `assert` and were no-ops.
    assert nd.as_py(result) == nd.as_py(expected2)

    assert tuplify(tuple(dd)) == expected
    assert tuplify(tuple(dd)) == expected  # Not one use only

    chunks = list(dd.chunks())
    assert all(isinstance(chunk, nd.array) for chunk in chunks)

    assert tuple(dd[[0, 2], 0]) == (1, 3)
    assert tuple(dd[2, [1, 0]]) == (3, 3)
    assert isinstance(dd[:, 0], Iterator)
def test_basic():
    """Python descriptor round-trips data through py and dynd forms."""
    data = ((1, 1), (2, 2))
    dd = Python([], schema='2 * int32')
    dd.extend(data)

    assert str(dd.dshape) == 'var * 2 * int32'
    assert str(dd.schema) == '2 * int32'

    assert tuplify(tuple(dd)) == data
    # FIX: removed a stray debugging print(dd.as_py()) left in the test.
    assert dd.as_py() == data

    chunks = list(dd.chunks())
    assert all(isinstance(chunk, nd.array) for chunk in chunks)
    assert nd.as_py(chunks[0]) == list(map(list, data))
    assert isinstance(dd.as_dynd(), nd.array)

    assert tuple(dd.py[0]) == data[0]
    assert dd.py[0, 1] == data[0][1]
    assert tuple(dd.py[[0, 1], 1]) == (1, 2)
def test_getitem_start_step_kv(kv_dd, kv_data):
    """Extended slicing (start + step) matches plain tuple slicing."""
    sliced = kv_dd[1::2]
    assert tuplify(sliced) == kv_data[1::2]
def test_getitem_stop_kv(kv_dd, kv_data):
    """Slicing with a stop bound matches plain tuple slicing."""
    head = kv_dd[:1]
    assert tuplify(head) == kv_data[:1]
def test_getitem_start_kv(kv_dd, kv_data):
    """Integer indexing returns the matching source row."""
    assert tuplify(kv_dd[0]) == kv_data[0]
def test_as_py_kv(kv_dd, kv_data):
    """as_py() reproduces the full fixture data."""
    materialized = kv_dd.as_py()
    assert tuplify(materialized) == kv_data
def test_iter_kv(kv_dd, kv_data):
    """Iterating the descriptor yields every fixture row in order."""
    rows = tuple(kv_dd)
    assert tuplify(rows) == kv_data
def test_nested(self):
    """Fancy (list-based) indexing on both axes of the .py accessor."""
    self.assertEqual(tuplify(self.dd.py[[0, 1], 0]), ('Alice', 'Bob'))
    self.assertEqual(tuplify(self.dd.py[[0, 1], 1]), (100, 200))
    self.assertEqual(tuplify(self.dd.py[0, [0, 1]]), ('Alice', 100))
    self.assertEqual(tuplify(self.dd.py[[1, 0], [0, 1]]),
                     (('Bob', 200), ('Alice', 100)))
def test_gzopen_no_gzip_open(rsrc):
    """The default opener still reads the fixture resource correctly."""
    dd = CSV(rsrc, schema='{a: int32, b: int32}')
    rows = list(dd)
    assert tuplify(rows) == ((1, 1), (2, 2))
def test_extend_tuples(self):
    """extend() writes tuples to an empty streaming-JSON descriptor."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='r+', schema=self.schema)
        dd.extend(self.tuples)
        # FIX: assertEquals is a deprecated alias (removed in
        # Python 3.12); use assertEqual.
        self.assertEqual(tuplify(tuple(dd)), self.tuples)
def test_as_py(self):
    """as_py() on a JSON descriptor returns the expected ordered data."""
    dd = JSON(self.filename, 'r', dshape=self.dshape)
    materialized = dd.as_py()
    self.assertEqual(tuplify(materialized), self.ordered)
def test_getitem_stop(self):
    """Slicing the .py accessor with a stop bound matches list slicing."""
    dd = CSV(self.csv_file, schema=self.schema)
    head = dd.py[:1]
    self.assertEqual(tuplify(head), self.data[:1])
def test_iter(self):
    """Iterating a CSV descriptor yields every fixture row."""
    dd = CSV(self.csv_file, schema=self.schema)
    rows = tuple(dd)
    self.assertEqual(tuplify(rows), self.data)
def test_iter(self):
    """A two-row CSV iterates as two int32 pairs."""
    with filetext('1,1\n2,2\n') as fn:
        dd = CSV(fn, schema='2 * int32')
        # FIX: replace deprecated assertEquals with assertEqual.
        self.assertEqual(tuplify(list(dd)), ((1, 1), (2, 2)))
def test_dynd_complex(self):
    """Multi-field selection agrees between the .py and .dynd accessors."""
    via_py = tuplify(self.dd.py[:, ['amount', 'name']])
    via_dynd = tuplify(
        nd.as_py(self.dd.dynd[:, ['amount', 'name']], tuple=True))
    self.assertEqual(via_py, via_dynd)
def test_getitem(self):
    """Indexing and slicing a streaming-JSON descriptor match fixtures."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, mode='r', schema=self.schema)
        self.assertEqual(tuplify(dd[0]), self.tuples[0])
        self.assertEqual(tuplify(dd[2:4]), self.tuples[2:4])
def test_gzopen_csv(rsrc):
    """A text-mode gzip opener decompresses the CSV fixture on read."""
    opener = partial(gzip.open, mode='rt')
    dd = CSV(rsrc, schema='{a: int32, b: int32}', open=opener)
    assert tuplify(list(dd)) == ((1, 1), (2, 2))
def test_indexing_basic(self):
    """Scalar, slice, stride and fancy indexing match tuple semantics."""
    assert tuplify(self.dd[0]) == self.tuples[0]
    assert tuplify(self.dd[0:3]) == self.tuples[0:3]
    assert tuplify(self.dd[0::2]) == self.tuples[0::2]
    picks = [3, 1, 3]
    self.assertEqual(tuplify(self.dd[picks]),
                     tuple(self.tuples[i] for i in picks))
def test_row(self):
    """Single-row access through the .py accessor."""
    self.assertEqual(tuplify(self.dd.py[0]), ('Alice', 100))
    self.assertEqual(tuplify(self.dd.py[1]), ('Bob', 200))
def test_getitem(self):
    """dd[i] and dd[i:j] on streaming JSON mirror the fixture tuples."""
    with filetext(self.text) as fn:
        dd = JSON_Streaming(fn, mode='r', schema=self.schema)
        first = dd[0]
        middle = dd[2:4]
        self.assertEqual(tuplify(first), self.tuples[0])
        self.assertEqual(tuplify(middle), self.tuples[2:4])
def test_rows(self):
    """Fancy row selection through the .py accessor."""
    self.assertEqual(tuplify(self.dd.py[[0, 1]]),
                     (('Alice', 100), ('Bob', 200)))
def test_row(dd):
    """Scalar indexing returns one record per index."""
    first = tuplify(dd[0])
    second = tuplify(dd[1])
    assert first == ('Alice', 100)
    assert second == ('Bob', 200)
def test_names(self):
    """Column selection by one field name and by a list of field names."""
    self.assertEqual(list(self.dd.py[:, 'name']),
                     ['Alice', 'Bob', 'Alice'])
    self.assertEqual(tuplify(self.dd.py[:, ['amount', 'name']]),
                     ((100, 'Alice'), (200, 'Bob'), (50, 'Alice')))
def test_rows(dd):
    """List-of-indices selection returns the matching records."""
    selected = dd[[0, 1]]
    assert tuplify(selected) == (('Alice', 100), ('Bob', 200))
def test_extend_structured(self):
    """extend() appends a record to a structured CSV with coercion."""
    with filetext('1,1.0\n2,2.0\n') as fn:
        csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        csv.extend([(3, 3)])
        rows = tuple(csv)
        assert tuplify(rows) == ((1, 1.0), (2, 2.0), (3, 3.0))
def test_nested(dd):
    """Fancy indexing on rows, on columns, and on both axes at once."""
    assert tuplify(dd[[0, 1], 0]) == ('Alice', 'Bob')
    assert tuplify(dd[[0, 1], 1]) == (100, 200)
    assert tuplify(dd[0, [0, 1]]) == ('Alice', 100)
    assert tuplify(dd[[1, 0], [0, 1]]) == (('Bob', 200), ('Alice', 100))
def test_iter_structured(self):
    """A structured CSV iterates as (x, y) records."""
    with filetext('1,2\n3,4\n') as fn:
        dd = CSV(fn, schema='{x: int, y: int}')
        # FIX: replace deprecated assertEquals with assertEqual.
        self.assertEqual(tuplify(list(dd)), ((1, 2), (3, 4)))
def test_names(dd):
    """Selecting one field yields scalars; selecting two yields pairs."""
    name_column = list(dd[:, 'name'])
    assert name_column == ['Alice', 'Bob', 'Alice']
    pair_columns = dd[:, ['amount', 'name']]
    assert tuplify(pair_columns) == ((100, 'Alice'),
                                     (200, 'Bob'),
                                     (50, 'Alice'))
def test_as_py(self):
    """as_py() on a CSV descriptor reproduces the fixture data."""
    dd = CSV(self.csv_file, schema=self.schema)
    self.assertEqual(tuplify(dd.as_py()), self.data)
def test_dynd_complex(dd):
    """Multi-field selection gives equal results via plain and dynd paths."""
    fields = ['amount', 'name']
    plain = tuplify(dd[:, fields])
    via_dynd = tuplify(nd.as_py(dd.dynd[:, fields], tuple=True))
    assert plain == via_dynd
def test_getitem_start_step(self):
    """Extended slicing (start + step) on the .py accessor."""
    dd = CSV(self.csv_file, schema=self.schema)
    strided = dd.py[1::2]
    self.assertEqual(tuplify(strided), self.data[1::2])
def test_extend_structured_no_newline(self=None):
    """extend() works even when the file lacks a trailing newline."""
    with filetext('1,1.0\n2,2.0') as fn:
        csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        csv.extend([(3, 3)])
        rows = tuple(csv)
        assert tuplify(rows) == ((1, 1.0), (2, 2.0), (3, 3.0))
def test_indexing_basic(self):
    """dd indexing mirrors tuple indexing for scalars, slices, strides
    and index lists."""
    assert tuplify(self.dd[0]) == self.tuples[0]
    assert tuplify(self.dd[0:3]) == self.tuples[0:3]
    assert tuplify(self.dd[0::2]) == self.tuples[0::2]
    indices = [3, 1, 3]
    self.assertEqual(tuplify(self.dd[indices]),
                     tuple(self.tuples[i] for i in indices))
def test_extend_structured_many_newlines():
    """Blank CSV lines still count as rows; the result has six records."""
    with filetext('1,1.0\n2,2.0\n\n\n\n') as fn:
        csv = CSV(fn, 'r+', schema='{x: int32, y: float32}', delimiter=',')
        csv.extend([(3, 3)])
        result = tuplify(tuple(csv))
        assert discover(result) == dshape('6 * (int64, float64)')
def test_extend_tuples(self):
    """Tuples appended with extend() round-trip through the descriptor."""
    with filetext('') as fn:
        dd = JSON_Streaming(fn, mode='r+', schema=self.schema)
        dd.extend(self.tuples)
        # FIX: assertEquals is deprecated (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(tuplify(tuple(dd)), self.tuples)
def test_iter():
    """A two-row structured CSV iterates as two (a, b) records."""
    with filetext('1,1\n2,2\n') as fn:
        dd = CSV(fn, schema='{a: int32, b: int32}')
        rows = list(dd)
        assert tuplify(rows) == ((1, 1), (2, 2))
def test_as_py(self):
    """JSON descriptor materializes to the expected ordered Python data."""
    dd = JSON(self.filename, 'r', dshape=self.dshape)
    self.assertEqual(tuplify(dd.as_py()), self.ordered)
def test_iter_structured():
    """Iterating a structured CSV yields typed (x, y) records."""
    with filetext('1,2\n3,4\n') as fn:
        dd = CSV(fn, schema='{x: int, y: int}')
        rows = list(dd)
        assert tuplify(rows) == ((1, 2), (3, 4))