def test_chunks_holes(scidb_con, url):
    """Check the chunk index and ``get_chunk`` validation for a filtered 1-D array.

    The array is saved with ``xsave`` after a ``filter``; the expected index
    lists chunk origins -3, 11, 18 and 25 only.  ``get_chunk`` is nevertheless
    called for every origin produced by ``range(-3, 31, 7)`` (including 4,
    which is absent from the expected index) and must not raise.  Invalid
    coordinate tuples must raise with the expected message fragment.
    """
    url = f'{url}/chunks_holes'
    schema = '<v:int64> [i=-3:31:0:7]'

    query = (" xsave( filter( build({}, i), "
             "i < 4 or i >= 11 and i % 3 = 0), '{}')")
    scidb_con.iquery(query.format(schema, url))

    array = scidbbridge.Array(url)
    index = array.read_index()
    expected_index = pandas.DataFrame(data={'i': [-3, 11, 18, 25]})
    pandas.testing.assert_frame_equal(index, expected_index)

    # Every chunk origin of the schema can be requested without an error.
    for origin in range(-3, 31, 7):
        array.get_chunk(origin)

    # (arguments passed to get_chunk, fragment expected in the error message)
    bad_calls = (
        ((), "does not match the number of dimensions"),
        ((-5,), "is outside of dimension range"),
        ((35,), "is outside of dimension range"),
        ((17,), "is not a multiple of chunk size"),
        ((0, 5), "does not match the number of dimensions"),
    )
    for coords, message in bad_calls:
        with pytest.raises(Exception) as ex:
            array.get_chunk(*coords)
        assert message in str(ex.value)
def test_chunks_all(scidb_con, url):
    """Check the chunk index and ``get_chunk`` validation for an unfiltered array.

    The array is built over ``i=0:19`` with chunk size 5 and saved with
    ``xsave``; the index is expected to list origins 0, 5, 10 and 15.  Each of
    those origins must be retrievable, and invalid coordinate tuples must
    raise with the expected message fragment.
    """
    url = f'{url}/chunks_all'
    schema = '<v:int64> [i=0:19:0:5]'

    query = " xsave( build({}, i), '{}')"
    scidb_con.iquery(query.format(schema, url))

    array = scidbbridge.Array(url)
    index = array.read_index()
    expected_index = pandas.DataFrame(data={'i': range(0, 20, 5)})
    pandas.testing.assert_frame_equal(index, expected_index)

    for origin in range(0, 20, 5):
        array.get_chunk(origin)

    # (arguments passed to get_chunk, fragment expected in the error message)
    bad_calls = (
        ((), "does not match the number of dimensions"),
        ((-1,), "is outside of dimension range"),
        ((20,), "is outside of dimension range"),
        ((17,), "is not a multiple of chunk size"),
        ((0, 5), "does not match the number of dimensions"),
    )
    for coords, message in bad_calls:
        with pytest.raises(Exception) as ex:
            array.get_chunk(*coords)
        assert message in str(ex.value)
def test_unbound_dimension(scidb_con, url):
    """Save an array redimensioned to a ``*`` bound and fetch its first chunk.

    The target schema is the build schema with ``19`` replaced by ``*``.
    There are no explicit assertions: the test passes as long as neither the
    ``xsave`` query nor ``get_chunk(0)`` raises.
    """
    url = f'{url}/unbound_dimension'
    schema = '<v:int64> [i=0:19:0:5]'
    unbound_schema = schema.replace('19', '*')

    # Create Array Using xsave
    query = (" xsave( redimension( build({}, i), {}), "
             "'{}', index_split:100)")
    scidb_con.iquery(query.format(schema, unbound_schema, url))

    # Fetch Chunk (only the absence of an exception is checked)
    array = scidbbridge.Array(url)
    array.get_chunk(0)
def test_update_big_index(scidb_con, url):
    """Rewrite a 200-entry index with several split sizes and verify each time.

    After every write the index read back must equal the full (i, j) grid and
    the number of entries listed under ``<url>/index`` must match the
    expectation for that split size: 4 for 100, 2 for 200, 1 for 10000.
    Finally the index rebuilt with ``build_index`` must equal the last index
    read, and writing it back with split size 100 must yield 4 entries again.
    """
    url = f'{url}/update_big_index'
    schema = '<v:int64> [i=0:19:0:1; j=0:9:0:1]'

    # Create Array Using xsave
    query = " xsave( build({}, i * j), '{}', index_split:100)"
    scidb_con.iquery(query.format(schema, url))

    array = scidbbridge.Array(url)
    chunks_gold = pandas.DataFrame(
        data=[(row, col) for row in range(20) for col in range(10)],
        columns=('i', 'j'))

    def verify(expected_files):
        # Fetch Chunks List Using Python API and count the index objects
        pandas.testing.assert_frame_equal(array.read_index(), chunks_gold)
        index_files = list(scidbbridge.driver.Driver.list(url + '/index'))
        assert len(index_files) == expected_files

    # Index as written by xsave with index_split:100
    verify(4)

    # Re-index with Larger, then Extra Large Split Size
    index = None
    for split_size, expected_files in ((200, 2), (10000, 1)):
        index = array.read_index()
        array.write_index(index, split_size=split_size)
        verify(expected_files)

    # Re-build Index; it must equal the last index read above
    index_rebuild = array.build_index()
    pandas.testing.assert_frame_equal(index, index_rebuild)

    # Save Re-built Index
    array.write_index(index_rebuild, split_size=100)
    verify(4)
def test_chunks_dim_holes(scidb_con, url):
    """Check the chunk index and ``get_chunk`` validation for a filtered 2-D array.

    The expected index keeps only the chunk origins (i, j) satisfying
    ``i < 4 and j >= 20 or i >= 11 and j < 14``.  ``get_chunk`` is called for
    every origin of the full grid and must not raise; invalid coordinate
    tuples must raise with the expected message fragment.
    """
    url = f'{url}/chunks_dim_holes'
    schema = '<v:int64> [i=-3:31:0:7; j=11:23:0:3]'

    query = (" xsave( filter( build({}, i), "
             "i < 4 and j >= 20 or "
             "i >= 11 and j < 14 and i % 3 = 0 and j % 2 = 0), '{}')")
    scidb_con.iquery(query.format(schema, url))

    array = scidbbridge.Array(url)
    index = array.read_index()
    expected_origins = [
        (row, col)
        for row in range(-3, 32, 7)
        for col in range(11, 24, 3)
        if row < 4 and col >= 20 or row >= 11 and col < 14
    ]
    pandas.testing.assert_frame_equal(
        index,
        pandas.DataFrame(data=expected_origins, columns=('i', 'j')))

    # Every chunk origin of the grid can be requested without an error.
    for row in range(-3, 31, 7):
        for col in range(11, 24, 3):
            array.get_chunk(row, col)

    # (arguments passed to get_chunk, fragment expected in the error message)
    bad_calls = (
        ((-3,), "does not match the number of dimensions"),
        ((-5, 11), "is outside of dimension range"),
        ((11, 26), "is outside of dimension range"),
        ((-3, 12), "is not a multiple of chunk size"),
        ((-3, 11, 0), "does not match the number of dimensions"),
    )
    for coords, message in bad_calls:
        with pytest.raises(Exception) as ex:
            array.get_chunk(*coords)
        assert message in str(ex.value)
def test_update_add_chunks(scidb_con, url):
    """Add chunks to a saved array through the Python API and verify them.

    Steps:
      1. Save a filtered array with ``xsave`` and check its contents (via
         ``xinput``) and its chunk index.
      2. Create chunk (20, 10), save one cell, append the chunk to the index
         by hand and check the array contents.
      3. Add two more cells to the same chunk and check index and contents.
      4. Create chunks (40, 20) and (45, 20), regenerate the index with
         ``build_index`` and check index and contents.
      5. Check the error messages of ``get_chunk`` and ``write_index`` for
         invalid input.

    Frames are combined with ``pandas.concat`` because ``DataFrame.append``
    was removed in pandas 2.0.
    """
    url = '{}/update_add_chunks'.format(url)
    schema = '<v:int64> [i=0:49:0:5; j=0:29:0:10]'

    # Create Array Using xsave
    query = (" xsave( redimension( filter( build({}, i * j), "
             "i % 3 = 0 and j % 2 = 0), {}), '{}')")
    scidb_con.iquery(
        query.format(schema.replace('49', '19').replace('29', '19'),
                     schema,
                     url))

    # Expected cell values, extended in place as cells are added below
    i_lst = []
    j_lst = []
    v_lst = []
    for i in range(0, 20):
        for j in range(0, 20):
            if i % 3 == 0 and j % 2 == 0:
                i_lst.append(i)
                j_lst.append(j)
                v_lst.append(float(i * j))

    def check_array():
        # Fetch Array Using xinput and compare with the expected cells
        array_pd = scidb_con.iquery("xinput('{}')".format(url), fetch=True)
        array_pd = array_pd.sort_values(by=['i', 'j']).reset_index(drop=True)
        pandas.testing.assert_frame_equal(
            array_pd,
            pandas.DataFrame({'i': i_lst,
                              'j': j_lst,
                              'v': v_lst}))

    check_array()

    # Fetch Chunks List Using Python API
    array = scidbbridge.Array(url)
    chunks = array.read_index()
    chunks_list = [(i, j)
                   for i in range(0, 20, 5)
                   for j in range(0, 20, 10)]
    pandas.testing.assert_frame_equal(
        chunks,
        pandas.DataFrame(data=chunks_list, columns=('i', 'j')))

    # Get New Chunk, Add Data to Chunk, Add Chunk to Index
    chunk = array.get_chunk(20, 10)
    chunk.from_pandas(
        pandas.DataFrame({'v': (100, ),
                          'i': (20, ),
                          'j': (10, )}))
    chunk.save()
    # No ignore_index here, matching the former chunks.append(...) call
    chunks = pandas.concat(
        [chunks, pandas.DataFrame({'i': (20, ), 'j': (10, )})])
    array.write_index(chunks)
    chunks = array.read_index()

    i_lst.append(20)
    j_lst.append(10)
    v_lst.append(100)
    check_array()

    # Add Data to Chunk
    chunk = array.get_chunk(20, 10)
    chunk_pd = chunk.to_pandas()
    chunk_pd = pandas.concat(
        [chunk_pd,
         pandas.DataFrame({'v': (110, 120),
                           'i': (22, 24),
                           'j': (11, 11)})],
        ignore_index=True)
    chunk.from_pandas(chunk_pd)
    chunk.save()

    # Fetch Chunks List Using Python API
    chunks = array.read_index()
    chunks_list.append((20, 10))
    pandas.testing.assert_frame_equal(
        chunks,
        pandas.DataFrame(data=chunks_list, columns=('i', 'j')))

    i_lst.extend([22, 24])
    j_lst.extend([11, 11])
    v_lst.extend([110, 120])
    check_array()

    # Get Two New Chunk, Add Data to Chunks, Add Chunks to Index
    chunk = array.get_chunk(40, 20)
    chunk.from_pandas(
        pandas.DataFrame({'v': (10, 10),
                          'i': (42, 43),
                          'j': (25, 25)}))
    chunk.save()
    chunk = array.get_chunk(45, 20)
    chunk.from_pandas(
        pandas.DataFrame({'v': (10, 10),
                          'i': (48, 49),
                          'j': (25, 25)}))
    chunk.save()
    chunks = array.build_index()
    array.write_index(chunks)

    # Fetch Chunks List Using Python API
    chunks = array.read_index()
    chunks_list.extend([(40, 20), (45, 20)])
    pandas.testing.assert_frame_equal(
        chunks,
        pandas.DataFrame(data=chunks_list, columns=('i', 'j')))

    i_lst.extend([42, 43, 48, 49])
    j_lst.extend([25] * 4)
    v_lst.extend([10] * 4)
    check_array()

    # Get Chunk Errors
    bad_chunk_calls = (
        ((-5, 10), "outside of dimension range"),
        ((0, 5), "not a multiple of chunk size"),
        ((25, 10, 0), "does not match the number of dimensions"),
    )
    for coords, message in bad_chunk_calls:
        with pytest.raises(Exception) as ex:
            array.get_chunk(*coords)
        assert message in str(ex.value)

    # Add Chunks to Index Errors
    bad_indexes = (
        ([1, 2],
         "argument is not a Pandas DataFrame"),
        (pandas.DataFrame({'i': (25, )}),
         "does not match array dimensions count"),
        (pandas.DataFrame({'i': (25, ), 'k': (20, )}),
         "does not match array dimensions"),
        (pandas.DataFrame({'i': (25, ), 'j': (25, )}),
         "Index values misaligned with chunk size"),
        (pandas.DataFrame({'i': (50, ), 'j': (20, )}),
         "Index values bigger than upper bound"),
        (pandas.DataFrame({'i': (25, ), 'j': (30, )}),
         "Index values bigger than upper bound"),
        (pandas.DataFrame({'i': (40, ), 'j': (20, ), 'l': (0, )}),
         "does not match array dimensions count"),
        (pandas.DataFrame({'i': (25, 25), 'j': (20, 20)}),
         "Duplicate entries"),
    )
    for bad_index, message in bad_indexes:
        with pytest.raises(Exception) as ex:
            array.write_index(bad_index)
        assert message in str(ex.value)
def test_update_chunk(scidb_con, url):
    """Update one chunk through the Python API and verify the stored array.

    Steps:
      1. Save a filtered array with ``xsave`` and check its contents (via
         ``xinput``), its chunk index and the contents of chunk (0, 0).
      2. Add cells (4, 3) -> 100 and (1, 3) -> 200 to chunk (0, 0) and save.
      3. Check that ``from_pandas`` rejects frames with duplicate coordinates
         and frames with coordinates outside the chunk.
      4. Check the array contents again, now including the two new cells.

    Frames are combined with ``pandas.concat`` because ``DataFrame.append``
    was removed in pandas 2.0.
    """
    url = '{}/update_chunk'.format(url)
    schema = '<v:int64> [i=0:19:0:5; j=0:19:0:10]'

    def with_row(frame, row):
        # Return a copy of frame with one extra row given as a dict
        return pandas.concat([frame, pandas.DataFrame([row])],
                             ignore_index=True)

    def fetch_array():
        # Fetch Array Using xinput, ordered by coordinates
        fetched = scidb_con.iquery("xinput('{}')".format(url), fetch=True)
        return fetched.sort_values(by=['i', 'j']).reset_index(drop=True)

    # Create Array Using xsave
    query = (" xsave( filter( build({}, i * j), "
             "i % 3 = 0 and j % 2 = 0), '{}')")
    scidb_con.iquery(query.format(schema, url))

    # Fetch Array Using xinput
    array_pd = fetch_array()

    i_lst = []
    j_lst = []
    v_lst = []
    for i in range(0, 20):
        for j in range(0, 20):
            if i % 3 == 0 and j % 2 == 0:
                i_lst.append(i)
                j_lst.append(j)
                v_lst.append(float(i * j))

    pandas.testing.assert_frame_equal(
        array_pd,
        pandas.DataFrame({'i': i_lst,
                          'j': j_lst,
                          'v': v_lst}))

    # Fetch Chunks List Using Python API
    array = scidbbridge.Array(url)
    chunks = array.read_index()
    pandas.testing.assert_frame_equal(
        chunks,
        pandas.DataFrame(data=[(i, j)
                               for i in range(0, 20, 5)
                               for j in range(0, 20, 10)],
                         columns=('i', 'j')))

    # Fetch Chunk Using Python API
    chunk = array.get_chunk(0, 0)
    chunk_pd = chunk.to_pandas()
    pandas.testing.assert_frame_equal(
        chunk_pd,
        pandas.DataFrame(data=[(i * j, i, j)
                               for i in range(0, 5)
                               for j in range(0, 10)
                               if i % 3 == 0 and j % 2 == 0],
                         columns=('v', 'i', 'j')))

    # Update Chunk Using Python API
    chunk_pd = pandas.concat(
        [chunk_pd,
         pandas.DataFrame({'v': (100, 200),
                           'i': (4, 1),
                           'j': (3, 3)})],
        ignore_index=True)
    chunk.from_pandas(chunk_pd)
    chunk.save()

    # Rows that from_pandas must reject, with the expected message fragment.
    # The first two duplicate an existing coordinate; the last two lie
    # outside the chunk boundaries.
    bad_rows = (
        ({'v': 100, 'i': 4, 'j': 3}, "Duplicate coordinates"),
        ({'v': 100, 'i': 0, 'j': 2}, "Duplicate coordinates"),
        ({'v': 100, 'i': 0, 'j': -1}, "Coordinates outside chunk boundaries"),
        ({'v': 100, 'i': 5, 'j': 0}, "Coordinates outside chunk boundaries"),
    )
    for row, message in bad_rows:
        bad_pd = with_row(chunk_pd, row)
        with pytest.raises(Exception) as ex:
            chunk.from_pandas(bad_pd)
        assert message in str(ex.value)

    # Fetch Array Using xinput
    array_pd = fetch_array()

    i_lst = []
    j_lst = []
    v_lst = []
    for i in range(0, 20):
        for j in range(0, 20):
            if i % 3 == 0 and j % 2 == 0:
                i_lst.append(i)
                j_lst.append(j)
                v_lst.append(float(i * j))
            elif i in (1, 4) and j == 3:
                # The two cells added to chunk (0, 0) above
                i_lst.append(i)
                j_lst.append(j)
                v_lst.append(float(100) if i == 4 else float(200))

    pandas.testing.assert_frame_equal(
        array_pd,
        pandas.DataFrame({'i': i_lst,
                          'j': j_lst,
                          'v': v_lst}))
#!/usr/bin/env python3

import os
import sys

import scidbbridge

# Rebuild the chunk index of the array whose path is given as the single
# command-line argument, report the number of chunks and write the index back.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        array = scidbbridge.Array(sys.argv[1])
        index = array.build_index()
        print(f"Array has {len(index)} chunks")
        array.write_index(index)
    else:
        # Wrong number of arguments: show what was received and exit with 1
        print(f"Need path to array: {sys.argv}")
        sys.exit(1)