def test_simple(self):
    """Check nd.fields on a struct array for one- to four-field selections.

    Every selection must produce a struct dtype that holds exactly the
    requested fields in the requested order, and each selected field must
    convert to the same Python value as the matching field of the source.
    """
    a = nd.array(
        [(1, 2, 'a', 'b'), (3, 4, 'ab', 'cd'), (5, 6, 'def', 'ghi')],
        dtype='{x: int32, y: int32, z: string, w: string}')
    # dynd type of each field of the source struct, keyed by field name.
    field_types = {'x': ndt.int32, 'y': ndt.int32,
                   'z': ndt.string, 'w': ndt.string}
    selections = (
        ('x',),                # selecting a single field
        ('z', 'y'),            # selecting two fields
        ('w', 'y', 'z'),       # selecting three fields
        ('w', 'y', 'x', 'z'),  # reordering all four fields
    )
    for names in selections:
        b = nd.fields(a, *names)
        self.assertEqual(
            nd.dtype_of(b),
            ndt.make_struct([field_types[name] for name in names],
                            list(names)))
        # Field data must be unchanged by the selection.
        for name in names:
            self.assertEqual(nd.as_py(getattr(b, name)),
                             nd.as_py(getattr(a, name)))
def test_fixed_var(self):
    """Check nd.fields on a '3 * var * struct' array.

    For each selection the result type must be a fixed dimension of 3 over
    a var dimension over a struct with exactly the requested fields in the
    requested order, and each selected field must convert to the same
    Python value as the matching field of the source array.
    """
    a = nd.array(
        [[(1, 2, 'a', 'b'), (3, 4, 'ab', 'cd')],
         [(5, 6, 'def', 'ghi')],
         [(7, 8, 'alpha', 'beta'), (9, 10, 'X', 'Y'),
          (11, 12, 'the', 'end')]],
        type='3 * var * {x: int32, y: int32, z: string, w: string}')
    # dynd type of each field of the source struct, keyed by field name.
    field_types = {'x': ndt.int32, 'y': ndt.int32,
                   'z': ndt.string, 'w': ndt.string}
    selections = (
        ('x',),                # selecting a single field
        ('z', 'y'),            # selecting two fields
        ('w', 'y', 'z'),       # selecting three fields
        ('w', 'y', 'x', 'z'),  # reordering all four fields
    )
    for names in selections:
        b = nd.fields(a, *names)
        self.assertEqual(
            nd.type_of(b),
            ndt.make_fixed_dim(
                3,
                ndt.make_var_dim(
                    ndt.make_struct(
                        [field_types[name] for name in names],
                        list(names)))))
        # Field data must be unchanged by the selection.
        for name in names:
            self.assertEqual(nd.as_py(getattr(b, name)),
                             nd.as_py(getattr(a, name)))
def test_fixed_var(self):
    """Verify field selection through a fixed and a var dimension.

    The source array has type '3 * var * struct'. Selecting fields must
    keep both dimensions, narrow/reorder the struct to the requested
    fields, and leave the per-field data unchanged.
    """
    nested_rows = [
        [(1, 2, 'a', 'b'), (3, 4, 'ab', 'cd')],
        [(5, 6, 'def', 'ghi')],
        [(7, 8, 'alpha', 'beta'), (9, 10, 'X', 'Y'), (11, 12, 'the', 'end')],
    ]
    a = nd.array(
        nested_rows,
        type='3 * var * {x: int32, y: int32, z: string, w: string}')

    def check_selection(names, types):
        # Select `names` from `a`, then compare the result type and each
        # selected field's data against the source array.
        b = nd.fields(a, *names)
        struct_tp = ndt.make_struct(types, names)
        self.assertEqual(nd.type_of(b),
                         ndt.make_fixed_dim(3, ndt.make_var_dim(struct_tp)))
        for name in names:
            self.assertEqual(nd.as_py(getattr(b, name)),
                             nd.as_py(getattr(a, name)))

    # Selecting a single field
    check_selection(['x'], [ndt.int32])
    # Selecting two fields
    check_selection(['z', 'y'], [ndt.string, ndt.int32])
    # Selecting three fields
    check_selection(['w', 'y', 'z'], [ndt.string, ndt.int32, ndt.string])
    # Reordering all four fields
    check_selection(['w', 'y', 'x', 'z'],
                    [ndt.string, ndt.int32, ndt.int32, ndt.string])
def groupby(self, json_cmd):
    """Run a group-by over a session array and store both results.

    Parameters
    ----------
    json_cmd : str
        JSON text. It must contain 'fields' (the field names to group
        by, passed on to nd.fields) and may contain 'input' (the URL of
        the array to group; defaults to base_url + array_name).

    Returns
    -------
    tuple
        (content_type, body), where body is a JSON string carrying the
        session URL plus the URL and dshape of the grouped result and of
        the groups.

    Raises
    ------
    RuntimeError
        If the input array URL does not start with self.base_url.
    """
    print('GroupBy operation')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']

    arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

    # Do the groupby, get its groups, then
    # evaluate it because deferred operations
    # through the groupby won't work well yet.
    res = nd.groupby(arr, nd.fields(arr, *fields))
    groups = res.groups
    res = res.eval()

    # Write out the groupby result.
    # NOTE(review): element 0 of the provider's return value is used as
    # a writable, closable handle and element 1 as a name relative to
    # base_url -- confirm against the array provider.
    defarr_gb = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groupby_', array(res))
    try:
        dshape_gb = nd.dshape_of(res)
        defarr_gb[0].write(json.dumps({
            'dshape': dshape_gb,
            'command': 'groupby',
            'params': {
                'fields': fields
            }
        }))
    finally:
        # Close the handle even when computing or writing the metadata
        # fails, so the handle is never leaked.
        defarr_gb[0].close()

    # Write out the groups
    defarr_groups = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groups_', groups)
    try:
        dshape_groups = nd.dshape_of(groups)
        defarr_groups[0].write(json.dumps({
            'dshape': dshape_groups,
            'command': 'groupby.groups',
            'params': {
                'fields': fields
            }
        }))
    finally:
        defarr_groups[0].close()

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output_gb': self.base_url + defarr_gb[1],
        'dshape_gb': dshape_gb,
        'output_groups': self.base_url + defarr_groups[1],
        'dshape_groups': dshape_groups
    })
    return (content_type, body)
def groupby(self, json_cmd):
    """Group a session array by the requested fields and persist the output.

    Parameters
    ----------
    json_cmd : str
        JSON text with a required 'fields' entry (field names forwarded
        to nd.fields) and an optional 'input' entry (URL of the array to
        group; defaults to base_url + array_name).

    Returns
    -------
    tuple
        (content_type, body); body is a JSON string with the session
        URL and, for both the grouped result and the groups, the output
        URL and dshape.

    Raises
    ------
    RuntimeError
        If the input array URL does not start with self.base_url.
    """
    print('GroupBy operation')
    cmd = json.loads(json_cmd)
    array_url = cmd.get('input', self.base_url + self.array_name)
    if not array_url.startswith(self.base_url):
        raise RuntimeError('Input array must start with the base url')
    array_name = array_url[len(self.base_url):]
    fields = cmd['fields']

    arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

    # Do the groupby, get its groups, then
    # evaluate it because deferred operations
    # through the groupby won't work well yet.
    res = nd.groupby(arr, nd.fields(arr, *fields))
    groups = res.groups
    res = res.eval()

    def write_metadata(handle, source, command):
        # Write the JSON description of `source` to `handle` and return
        # the dshape. The handle is closed on failure as well, so an
        # error while computing or writing the metadata cannot leak it.
        try:
            dshape = nd.dshape_of(source)
            handle.write(json.dumps({
                'dshape': dshape,
                'command': command,
                'params': {
                    'fields': fields
                }
            }))
        finally:
            handle.close()
        return dshape

    # Write out the groupby result.
    # NOTE(review): element 0 of the provider's return value is used as
    # a writable, closable handle and element 1 as a name relative to
    # base_url -- confirm against the array provider.
    defarr_gb = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groupby_', array(res))
    dshape_gb = write_metadata(defarr_gb[0], res, 'groupby')

    # Write out the groups
    defarr_groups = self.array_provider.create_deferred_array_filename(
        self.session_name, 'groups_', groups)
    dshape_groups = write_metadata(defarr_groups[0], groups,
                                   'groupby.groups')

    content_type = 'application/json; charset=utf-8'
    body = json.dumps({
        'session': self.base_url + self.session_name,
        'output_gb': self.base_url + defarr_gb[1],
        'dshape_gb': dshape_gb,
        'output_groups': self.base_url + defarr_groups[1],
        'dshape_groups': dshape_groups
    })
    return (content_type, body)
def test_simple(self):
    """Verify nd.fields on a one-dimensional array of four-field structs.

    Selecting fields must narrow/reorder the struct dtype to exactly the
    requested fields and leave the per-field data unchanged.
    """
    records = [
        (1, 2, 'a', 'b'),
        (3, 4, 'ab', 'cd'),
        (5, 6, 'def', 'ghi'),
    ]
    a = nd.array(records,
                 type='3 * {x: int32, y: int32, z: string, w: string}')

    def check_selection(names, types):
        # Select `names` from `a`, then compare the result dtype and each
        # selected field's data against the source array.
        b = nd.fields(a, *names)
        self.assertEqual(nd.dtype_of(b), ndt.make_struct(types, names))
        for name in names:
            self.assertEqual(nd.as_py(getattr(b, name)),
                             nd.as_py(getattr(a, name)))

    # Selecting a single field
    check_selection(['x'], [ndt.int32])
    # Selecting two fields
    check_selection(['z', 'y'], [ndt.string, ndt.int32])
    # Selecting three fields
    check_selection(['w', 'y', 'z'], [ndt.string, ndt.int32, ndt.string])
    # Reordering all four fields
    check_selection(['w', 'y', 'x', 'z'],
                    [ndt.string, ndt.int32, ndt.int32, ndt.string])
def test_immutable(self):
    """Check nd.groupby on an array produced by eval_immutable().

    Grouping by field 'A' must report the groups 'x' and 'y' and split
    the rows between them in their original order.
    """
    rows = [('x', 0), ('y', 1), ('x', 2), ('x', 3), ('y', 4)]
    a = nd.array(rows, dtype='{A: string, B: int32}').eval_immutable()
    by_field = nd.fields(a, 'A')
    gb = nd.groupby(a, by_field)

    expected_groups = [{'A': key} for key in ('x', 'y')]
    self.assertEqual(nd.as_py(gb.groups), expected_groups)

    # Rows of each group, listed in the order they appear in `rows`.
    expected_rows = [
        [{'A': 'x', 'B': value} for value in (0, 2, 3)],
        [{'A': 'y', 'B': value} for value in (1, 4)],
    ]
    self.assertEqual(nd.as_py(gb), expected_rows)
def test_aggregate(self):
    """Check computed aggregate fields over an evaluated group-by.

    The rows are grouped by 'cat'; sum/mean/max expressions are then
    evaluated through nd.make_computed_fields and compared with the
    expected per-group values.
    """
    rows = [
        ('A', 1, 2),
        ('A', 3, 4),
        ('B', 1.5, 2.5),
        ('A', 0.5, 9),
        ('C', 1, 5),
        ('B', 2, 2),
    ]
    a = nd.array(rows, dtype='c{cat: string, x: float32, y: float32}')
    gb = nd.groupby(a, nd.fields(a, 'cat')).eval()

    # (computed field name, expression, expected value per group)
    aggregates = (
        ('sum_x', 'sum(x)', [4.5, 3.5, 1]),
        ('mean_y', 'mean(y)', [5, 2.25, 5]),
        ('max_x', 'max(x)', [3, 2, 1]),
        ('max_y', 'max(y)', [9, 2.5, 5]),
    )
    b = nd.make_computed_fields(
        gb, 1,
        fields=[(name, ndt.float32, expr) for name, expr, _ in aggregates])
    for name, _, expected in aggregates:
        self.assertEqual(nd.as_py(getattr(b, name)), expected)
bcolz.ctable: bc, CSV: csv, SQL: sql} schema_no_date = '{amount: int64, id: int64, name: string[7]}' sql_no_date = SQL('sqlite:///:memory:', 'accounts_no_date', schema=schema_no_date) L_no_date = list(pluck([0, 1, 2], L)) sql_no_date.extend(L_no_date) no_date = {list: list(pluck([0, 1, 2], L)), Table: Table(list(pluck([0, 1, 2], L)), '{amount: int64, id: int64, name: string[7]}'), DataFrame: df[['amount', 'id', 'name']], np.ndarray: x[['amount', 'id', 'name']], nd.array: nd.fields(arr, 'amount', 'id', 'name'), bcolz.ctable: bc[['amount', 'id', 'name']], SQL: sql_no_date} try: import pymongo except ImportError: pymongo = None Collection = None if pymongo: from pymongo.collection import Collection try: db = pymongo.MongoClient().db db.test.drop()
(bcolz.ctable, bc), (CSV, csv), (SQL, sql)] schema_no_date = '{amount: int64, id: int64, name: string[7]}' sql_no_date = SQL('sqlite:///:memory:', 'accounts_no_date', schema=schema_no_date) L_no_date = list(pluck([0, 1, 2], L)) sql_no_date.extend(L_no_date) no_date = [(list, list(pluck([0, 1, 2], L))), (Table, Table(list(pluck([0, 1, 2], L)), '{amount: int64, id: int64, name: string[7]}')), (DataFrame, df[['amount', 'id', 'name']]), (np.ndarray, x[['amount', 'id', 'name']]), (nd.array, nd.fields(arr, 'amount', 'id', 'name')), (bcolz.ctable, bc[['amount', 'id', 'name']]), (SQL, sql_no_date)] try: import pymongo except ImportError: pymongo = None Collection = None if pymongo: from pymongo.collection import Collection try: db = pymongo.MongoClient().db db.test.drop()
schema_no_date = '{amount: int64, id: int64, name: string[7]}' sql_no_date = SQL('sqlite:///:memory:', 'accounts_no_date', schema=schema_no_date) L_no_date = list(pluck([0, 1, 2], L)) sql_no_date.extend(L_no_date) no_date = [(list, list(pluck([0, 1, 2], L))), (Data, Data(list(pluck([0, 1, 2], L)), 'var * {amount: int64, id: int64, name: string[7]}')), (DataFrame, df[['amount', 'id', 'name']]), (np.ndarray, x[['amount', 'id', 'name']]), (nd.array, nd.fields(arr, 'amount', 'id', 'name')), (bcolz.ctable, bc[['amount', 'id', 'name']]), (SQL, sql_no_date)] try: import pymongo except ImportError: pymongo = None Collection = None if pymongo: from pymongo.collection import Collection try: db = pymongo.MongoClient().db db.test.drop() data.append((Collection, into(db.test, df)))