def zeros(dshape, caps={'efficient-write': True}, storage=None):
    """Create an array and fill it with zeros.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.
    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        Only the presence of the 'efficient-write' and 'compress' keys is
        checked, in that order.  The dictionary is never modified here.
    storage : Storage instance
        A Storage object with the necessary info for data storage.  When
        the converted storage is not None it takes precedence over `caps`.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If no storage is given and `caps` requests neither
        'efficient-write' nor 'compress'.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    if storage is not None:
        # Persistent result: BLZ array rooted at the storage path.
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt, rootdir=storage.path))
    elif 'efficient-write' in caps:
        # TODO: Handle var dimension properly (raise exception?)
        dyndarr = nd.empty(str(dshape))
        dyndarr[...] = False
        dd = DyNDDataDescriptor(dyndarr)
    elif 'compress' in caps:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt))
    else:
        # Previously this fell through and failed with an unbound `dd`.
        raise ValueError(
            "caps must contain 'efficient-write' or 'compress' "
            "when no storage is given")
    return Array(dd)
def empty(dshape, caps={'efficient-write': True}, storage=None):
    """Create an array with uninitialized data.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.
    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        Only the presence of the 'efficient-write' and 'compress' keys is
        checked, in that order.  The dictionary is never modified here.
    storage : Storage instance
        A Storage object with the necessary info for data storage.  When
        the converted storage is not None it takes precedence over `caps`.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If no storage is given and `caps` requests neither
        'efficient-write' nor 'compress'.

    Notes
    -----
    The BLZ-backed branches build the array with ``blz.zeros``, so in
    those cases the data is actually zero-filled.

    """
    dshape = _normalize_dshape(dshape)
    storage = _storage_convert(storage)

    if storage is not None:
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt, rootdir=storage.path))
    elif 'efficient-write' in caps:
        dd = DyNDDataDescriptor(nd.empty(str(dshape)))
    elif 'compress' in caps:
        # `shape` and `dt` were previously used here without being
        # defined, which raised NameError on this branch.
        shape, dt = to_numpy(dshape)
        dd = BLZDataDescriptor(blz.zeros(shape, dt))
    else:
        # Previously this fell through and failed with an unbound `dd`.
        raise ValueError(
            "caps must contain 'efficient-write' or 'compress' "
            "when no storage is given")
    return Array(dd)
def test01b(self):
    """Creating a barray in "w" mode."""
    N = 50000
    cn = blz.zeros(N, dtype="i1", rootdir=self.rootdir)
    self.assertEqual(len(cn), N)

    # Create again over the same rootdir with mode='w'; the resulting
    # array is expected to have the new length.
    cn = blz.zeros(N - 2, dtype="i1", rootdir=self.rootdir, mode='w')
    self.assertEqual(len(cn), N - 2)

    # Now check some accesses (no errors should be raised)
    cn.append([1, 1])
    self.assertEqual(len(cn), N)
    cn[1] = 2
    self.assertEqual(cn[1], 2)
def zeros(dshape, ddesc=None):
    """Create an array and fill it with zeros.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.
    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    """
    dshape = _normalize_dshape(dshape)

    # No descriptor given: build a writable in-memory DyND array.
    if ddesc is None:
        in_memory = DyND_DDesc(nd.zeros(str(dshape), access='rw'))
        return Array(in_memory)

    if isinstance(ddesc, BLZ_DDesc):
        np_shape, np_dtype = to_numpy(dshape)
        ddesc.blzarr = blz.zeros(np_shape, np_dtype,
                                 rootdir=ddesc.path,
                                 mode=ddesc.mode,
                                 **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        # Materialize the zeros as a NumPy array and store them as an
        # extendable array inside the HDF5 file.
        zero_block = nd.as_numpy(nd.zeros(str(dshape)))
        with tb.open_file(ddesc.path, mode=ddesc.mode) as h5file:
            group, leaf = split_path(ddesc.datapath)
            h5file.create_earray(group, leaf,
                                 filters=ddesc.filters,
                                 obj=zero_block)
        # change into 'a'ppend mode for further operations
        ddesc.mode = 'a'

    return Array(ddesc)
def test01c(self):
    """Testing zeros() constructor, with a string type."""
    a = np.zeros(self.N, dtype='S5')
    ac = blz.zeros(self.N, dtype='S5', rootdir=self.rootdir)
    # The blz array must match NumPy in both dtype and contents.
    self.assertEqual(a.dtype, ac.dtype)
    self.assertTrue(np.all(a == ac[:]))
def test01b(self):
    """Testing zeros() constructor, with a `dtype`."""
    a = np.zeros(self.N, dtype='i4')
    ac = blz.zeros(self.N, dtype='i4', rootdir=self.rootdir)
    # The blz array must match NumPy in both dtype and contents.
    self.assertEqual(a.dtype, ac.dtype)
    self.assertTrue(np.all(a == ac[:]))
def interpret(func, env, args, storage=None, **kwds):
    """Evaluate `func` on `args`, in one shot or streamed into BLZ storage.

    Parameters
    ----------
    func : IR function
        The function to evaluate.  It is copied before being transformed.
    env : dict
        Environment passed through the run-time pipeline.  It is updated
        with 'dynd-types' and, when `storage` is given, with
        'stream-outer' and 'result-ndim'.
    args : sequence of arrays
        One array per entry of ``func.args``.  The sequence itself is not
        modified.
    storage : Storage instance, optional
        When given, the result is computed chunk by chunk along the
        outermost dimension and appended to a BLZ array rooted at
        ``storage.path``.
    **kwds
        Accepted but not used here.

    Returns
    -------
    The interpreter result when `storage` is None, otherwise a blaze
    Array wrapping the BLZ data descriptor that received the chunks.

    Raises
    ------
    TypeError
        If `storage` is given and the result type is a scalar.

    """
    assert len(args) == len(func.args)

    # Work on a private copy: persistent inputs may be replaced below and
    # that must not leak into the caller's list (this also lets callers
    # pass a tuple).
    args = list(args)

    # Make a copy, since we're going to mutate our IR!
    func, _ = copy_function(func)

    # If it's a BLZ output, we want an interpreter that streams
    # the processing through in chunks
    if storage is not None:
        if len(func.type.restype.shape) == 0:
            raise TypeError('Require an array, not a scalar, for outputting to BLZ')
        env['stream-outer'] = True
        result_ndim = env['result-ndim'] = len(func.type.restype.shape)
    else:
        # Convert any persistent inputs to memory
        # TODO: should stream the computation in this case
        for i, arg in enumerate(args):
            if isinstance(arg._data, BLZDataDescriptor):
                args[i] = arg[:]

    # Update environment with dynd type information
    dynd_types = dict((arg, get_dynd_type(array))
                      for arg, array in zip(func.args, args)
                      if isinstance(array._data, DyNDDataDescriptor))
    env['dynd-types'] = dynd_types

    # Lift ckernels
    func, env = run_pipeline(func, env, run_time_passes)

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result

    res_shape, res_dt = datashape.to_numpy(func.type.restype)
    dim_size = operator.index(res_shape[0])
    row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
    # Target roughly 1 MiB per chunk, never more rows than exist.  The
    # step is kept >= 1 so that an empty outer dimension yields an empty
    # loop instead of range() rejecting a zero step.
    chunk_size = max(1, min((1024 * 1024) // row_size, dim_size))

    # Evaluate by streaming the outermost dimension,
    # and using the BLZ data descriptor's append
    dst_dd = BLZDataDescriptor(blz.zeros((0,) + res_shape[1:], res_dt,
                                         rootdir=storage.path))

    # Loop through all the chunks
    for chunk_start in range(0, dim_size, chunk_size):
        # Tell the interpreter which chunk size to use (last
        # chunk might be smaller)
        this_chunk = min(chunk_size, dim_size - chunk_start)
        # Evaluate the chunk; only arguments with the full result rank
        # are sliced, the others are passed whole.
        args_chunk = [arg[chunk_start:chunk_start + this_chunk]
                      if len(arg.dshape.shape) == result_ndim
                      else arg
                      for arg in args]
        values = dict(zip(func.args, args_chunk))
        interp = CKernelChunkInterp(values, this_chunk, result_ndim)
        visit(interp, func)
        chunk = interp.result._data.dynd_arr()
        dst_dd.append(chunk)

    return blaze.Array(dst_dd)
def test01c(self):
    """Testing `zeros` constructor (III)"""
    shape = (2, 2)
    dtype = '(4,)i4'
    expected = np.zeros(shape, dtype=dtype)
    actual = blz.zeros(shape, dtype=dtype, rootdir=self.rootdir)
    if self.open:
        # Compare against the array as re-opened from its rootdir rather
        # than the handle that was just created.
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test01c(self):
    """Testing `zeros` constructor (III)"""
    shape, subarray_dtype = (2, 2), '(4,)i4'
    reference = np.zeros(shape, dtype=subarray_dtype)
    candidate = blz.zeros(shape, dtype=subarray_dtype, rootdir=self.rootdir)
    # When requested, check the array as read back via blz.open().
    if self.open:
        candidate = blz.open(rootdir=self.rootdir)
    assert_array_equal(reference, candidate, "Arrays are not equal")
def test01c(self):
    """Creating a barray in "a" mode."""
    N = 30003
    cn = blz.zeros(N, dtype="i1", rootdir=self.rootdir)
    self.assertEqual(len(cn), N)
    # Creating again over the same rootdir in append mode is expected
    # to be refused with a RuntimeError.
    self.assertRaises(RuntimeError, blz.zeros, N - 2, dtype="i1",
                      rootdir=self.rootdir, mode='a')
def test00a(self):
    """Testing wheretrue() in combination with a list constructor"""
    a = blz.zeros(self.N, dtype="bool")
    a[30:40] = blz.ones(10, dtype="bool")
    # Snapshot the contents before and after consuming wheretrue(), to
    # check that iterating wheretrue() does not disturb plain iteration.
    alist = list(a)
    blist1 = list(a.wheretrue())
    self.assertEqual(blist1, list(range(30, 40)))
    alist2 = list(a)
    self.assertEqual(alist, alist2, "wheretrue() not working correctly")
def test00a(self):
    """Testing wheretrue() in combination with a list constructor"""
    a = blz.zeros(self.N, dtype="bool")
    a[30:40] = blz.ones(10, dtype="bool")
    # Snapshot the contents before and after consuming wheretrue(), to
    # check that iterating wheretrue() does not disturb plain iteration.
    alist = list(a)
    blist1 = list(a.wheretrue())
    self.assertEqual(blist1, list(range(30, 40)))
    alist2 = list(a)
    self.assertEqual(alist, alist2, "wheretrue() not working correctly")
def getobject(self):
    """Build the persistent object matching ``self.flavor``.

    Returns a ``blz.barray`` for flavor 'barray' and a ``blz.btable``
    for flavor 'btable', both created under ``self.rootdir``.

    Raises ValueError for any other flavor (previously this failed at
    the ``return`` with an unbound local).
    """
    if self.flavor == 'barray':
        obj = blz.zeros(10, dtype="i1", rootdir=self.rootdir)
        self.assertEqual(type(obj), blz.barray)
    elif self.flavor == 'btable':
        obj = blz.fromiter(((i, i * 2) for i in range(10)),
                           dtype='i2,f4', count=10,
                           rootdir=self.rootdir)
        self.assertEqual(type(obj), blz.btable)
    else:
        raise ValueError("unknown flavor: %r" % (self.flavor,))
    return obj
def getobject(self):
    """Build the persistent object matching ``self.flavor``.

    Returns a ``blz.barray`` for flavor 'barray' and a ``blz.btable``
    for flavor 'btable', both created under ``self.rootdir``.

    Raises ValueError for any other flavor (previously this failed at
    the ``return`` with an unbound local).
    """
    if self.flavor == 'barray':
        obj = blz.zeros(10, dtype="i1", rootdir=self.rootdir)
        self.assertEqual(type(obj), blz.barray)
    elif self.flavor == 'btable':
        obj = blz.fromiter(((i, i * 2) for i in range(10)),
                           dtype='i2,f4', count=10,
                           rootdir=self.rootdir)
        self.assertEqual(type(obj), blz.btable)
    else:
        raise ValueError("unknown flavor: %r" % (self.flavor,))
    return obj
def test02a(self):
    """Opening a barray in "r" mode."""
    N = 10001
    cn = blz.zeros(N, dtype="i1", rootdir=self.rootdir)
    self.assertEqual(len(cn), N)

    # Re-open the same rootdir read-only; the length must be preserved.
    cn = blz.barray(rootdir=self.rootdir, mode='r')
    self.assertEqual(len(cn), N)

    # Now check some accesses: writes are expected to be rejected.
    self.assertRaises(RuntimeError, cn.__setitem__, 1, 1)
    self.assertRaises(RuntimeError, cn.append, 1)
def test02b(self):
    """Opening a barray in "w" mode."""
    N = 100001
    cn = blz.zeros(N, dtype="i1", rootdir=self.rootdir)
    self.assertEqual(len(cn), N)

    # Re-open the same rootdir with mode='w'; the array is expected to
    # come back empty.
    cn = blz.barray(rootdir=self.rootdir, mode='w')
    self.assertEqual(len(cn), 0)

    # Now check some accesses (no errors should be raised)
    cn.append([1, 1])
    self.assertEqual(len(cn), 2)
    cn[1] = 2
    self.assertEqual(cn[1], 2)
def test07(self):
    """Checking barray constructor from another barray.

    Test introduced after it was seen failing (blaze issue #30)
    """
    dtypes = [
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.float16, np.float32, np.float64,
        np.complex64, np.complex128,
    ]
    # Extended-precision types are only added when this NumPy build
    # exposes float128.
    if hasattr(np, 'float128'):
        dtypes += [np.float128, np.complex256]

    for shape in [(10,), (10, 10), (10, 10, 10)]:
        for dtype in dtypes:
            source = blz.zeros(shape, dtype)
            clone = blz.barray(source)
            # The copy must keep both dtype and shape of the source.
            self.assertEqual(source.dtype, clone.dtype)
            self.assertEqual(source.shape, clone.shape)
            self.assertEqual(source.shape, shape)
def interpret(func, env, storage=None, **kwds):
    """Evaluate `func`, in one shot or streamed into BLZ storage.

    Parameters
    ----------
    func : IR function
        The function to evaluate.
    env : dict
        Must provide 'runtime.arglist' (the input arrays) and, when
        `storage` is given, 'result-ndim'.
    storage : Storage instance, optional
        When given, the result is computed chunk by chunk along the
        outermost dimension and appended to a BLZ array rooted at
        ``storage.path``.
    **kwds
        Accepted but not used here.

    Returns
    -------
    The interpreter result when `storage` is None, otherwise a blaze
    Array wrapping the BLZ data descriptor that received the chunks.

    """
    args = env['runtime.arglist']

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result

    result_ndim = env['result-ndim']

    res_shape, res_dt = datashape.to_numpy(func.type.restype)
    dim_size = operator.index(res_shape[0])
    row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
    # Target roughly 1 MiB per chunk, never more rows than exist.  The
    # step is kept >= 1 so that an empty outer dimension yields an empty
    # loop instead of range() rejecting a zero step.
    chunk_size = max(1, min((1024 * 1024) // row_size, dim_size))

    # Evaluate by streaming the outermost dimension,
    # and using the BLZ data descriptor's append
    dst_dd = BLZDataDescriptor(
        blz.zeros((0, ) + res_shape[1:], res_dt, rootdir=storage.path))

    # Loop through all the chunks
    for chunk_start in range(0, dim_size, chunk_size):
        # Tell the interpreter which chunk size to use (last
        # chunk might be smaller)
        this_chunk = min(chunk_size, dim_size - chunk_start)
        # Evaluate the chunk; only arguments with the full result rank
        # are sliced, the others are passed whole.
        args_chunk = [
            arg[chunk_start:chunk_start + this_chunk]
            if len(arg.dshape.shape) == result_ndim else arg
            for arg in args
        ]
        values = dict(zip(func.args, args_chunk))
        interp = CKernelChunkInterp(values, this_chunk, result_ndim)
        visit(interp, func)
        chunk = interp.result._data.dynd_arr()
        dst_dd.append(chunk)

    return blaze.Array(dst_dd)
def zeros(dshape, ddesc=None):
    """Create an array and fill it with zeros.

    Parameters
    ----------
    dshape : datashape
        The datashape for the resulting array.
    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    """
    dshape = _normalize_dshape(dshape)

    if ddesc is None:
        # In-memory case: a read-write DyND array of zeros.
        dynd_zeros = nd.zeros(str(dshape), access='rw')
        return Array(DyND_DDesc(dynd_zeros))

    if isinstance(ddesc, BLZ_DDesc):
        blz_shape, blz_dtype = to_numpy(dshape)
        ddesc.blzarr = blz.zeros(
            blz_shape, blz_dtype,
            rootdir=ddesc.path, mode=ddesc.mode, **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        payload = nd.as_numpy(nd.zeros(str(dshape)))
        with tb.open_file(ddesc.path, mode=ddesc.mode) as handle:
            parent, node_name = split_path(ddesc.datapath)
            handle.create_earray(
                parent, node_name, filters=ddesc.filters, obj=payload)
        # change into 'a'ppend mode for further operations
        ddesc.mode = 'a'

    # Any other descriptor type is wrapped as it was received.
    return Array(ddesc)
def interpret(func, env, ddesc=None, **kwds):
    """Evaluate `func`, in one shot or streamed into a BLZ descriptor.

    Parameters
    ----------
    func : IR function
        The function to evaluate.
    env : dict
        Must provide 'runtime.arglist' (the input arrays) and, when
        `ddesc` is given, 'result-ndim'.
    ddesc : data descriptor instance, optional
        When given, its ``blzarr`` is replaced by a fresh BLZ array
        created at ``ddesc.path`` with ``ddesc.mode``, and the result is
        appended to it chunk by chunk along the outermost dimension.
    **kwds
        Accepted but not used here.

    Returns
    -------
    The interpreter result when `ddesc` is None, otherwise a blaze Array
    wrapping `ddesc`.

    """
    args = env['runtime.arglist']

    if ddesc is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result

    result_ndim = env['result-ndim']

    res_shape, res_dt = datashape.to_numpy(func.type.restype)
    dim_size = operator.index(res_shape[0])
    row_size = ndt.type(str(func.type.restype.subarray(1))).default_data_size
    # Target roughly 1 MiB per chunk, never more rows than exist.  The
    # step is kept >= 1 so that an empty outer dimension yields an empty
    # loop instead of range() rejecting a zero step.
    chunk_size = max(1, min((1024 * 1024) // row_size, dim_size))

    # Evaluate by streaming the outermost dimension,
    # and using the BLZ data descriptor's append
    ddesc.blzarr = blz.zeros((0,) + res_shape[1:], res_dt,
                             rootdir=ddesc.path, mode=ddesc.mode)

    # Loop through all the chunks
    for chunk_start in range(0, dim_size, chunk_size):
        # Tell the interpreter which chunk size to use (last
        # chunk might be smaller)
        this_chunk = min(chunk_size, dim_size - chunk_start)
        # Evaluate the chunk; only arguments with the full result rank
        # are sliced, the others are passed whole.
        args_chunk = [arg[chunk_start:chunk_start + this_chunk]
                      if len(arg.dshape.shape) == result_ndim
                      else arg
                      for arg in args]
        values = dict(zip(func.args, args_chunk))
        interp = CKernelChunkInterp(values, this_chunk, result_ndim)
        visit(interp, func)
        chunk = interp.result.ddesc.dynd_arr()
        ddesc.append(chunk)

    return blaze.Array(ddesc)
def test01b(self):
    """Testing where() with a multidimensional array"""
    shape = (self.N, 10)
    mask = blz.zeros(shape, dtype="bool")
    mask[30:40] = blz.ones(10, dtype="bool")
    values = blz.arange(self.N * 10, dtype="f4").reshape(shape)
    # where() with a 2-d boolean mask is expected to be unsupported.
    self.assertRaises(NotImplementedError, values.where, mask)
# Compare the time needed to create a large zero-filled array with
# numpy.zeros() and with blz.zeros().

import numpy as np
import blz
from time import time

# The element count must be an integer: a float such as 1e8 is not a
# valid shape for current NumPy releases.
N = int(1e8)
dtype = 'i4'

t0 = time()
a = np.zeros(N, dtype=dtype)
print("Time numpy.zeros() --> %.4f" % (time() - t0))

t0 = time()
ac = blz.zeros(N, dtype=dtype)
#ac = blz.barray(a)
print("Time barray.zeros() --> %.4f" % (time() - t0))

# Single-argument print() works under both Python 2 and Python 3.
print("ac--> %r" % (ac,))

#assert(np.all(a == ac))
def test02(self):
    """Testing sum() with strings (TypeError)."""
    string_array = blz.zeros(10, 'S3')
    # Summing a string-typed array is expected to raise TypeError.
    reducer = string_array.sum
    self.assertRaises(TypeError, reducer)
def test01a(self):
    """Testing zeros() constructor."""
    a = np.zeros(self.N)
    ac = blz.zeros(self.N, rootdir=self.rootdir)
    # With no explicit dtype, blz must agree with NumPy's default dtype
    # and contents.
    self.assertEqual(a.dtype, ac.dtype)
    self.assertTrue(np.all(a == ac[:]))
def test00b(self):
    """Testing wheretrue() with a multidimensional array"""
    mask = blz.zeros((self.N, 10), dtype="bool")
    mask[30:40] = blz.ones(10, dtype="bool")
    # wheretrue() on a 2-d array is expected to be unsupported.
    iterator_factory = mask.wheretrue
    self.assertRaises(NotImplementedError, iterator_factory)
# Compare the time needed to create a large zero-filled array with
# numpy.zeros() and with blz.zeros().

import numpy as np
import blz
from time import time

# The element count must be an integer: a float such as 1e8 is not a
# valid shape for current NumPy releases.
N = int(1e8)
dtype = 'i4'

t0 = time()
a = np.zeros(N, dtype=dtype)
print("Time numpy.zeros() --> %.4f" % (time() - t0))

t0 = time()
ac = blz.zeros(N, dtype=dtype)
#ac = blz.barray(a)
print("Time barray.zeros() --> %.4f" % (time() - t0))

# Single-argument print() works under both Python 2 and Python 3.
print("ac--> %r" % (ac,))

#assert(np.all(a == ac))
## Benchmark to check the creation of an array of length > 2**32 (5e9)

import blz
from time import time

# Use an integer length; 5e9 is a float literal.
N = int(5e9)

t0 = time()
#cn = blz.zeros(N, dtype="i1")
cn = blz.zeros(N, dtype="i1", rootdir='ondisk_barray', mode='w')
# Single-argument print() works under both Python 2 and Python 3.
print("Creation time: %s" % round(time() - t0, 3))
assert len(cn) == N

t0 = time()
cn = blz.barray(rootdir='ondisk_barray', mode='a')
print("Re-open time: %s" % round(time() - t0, 3))
print("len(cn) %s" % len(cn))
assert len(cn) == N

# Now check some accesses
cn[1] = 1
assert cn[1] == 1
cn[int(2e9)] = 2
assert cn[int(2e9)] == 2
# int() replaces the Python 2-only long(); Python 2 promotes to long
# automatically when the value does not fit in a plain int.
cn[int(3e9)] = 3
assert cn[int(3e9)] == 3
cn[-1] = 4
assert cn[-1] == 4

# The four writes above (1 + 2 + 3 + 4) are the only non-zero entries.
t0 = time()
assert cn.sum() == 10
print("Sum time: %s" % round(time() - t0, 3))
def test01b(self):
    """Testing where() with a multidimensional array"""
    dims = (self.N, 10)
    selector = blz.zeros(dims, dtype="bool")
    selector[30:40] = blz.ones(10, dtype="bool")
    flat = blz.arange(self.N * 10, dtype="f4")
    data = flat.reshape(dims)
    # where() with a 2-d boolean mask is expected to be unsupported.
    self.assertRaises(NotImplementedError, data.where, selector)